From 3c88afc5410b5bf75e6ad598bcd7eca8829a5be5 Mon Sep 17 00:00:00 2001 From: cvs2svn Date: Thu, 3 Jul 2003 20:06:57 +0000 Subject: [PATCH] This commit was manufactured by cvs2svn to create branch 'unlabeled-1.2.762'. --- lnet/.cvsignore | 8 - lnet/AUTHORS | 0 lnet/ChangeLog | 0 lnet/Kernelenv.in | 1 - lnet/Kernelenv.mk | 1 - lnet/Makefile.am | 12 - lnet/Makefile.mk | 6 - lnet/NEWS | 0 lnet/README | 0 lnet/Rules.linux | 25 - lnet/archdep.m4 | 317 - lnet/autogen.sh | 5 - lnet/build.m4 | 95 - lnet/configure.in | 34 - lnet/doc/.cvsignore | 4 - lnet/doc/Data-structures | 65 - lnet/doc/Makefile.am | 46 - lnet/doc/Message-life-cycle | 118 - lnet/doc/NAL-HOWTO | 293 - lnet/doc/file.fig | 111 - lnet/doc/flow_new.fig | 213 - lnet/doc/get.fig | 33 - lnet/doc/ieee.bst | 1112 - lnet/doc/mpi.fig | 117 - lnet/doc/portals.fig | 68 - lnet/doc/portals3.bib | 124 - lnet/doc/portals3.lyx | 15944 -------- lnet/doc/put.fig | 32 - lnet/include/.cvsignore | 4 - lnet/include/Makefile.am | 8 - lnet/include/config.h.in | 58 - lnet/include/linux/Makefile.am | 10 - lnet/include/linux/kp30.h | 941 - lnet/include/linux/portals_compat25.h | 13 - lnet/include/linux/portals_lib.h | 188 - lnet/include/lnet/Makefile.am | 10 - lnet/include/lnet/api-support.h | 27 - lnet/include/lnet/api.h | 159 - lnet/include/lnet/arg-blocks.h | 265 - lnet/include/lnet/defines.h | 116 - lnet/include/lnet/errno.h | 61 - lnet/include/lnet/internal.h | 45 - lnet/include/lnet/lib-dispatch.h | 45 - lnet/include/lnet/lib-lnet.h | 385 - lnet/include/lnet/lib-nal.h | 102 - lnet/include/lnet/lib-p30.h | 385 - lnet/include/lnet/lib-types.h | 282 - lnet/include/lnet/list.h | 245 - lnet/include/lnet/lltrace.h | 175 - lnet/include/lnet/lnet.h | 72 - lnet/include/lnet/lnetctl.h | 75 - lnet/include/lnet/myrnal.h | 26 - lnet/include/lnet/nal.h | 49 - lnet/include/lnet/nalids.h | 4 - lnet/include/lnet/p30.h | 72 - lnet/include/lnet/ppid.h | 52 - lnet/include/lnet/ptlctl.h | 75 - lnet/include/lnet/stringtab.h | 5 - 
lnet/include/lnet/types.h | 157 - lnet/klnds/.cvsignore | 2 - lnet/klnds/Makefile.am | 7 - lnet/klnds/Makefile.mk | 4 - lnet/klnds/gmlnd/.cvsignore | 3 - lnet/klnds/gmlnd/Makefile.am | 13 - lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch | 43 - lnet/klnds/gmlnd/gmlnd.h | 101 - lnet/klnds/gmlnd/gmlnd_cb.c | 517 - lnet/klnds/gmlnd/gmnal.c | 284 - lnet/klnds/qswlnd/.cvsignore | 3 - lnet/klnds/qswlnd/Makefile.am | 17 - lnet/klnds/qswlnd/qswlnd.c | 608 - lnet/klnds/qswlnd/qswlnd.h | 270 - lnet/klnds/qswlnd/qswlnd_cb.c | 1240 - lnet/klnds/scimaclnd/.cvsignore | 3 - lnet/klnds/scimaclnd/Makefile.am | 11 - lnet/klnds/scimaclnd/README.scimacnal | 14 - lnet/klnds/scimaclnd/scimac.conf | 35 - lnet/klnds/scimaclnd/scimacnal.c | 219 - lnet/klnds/scimaclnd/scimacnal.h | 85 - lnet/klnds/scimaclnd/scimacnal_cb.c | 468 - lnet/klnds/socklnd/.cvsignore | 3 - lnet/klnds/socklnd/Makefile.am | 13 - lnet/klnds/socklnd/Makefile.mk | 10 - lnet/klnds/socklnd/socklnd.c | 860 - lnet/klnds/socklnd/socklnd.h | 292 - lnet/klnds/socklnd/socklnd_cb.c | 1613 - lnet/klnds/toelnd/.cvsignore | 3 - lnet/klnds/toelnd/Makefile.am | 13 - lnet/klnds/toelnd/toenal.c | 629 - lnet/klnds/toelnd/toenal.h | 236 - lnet/klnds/toelnd/toenal_cb.c | 1219 - lnet/libcfs/.cvsignore | 4 - lnet/libcfs/Makefile.am | 29 - lnet/libcfs/Makefile.mk | 9 - lnet/libcfs/debug.c | 830 - lnet/libcfs/module.c | 575 - lnet/libcfs/proc.c | 290 - lnet/lnet/.cvsignore | 3 - lnet/lnet/Makefile.am | 10 - lnet/lnet/Makefile.mk | 9 - lnet/lnet/api-eq.c | 158 - lnet/lnet/api-errno.c | 55 - lnet/lnet/api-init.c | 71 - lnet/lnet/api-me.c | 42 - lnet/lnet/api-ni.c | 197 - lnet/lnet/api-wrap.c | 599 - lnet/lnet/lib-dispatch.c | 80 - lnet/lnet/lib-eq.c | 128 - lnet/lnet/lib-init.c | 474 - lnet/lnet/lib-md.c | 412 - lnet/lnet/lib-me.c | 227 - lnet/lnet/lib-move.c | 1379 - lnet/lnet/lib-msg.c | 163 - lnet/lnet/lib-ni.c | 128 - lnet/lnet/lib-pid.c | 58 - lnet/packaging/.cvsignore | 8 - lnet/packaging/Makefile.am | 6 - lnet/packaging/portals.spec.in | 116 - 
lnet/router/.cvsignore | 3 - lnet/router/Makefile.am | 16 - lnet/router/Makefile.mk | 9 - lnet/router/proc.c | 78 - lnet/router/router.c | 449 - lnet/router/router.h | 81 - lnet/tests/.cvsignore | 3 - lnet/tests/Makefile.am | 23 - lnet/tests/ping.h | 80 - lnet/tests/ping_cli.c | 300 - lnet/tests/ping_srv.c | 308 - lnet/tests/sping_cli.c | 276 - lnet/tests/sping_srv.c | 295 - lnet/tests/startclient.sh | 37 - lnet/tests/startserver.sh | 38 - lnet/tests/stopclient.sh | 14 - lnet/tests/stopserver.sh | 16 - lnet/ulnds/.cvsignore | 3 - lnet/ulnds/Makefile.am | 5 - lnet/ulnds/README | 53 - lnet/ulnds/address.c | 146 - lnet/ulnds/bridge.h | 29 - lnet/ulnds/connection.c | 294 - lnet/ulnds/connection.h | 32 - lnet/ulnds/debug.c | 119 - lnet/ulnds/dispatch.h | 39 - lnet/ulnds/ipmap.h | 38 - lnet/ulnds/pqtimer.c | 226 - lnet/ulnds/pqtimer.h | 25 - lnet/ulnds/procapi.c | 283 - lnet/ulnds/procbridge.h | 40 - lnet/ulnds/proclib.c | 270 - lnet/ulnds/select.c | 165 - lnet/ulnds/socklnd/Makefile.am | 5 - lnet/ulnds/socklnd/README | 53 - lnet/ulnds/socklnd/address.c | 146 - lnet/ulnds/socklnd/bridge.h | 29 - lnet/ulnds/socklnd/connection.c | 294 - lnet/ulnds/socklnd/connection.h | 32 - lnet/ulnds/socklnd/debug.c | 119 - lnet/ulnds/socklnd/dispatch.h | 39 - lnet/ulnds/socklnd/ipmap.h | 38 - lnet/ulnds/socklnd/pqtimer.c | 226 - lnet/ulnds/socklnd/pqtimer.h | 25 - lnet/ulnds/socklnd/procapi.c | 283 - lnet/ulnds/socklnd/procbridge.h | 40 - lnet/ulnds/socklnd/proclib.c | 270 - lnet/ulnds/socklnd/select.c | 165 - lnet/ulnds/socklnd/table.c | 264 - lnet/ulnds/socklnd/table.h | 39 - lnet/ulnds/socklnd/tcplnd.c | 198 - lnet/ulnds/socklnd/timer.h | 30 - lnet/ulnds/socklnd/utypes.h | 12 - lnet/ulnds/table.c | 264 - lnet/ulnds/table.h | 39 - lnet/ulnds/tcplnd.c | 198 - lnet/ulnds/timer.h | 30 - lnet/ulnds/utypes.h | 12 - lnet/utils/.cvsignore | 8 - lnet/utils/Makefile.am | 27 - lnet/utils/acceptor.c | 466 - lnet/utils/debug.c | 618 - lnet/utils/debugctl.c | 66 - lnet/utils/l_ioctl.c | 281 - 
lnet/utils/parser.c | 703 - lnet/utils/parser.h | 73 - lnet/utils/portals.c | 985 - lnet/utils/ptlctl.c | 65 - lnet/utils/routerstat.c | 99 - lnet/utils/wirecheck.c | 141 - lustre/.cvsignore | 18 - lustre/BUGS | 15 - lustre/BUILDING | 25 - lustre/COPYING | 352 - lustre/ChangeLog | 406 - lustre/FDL | 355 - lustre/Makefile.am | 38 - lustre/Makefile.mk | 4 - lustre/README | 8 - lustre/Rules | 33 - lustre/autogen.sh | 5 - lustre/cobd/.cvsignore | 3 - lustre/cobd/Makefile.am | 15 - lustre/cobd/cache_obd.c | 337 - lustre/cobd/lproc_cache.c | 91 - lustre/conf/.cvsignore | 2 - lustre/conf/Makefile.am | 15 - lustre/conf/lustre.dtd | 132 - lustre/conf/lustre2ldif.xsl | 287 - lustre/conf/slapd-lustre.conf | 11 - lustre/conf/top.ldif | 4 - lustre/configure.in | 49 - lustre/doc/.cvsignore | 23 - lustre/doc/Makefile.am | 124 - lustre/doc/VERSIONING | 91 - lustre/doc/chbar.sh | 243 - lustre/doc/lconf.lyx | 286 - lustre/doc/lctl.lyx | 736 - lustre/doc/lmc.lyx | 541 - lustre/doc/postbar | 151 - lustre/doc/tex2pdf | 3043 -- lustre/include/.cvsignore | 12 - lustre/include/config.h.in | 10 - lustre/include/ioctl.h | 64 - lustre/include/liblustre.h | 608 - lustre/include/linux/.cvsignore | 15 - lustre/include/linux/Makefile | 7 - lustre/include/linux/lprocfs_status.h | 354 - lustre/include/linux/lustre_compat25.h | 76 - lustre/include/linux/lustre_debug.h | 53 - lustre/include/linux/lustre_dlm.h | 467 - lustre/include/linux/lustre_export.h | 68 - lustre/include/linux/lustre_fsfilt.h | 177 - lustre/include/linux/lustre_ha.h | 24 - lustre/include/linux/lustre_handles.h | 39 - lustre/include/linux/lustre_idl.h | 728 - lustre/include/linux/lustre_import.h | 73 - lustre/include/linux/lustre_lib.h | 706 - lustre/include/linux/lustre_lite.h | 380 - lustre/include/linux/lustre_mds.h | 305 - lustre/include/linux/lustre_net.h | 541 - lustre/include/linux/lustre_otree.h | 31 - lustre/include/linux/obd.h | 432 - lustre/include/linux/obd_cache.h | 13 - lustre/include/linux/obd_class.h | 1159 - 
lustre/include/linux/obd_echo.h | 42 - lustre/include/linux/obd_ext2.h | 49 - lustre/include/linux/obd_filter.h | 104 - lustre/include/linux/obd_lov.h | 45 - lustre/include/linux/obd_ost.h | 48 - lustre/include/linux/obd_ptlbd.h | 33 - lustre/include/linux/obd_snap.h | 29 - lustre/include/linux/obd_snap_support.h | 85 - lustre/include/linux/obd_support.h | 308 - lustre/include/linux/obd_trace.h | 20 - lustre/install-sh | 251 - lustre/kernel_patches/README | 717 - .../kernel_configs/config-linux-2.4.18-i386 | 1834 - .../config-linux-2.4.18-p4smp-61chaos | 1035 - .../kernel_configs/config-linux-2.4.18-uml | 458 - .../kernel_configs/config-linux-2.4.20-i386-rh | 1849 - .../kernel_configs/config-linux-2.4.20-uml | 297 - .../kernel_configs/jdike-2.5.69-uml.config | 321 - lustre/kernel_patches/patches/dev_read_only.patch | 73 - .../patches/dev_read_only_2.4.20-rh.patch | 77 - .../patches/dev_read_only_2.4.20.patch | 77 - .../patches/dev_read_only_hp_2.4.20.patch | 77 - lustre/kernel_patches/patches/dsp.patch | 130 - .../patches/export-truncate-2.5.63.patch | 37 - .../kernel_patches/patches/export-truncate.patch | 35 - lustre/kernel_patches/patches/exports.patch | 57 - .../patches/exports_2.4.20-rh-hp.patch | 53 - lustre/kernel_patches/patches/exports_2.4.20.patch | 57 - .../patches/ext-2.4-patch-1-chaos.patch | 2527 -- .../kernel_patches/patches/ext-2.4-patch-1.patch | 2527 -- .../kernel_patches/patches/ext-2.4-patch-2.patch | 34 - .../kernel_patches/patches/ext-2.4-patch-3.patch | 96 - .../kernel_patches/patches/ext-2.4-patch-4.patch | 48 - lustre/kernel_patches/patches/ext3-2.4-ino_t.patch | 144 - .../kernel_patches/patches/ext3-2.4.18-fixes.patch | 353 - .../patches/ext3-2.4.18-ino_sb_macro.patch | 1540 - .../kernel_patches/patches/ext3-2.4.20-fixes.patch | 118 - .../kernel_patches/patches/ext3-2.5-noread.patch | 266 - lustre/kernel_patches/patches/ext3-2.5.63.patch | 150 - .../patches/ext3-delete_thread-2.4.18.patch | 326 - .../patches/ext3-delete_thread-2.4.20.patch 
| 321 - lustre/kernel_patches/patches/ext3-largefile.patch | 16 - .../patches/ext3-noread-2.4.20.patch | 218 - .../kernel_patches/patches/ext3-orphan_lock.patch | 79 - .../kernel_patches/patches/ext3-san-2.4.20.patch | 117 - .../patches/ext3-truncate_blocks-chaos.patch.patch | 92 - .../patches/ext3-truncate_blocks.patch | 92 - .../kernel_patches/patches/ext3-unmount_sync.patch | 21 - .../patches/ext3-use-after-free.patch | 53 - .../patches/ext3_orphan_lock-2.4.20-rh.patch | 82 - .../patches/extN-2.4.18-ino_sb_fixup.patch | 33 - .../patches/extN-delete_thread.patch | 278 - .../kernel_patches/patches/extN-iget-debug.patch | 48 - .../kernel_patches/patches/extN-misc-fixup.patch | 23 - lustre/kernel_patches/patches/extN-noread.patch | 237 - lustre/kernel_patches/patches/extN-san.patch | 106 - lustre/kernel_patches/patches/extN-wantedi.patch | 171 - .../kernel_patches/patches/htree-ext3-2.4.18.patch | 1201 - .../patches/invalidate_show-2.4.20-rh.patch | 114 - .../kernel_patches/patches/invalidate_show.patch | 115 - .../patches/iod-rmap-exports-2.4.20.patch | 86 - .../kernel_patches/patches/iod-rmap-exports.patch | 86 - .../patches/iod-stock-24-exports.patch | 48 - .../patches/iod-stock-24-exports_hp.patch | 48 - lustre/kernel_patches/patches/iopen-2.4.18.patch | 414 - lustre/kernel_patches/patches/iopen-2.4.20.patch | 423 - lustre/kernel_patches/patches/jbd-transno-cb.patch | 240 - .../patches/kmem_cache_validate.patch | 119 - .../patches/kmem_cache_validate_2.4.20-rh.patch | 124 - .../patches/kmem_cache_validate_2.4.20.patch | 116 - .../patches/kmem_cache_validate_hp.patch | 121 - .../patches/linux-2.4.18ea-0.8.26.patch | 1759 - .../patches/linux-2.4.20-xattr-0.8.54-chaos.patch | 5538 --- .../patches/linux-2.4.20-xattr-0.8.54-hp.patch | 5536 --- .../patches/linux-2.4.20-xattr-0.8.54.patch | 5595 --- lustre/kernel_patches/patches/lustre-2.5.63.patch | 862 - lustre/kernel_patches/patches/lustre_version.patch | 12 - lustre/kernel_patches/patches/mcore-2.4.20-8.patch | 
2738 -- .../kernel_patches/patches/netconsole_sysrq.patch | 41 - lustre/kernel_patches/patches/tcp-zero-copy.patch | 455 - lustre/kernel_patches/patches/tg3_netconsole.patch | 247 - .../patches/uml-patch-2.4.20-4.patch | 39358 ------------------- .../patches/uml_check_get_page.patch | 32 - .../kernel_patches/patches/uml_compile_fixes.patch | 18 - lustre/kernel_patches/patches/uml_no_panic.patch | 32 - .../patches/vfs_intent-2.4.18-18.patch | 1542 - .../patches/vfs_intent-2.4.20-rh.patch | 1529 - .../patches/vfs_intent-2.4.20-vanilla.patch | 1607 - lustre/kernel_patches/pc/dev_read_only.pc | 3 - .../kernel_patches/pc/dev_read_only_2.4.20-rh.pc | 3 - lustre/kernel_patches/pc/dev_read_only_2.4.20.pc | 3 - .../kernel_patches/pc/dev_read_only_hp_2.4.20.pc | 3 - lustre/kernel_patches/pc/dsp.pc | 6 - lustre/kernel_patches/pc/export-truncate-2.5.63.pc | 2 - lustre/kernel_patches/pc/export-truncate.pc | 2 - lustre/kernel_patches/pc/exports.pc | 4 - lustre/kernel_patches/pc/exports_2.4.20-rh-hp.pc | 4 - lustre/kernel_patches/pc/exports_2.4.20.pc | 4 - lustre/kernel_patches/pc/exports_hp_2.4.20.pc | 4 - lustre/kernel_patches/pc/ext-2.4-patch-1-chaos.pc | 11 - lustre/kernel_patches/pc/ext-2.4-patch-1.pc | 11 - lustre/kernel_patches/pc/ext-2.4-patch-2.pc | 1 - lustre/kernel_patches/pc/ext-2.4-patch-3.pc | 3 - lustre/kernel_patches/pc/ext-2.4-patch-4.pc | 1 - lustre/kernel_patches/pc/ext3-2.4-ino_t.pc | 3 - lustre/kernel_patches/pc/ext3-2.4.18-fixes.pc | 7 - .../kernel_patches/pc/ext3-2.4.18-ino_sb_macro.pc | 10 - lustre/kernel_patches/pc/ext3-2.4.20-fixes.pc | 1 - lustre/kernel_patches/pc/ext3-2.5-noread.pc | 3 - lustre/kernel_patches/pc/ext3-2.5.63.pc | 4 - .../kernel_patches/pc/ext3-delete_thread-2.4.18.pc | 3 - .../kernel_patches/pc/ext3-delete_thread-2.4.20.pc | 3 - lustre/kernel_patches/pc/ext3-largefile.pc | 1 - lustre/kernel_patches/pc/ext3-noread-2.4.20.pc | 3 - lustre/kernel_patches/pc/ext3-orphan_lock.pc | 3 - lustre/kernel_patches/pc/ext3-san-2.4.20.pc | 2 - 
.../pc/ext3-truncate_blocks-chaos.patch.pc | 1 - lustre/kernel_patches/pc/ext3-truncate_blocks.pc | 1 - lustre/kernel_patches/pc/ext3-unmount_sync.pc | 1 - lustre/kernel_patches/pc/ext3-use-after-free.pc | 1 - .../pc/ext3_orphan_lock-2.4.20-rh.pc | 3 - .../kernel_patches/pc/extN-2.4.18-ino_sb_fixup.pc | 1 - lustre/kernel_patches/pc/extN-delete_thread.pc | 3 - lustre/kernel_patches/pc/extN-iget-debug.pc | 2 - lustre/kernel_patches/pc/extN-misc-fixup.pc | 1 - lustre/kernel_patches/pc/extN-noread.pc | 3 - lustre/kernel_patches/pc/extN-san.pc | 2 - lustre/kernel_patches/pc/extN-wantedi.pc | 4 - lustre/kernel_patches/pc/htree-ext3-2.4.18.pc | 4 - .../kernel_patches/pc/invalidate_show-2.4.20-rh.pc | 4 - lustre/kernel_patches/pc/invalidate_show.pc | 4 - .../kernel_patches/pc/iod-rmap-exports-2.4.20.pc | 5 - lustre/kernel_patches/pc/iod-rmap-exports.pc | 5 - lustre/kernel_patches/pc/iod-stock-24-exports.pc | 3 - .../kernel_patches/pc/iod-stock-24-exports_hp.pc | 3 - lustre/kernel_patches/pc/iopen-2.4.18.pc | 8 - lustre/kernel_patches/pc/iopen-2.4.20.pc | 8 - lustre/kernel_patches/pc/jbd-transno-cb.pc | 4 - lustre/kernel_patches/pc/kmem_cache_validate.pc | 5 - .../pc/kmem_cache_validate_2.4.20-rh.pc | 5 - .../pc/kmem_cache_validate_2.4.20.pc | 5 - lustre/kernel_patches/pc/kmem_cache_validate_hp.pc | 5 - lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc | 10 - .../pc/linux-2.4.20-xattr-0.8.54-chaos.pc | 62 - .../pc/linux-2.4.20-xattr-0.8.54-hp.pc | 62 - .../kernel_patches/pc/linux-2.4.20-xattr-0.8.54.pc | 64 - lustre/kernel_patches/pc/lustre-2.5.63.pc | 12 - lustre/kernel_patches/pc/lustre-2.5.pc | 11 - lustre/kernel_patches/pc/lustre_version.pc | 1 - lustre/kernel_patches/pc/mcore-2.4.20-8.pc | 34 - lustre/kernel_patches/pc/netconsole_sysrq.pc | 2 - lustre/kernel_patches/pc/tcp-zero-copy.pc | 5 - lustre/kernel_patches/pc/tg3_netconsole.pc | 1 - lustre/kernel_patches/pc/uml-patch-2.4.20-4.pc | 394 - lustre/kernel_patches/pc/uml_check_get_page.pc | 1 - 
lustre/kernel_patches/pc/uml_compile_fixes.pc | 1 - lustre/kernel_patches/pc/uml_no_panic.pc | 1 - lustre/kernel_patches/pc/vfs_intent-2.4.18-18.pc | 10 - lustre/kernel_patches/pc/vfs_intent-2.4.20-rh.pc | 10 - .../kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc | 10 - lustre/kernel_patches/prepare_tree.sh | 88 - lustre/kernel_patches/scripts/added-by-patch | 14 - lustre/kernel_patches/scripts/apatch | 97 - lustre/kernel_patches/scripts/cat-series | 17 - lustre/kernel_patches/scripts/combine-applied | 45 - lustre/kernel_patches/scripts/combine-series | 43 - lustre/kernel_patches/scripts/cvs-take-patch | 78 - lustre/kernel_patches/scripts/export_patch | 55 - lustre/kernel_patches/scripts/extract_description | 87 - lustre/kernel_patches/scripts/forkpatch | 76 - lustre/kernel_patches/scripts/fpatch | 53 - lustre/kernel_patches/scripts/import_patch | 102 - lustre/kernel_patches/scripts/inpatch | 27 - lustre/kernel_patches/scripts/join-patch | 28 - lustre/kernel_patches/scripts/linus-patch | 26 - lustre/kernel_patches/scripts/mpatch | 101 - lustre/kernel_patches/scripts/new-kernel | 82 - lustre/kernel_patches/scripts/p0-2-p1 | 10 - lustre/kernel_patches/scripts/p_diff | 60 - lustre/kernel_patches/scripts/patchdesc | 21 - lustre/kernel_patches/scripts/patchfns | 256 - lustre/kernel_patches/scripts/pcpatch | 45 - lustre/kernel_patches/scripts/poppatch | 72 - lustre/kernel_patches/scripts/prep-patch | 18 - lustre/kernel_patches/scripts/pstatus | 156 - lustre/kernel_patches/scripts/ptkdiff | 46 - lustre/kernel_patches/scripts/pushpatch | 86 - lustre/kernel_patches/scripts/refpatch | 32 - lustre/kernel_patches/scripts/removed-by-patch | 14 - lustre/kernel_patches/scripts/rename-patch | 20 - lustre/kernel_patches/scripts/rolled-up-patch | 30 - lustre/kernel_patches/scripts/rpatch | 90 - lustre/kernel_patches/scripts/split-patch | 29 - lustre/kernel_patches/scripts/sum-series | 41 - lustre/kernel_patches/scripts/tag-series | 41 - lustre/kernel_patches/scripts/toppatch | 27 - 
lustre/kernel_patches/scripts/touched-by-patch | 32 - lustre/kernel_patches/scripts/trypatch | 72 - lustre/kernel_patches/scripts/unitdiff.py | 223 - lustre/kernel_patches/scripts/unused-patches | 39 - lustre/kernel_patches/series/chaos | 20 - lustre/kernel_patches/series/hp-pnnl-2.4.20 | 25 - lustre/kernel_patches/series/rh-2.4.18-18 | 24 - lustre/kernel_patches/series/rh-2.4.20 | 23 - lustre/kernel_patches/series/vanilla-2.4.20 | 29 - lustre/kernel_patches/series/vanilla-2.5 | 2 - lustre/kernel_patches/txt/dev_read_only.txt | 3 - lustre/kernel_patches/txt/exports.txt | 3 - lustre/kernel_patches/txt/exports_hp.txt | 3 - lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt | 3 - lustre/kernel_patches/txt/invalidate_show.txt | 3 - lustre/kernel_patches/txt/kmem_cache_validate.txt | 3 - lustre/kernel_patches/txt/lustre_version.txt | 3 - lustre/kernel_patches/txt/uml_check_get_page.txt | 3 - lustre/kernel_patches/txt/uml_compile_fixes.txt | 3 - lustre/kernel_patches/txt/uml_no_panic.txt | 3 - lustre/kernel_patches/which_patch | 13 - lustre/ldlm/.cvsignore | 3 - lustre/ldlm/Makefile.am | 22 - lustre/ldlm/l_lock.c | 116 - lustre/ldlm/ldlm_extent.c | 122 - lustre/ldlm/ldlm_internal.h | 1 - lustre/ldlm/ldlm_lib.c | 885 - lustre/ldlm/ldlm_lock.c | 1194 - lustre/ldlm/ldlm_lockd.c | 1132 - lustre/ldlm/ldlm_request.c | 919 - lustre/ldlm/ldlm_resource.c | 578 - lustre/ldlm/ldlm_test.c | 648 - lustre/liblustre/.cvsignore | 9 - lustre/liblustre/Makefile.am | 30 - lustre/liblustre/file.c | 553 - lustre/liblustre/libtest.c | 136 - lustre/liblustre/llite_lib.c | 226 - lustre/liblustre/llite_lib.h | 135 - lustre/liblustre/lltest.c | 159 - lustre/liblustre/rw.c | 519 - lustre/liblustre/super.c | 779 - lustre/llite/.cvsignore | 8 - lustre/llite/Makefile.am | 16 - lustre/llite/commit_callback.c | 132 - lustre/llite/dcache.c | 286 - lustre/llite/dir.c | 860 - lustre/llite/file.c | 1271 - lustre/llite/iod.c | 411 - lustre/llite/llite_internal.h | 29 - lustre/llite/lproc_llite.c | 285 - 
lustre/llite/namei.c | 1160 - lustre/llite/rw.c | 705 - lustre/llite/super.c | 854 - lustre/llite/super25.c | 856 - lustre/llite/symlink.c | 198 - lustre/llite/sysctl.c | 70 - lustre/lov/.cvsignore | 3 - lustre/lov/Makefile.am | 18 - lustre/lov/lov_obd.c | 2397 -- lustre/lov/lov_pack.c | 361 - lustre/lov/lproc_lov.c | 212 - lustre/mdc/.cvsignore | 8 - lustre/mdc/Makefile.am | 19 - lustre/mdc/lproc_mdc.c | 63 - lustre/mdc/mdc_internal.h | 24 - lustre/mdc/mdc_lib.c | 281 - lustre/mdc/mdc_reint.c | 237 - lustre/mdc/mdc_request.c | 736 - lustre/mds/.cvsignore | 8 - lustre/mds/Makefile.am | 13 - lustre/mds/Makefile.mk | 10 - lustre/mds/handler.c | 1976 - lustre/mds/lproc_mds.c | 116 - lustre/mds/mds_fs.c | 453 - lustre/mds/mds_internal.h | 15 - lustre/mds/mds_lib.c | 310 - lustre/mds/mds_lov.c | 286 - lustre/mds/mds_open.c | 429 - lustre/mds/mds_reint.c | 1148 - lustre/missing | 336 - lustre/mkinstalldirs | 40 - lustre/nodist | 21 - lustre/obdclass/.cvsignore | 8 - lustre/obdclass/Makefile.am | 39 - lustre/obdclass/class_obd.c | 935 - lustre/obdclass/debug.c | 173 - lustre/obdclass/fsfilt.c | 107 - lustre/obdclass/fsfilt_ext3.c | 545 - lustre/obdclass/fsfilt_extN.c | 545 - lustre/obdclass/fsfilt_reiserfs.c | 201 - lustre/obdclass/genops.c | 554 - lustre/obdclass/lprocfs_status.c | 629 - lustre/obdclass/lustre_handles.c | 166 - lustre/obdclass/lustre_peer.c | 179 - lustre/obdclass/otree.c | 266 - lustre/obdclass/simple.c | 278 - lustre/obdclass/statfs_pack.c | 102 - lustre/obdclass/sysctl.c | 140 - lustre/obdclass/uuid.c | 165 - lustre/obdecho/.cvsignore | 8 - lustre/obdecho/Makefile.am | 19 - lustre/obdecho/echo.c | 576 - lustre/obdecho/echo_client.c | 1139 - lustre/obdecho/lproc_echo.c | 54 - lustre/obdfilter/.cvsignore | 8 - lustre/obdfilter/Makefile.am | 11 - lustre/obdfilter/filter.c | 2888 -- lustre/obdfilter/lproc_obdfilter.c | 89 - lustre/osc/.cvsignore | 8 - lustre/osc/Makefile.am | 18 - lustre/osc/lproc_osc.c | 62 - lustre/osc/osc_lib.c | 76 - 
lustre/osc/osc_request.c | 1913 - lustre/ost/.cvsignore | 8 - lustre/ost/Makefile.am | 13 - lustre/ost/lproc_ost.c | 42 - lustre/ost/ost_handler.c | 1171 - lustre/portals/.cvsignore | 8 - lustre/portals/AUTHORS | 0 lustre/portals/ChangeLog | 0 lustre/portals/Kernelenv.in | 1 - lustre/portals/Kernelenv.mk | 1 - lustre/portals/Makefile.am | 12 - lustre/portals/Makefile.mk | 6 - lustre/portals/NEWS | 0 lustre/portals/README | 0 lustre/portals/Rules.linux | 25 - lustre/portals/archdep.m4 | 317 - lustre/portals/autogen.sh | 5 - lustre/portals/build.m4 | 95 - lustre/portals/configure.in | 34 - lustre/portals/doc/.cvsignore | 4 - lustre/portals/doc/Data-structures | 65 - lustre/portals/doc/Makefile.am | 46 - lustre/portals/doc/Message-life-cycle | 118 - lustre/portals/doc/NAL-HOWTO | 293 - lustre/portals/doc/file.fig | 111 - lustre/portals/doc/flow_new.fig | 213 - lustre/portals/doc/get.fig | 33 - lustre/portals/doc/ieee.bst | 1112 - lustre/portals/doc/mpi.fig | 117 - lustre/portals/doc/portals.fig | 68 - lustre/portals/doc/portals3.bib | 124 - lustre/portals/doc/portals3.lyx | 15944 -------- lustre/portals/doc/put.fig | 32 - lustre/portals/include/.cvsignore | 4 - lustre/portals/include/Makefile.am | 8 - lustre/portals/include/config.h.in | 58 - lustre/portals/include/linux/Makefile.am | 10 - lustre/portals/include/linux/kp30.h | 941 - lustre/portals/include/linux/portals_compat25.h | 13 - lustre/portals/include/linux/portals_lib.h | 188 - lustre/portals/include/portals/Makefile.am | 10 - lustre/portals/include/portals/api-support.h | 27 - lustre/portals/include/portals/api.h | 159 - lustre/portals/include/portals/arg-blocks.h | 265 - lustre/portals/include/portals/defines.h | 116 - lustre/portals/include/portals/errno.h | 61 - lustre/portals/include/portals/internal.h | 0 lustre/portals/include/portals/lib-dispatch.h | 45 - lustre/portals/include/portals/lib-nal.h | 102 - lustre/portals/include/portals/lib-p30.h | 385 - lustre/portals/include/portals/lib-types.h | 282 - 
lustre/portals/include/portals/list.h | 245 - lustre/portals/include/portals/lltrace.h | 175 - lustre/portals/include/portals/myrnal.h | 26 - lustre/portals/include/portals/nal.h | 49 - lustre/portals/include/portals/nalids.h | 4 - lustre/portals/include/portals/p30.h | 72 - lustre/portals/include/portals/ppid.h | 52 - lustre/portals/include/portals/ptlctl.h | 75 - lustre/portals/include/portals/stringtab.h | 5 - lustre/portals/include/portals/types.h | 157 - lustre/portals/knals/.cvsignore | 2 - lustre/portals/knals/Makefile.am | 7 - lustre/portals/knals/Makefile.mk | 4 - lustre/portals/knals/gmnal/.cvsignore | 3 - lustre/portals/knals/gmnal/Makefile.am | 13 - .../portals/knals/gmnal/gm-1.5.2.1-exports.patch | 43 - lustre/portals/knals/gmnal/gmnal.c | 284 - lustre/portals/knals/gmnal/gmnal.h | 101 - lustre/portals/knals/gmnal/gmnal_cb.c | 517 - lustre/portals/knals/qswnal/.cvsignore | 3 - lustre/portals/knals/qswnal/Makefile.am | 17 - lustre/portals/knals/qswnal/qswnal.c | 608 - lustre/portals/knals/qswnal/qswnal.h | 270 - lustre/portals/knals/qswnal/qswnal_cb.c | 1240 - lustre/portals/knals/scimacnal/.cvsignore | 3 - lustre/portals/knals/scimacnal/Makefile.am | 11 - lustre/portals/knals/scimacnal/README.scimacnal | 14 - lustre/portals/knals/scimacnal/scimac.conf | 35 - lustre/portals/knals/scimacnal/scimacnal.c | 219 - lustre/portals/knals/scimacnal/scimacnal.h | 85 - lustre/portals/knals/scimacnal/scimacnal_cb.c | 468 - lustre/portals/knals/socknal/.cvsignore | 3 - lustre/portals/knals/socknal/Makefile.am | 13 - lustre/portals/knals/socknal/Makefile.mk | 10 - lustre/portals/knals/socknal/socknal.c | 860 - lustre/portals/knals/socknal/socknal.h | 292 - lustre/portals/knals/socknal/socknal_cb.c | 1613 - lustre/portals/knals/toenal/.cvsignore | 3 - lustre/portals/knals/toenal/Makefile.am | 13 - lustre/portals/knals/toenal/toenal.c | 629 - lustre/portals/knals/toenal/toenal.h | 236 - lustre/portals/knals/toenal/toenal_cb.c | 1219 - lustre/portals/libcfs/.cvsignore | 
4 - lustre/portals/libcfs/Makefile.am | 29 - lustre/portals/libcfs/Makefile.mk | 9 - lustre/portals/libcfs/debug.c | 830 - lustre/portals/libcfs/module.c | 575 - lustre/portals/libcfs/proc.c | 290 - lustre/portals/packaging/.cvsignore | 8 - lustre/portals/packaging/Makefile.am | 6 - lustre/portals/packaging/portals.spec.in | 116 - lustre/portals/portals/.cvsignore | 3 - lustre/portals/portals/Makefile.am | 10 - lustre/portals/portals/Makefile.mk | 9 - lustre/portals/portals/api-eq.c | 158 - lustre/portals/portals/api-errno.c | 55 - lustre/portals/portals/api-init.c | 71 - lustre/portals/portals/api-me.c | 42 - lustre/portals/portals/api-ni.c | 197 - lustre/portals/portals/api-wrap.c | 599 - lustre/portals/portals/lib-dispatch.c | 80 - lustre/portals/portals/lib-eq.c | 128 - lustre/portals/portals/lib-init.c | 474 - lustre/portals/portals/lib-md.c | 412 - lustre/portals/portals/lib-me.c | 227 - lustre/portals/portals/lib-move.c | 1379 - lustre/portals/portals/lib-msg.c | 163 - lustre/portals/portals/lib-ni.c | 128 - lustre/portals/portals/lib-pid.c | 58 - lustre/portals/router/.cvsignore | 3 - lustre/portals/router/Makefile.am | 16 - lustre/portals/router/Makefile.mk | 9 - lustre/portals/router/proc.c | 78 - lustre/portals/router/router.c | 449 - lustre/portals/router/router.h | 81 - lustre/portals/tests/.cvsignore | 3 - lustre/portals/tests/Makefile.am | 23 - lustre/portals/tests/ping.h | 80 - lustre/portals/tests/ping_cli.c | 300 - lustre/portals/tests/ping_srv.c | 308 - lustre/portals/tests/sping_cli.c | 276 - lustre/portals/tests/sping_srv.c | 295 - lustre/portals/tests/startclient.sh | 37 - lustre/portals/tests/startserver.sh | 38 - lustre/portals/tests/stopclient.sh | 14 - lustre/portals/tests/stopserver.sh | 16 - lustre/portals/unals/.cvsignore | 3 - lustre/portals/unals/Makefile.am | 5 - lustre/portals/unals/README | 53 - lustre/portals/unals/address.c | 146 - lustre/portals/unals/bridge.h | 29 - lustre/portals/unals/connection.c | 294 - 
lustre/portals/unals/connection.h | 32 - lustre/portals/unals/debug.c | 119 - lustre/portals/unals/dispatch.h | 39 - lustre/portals/unals/ipmap.h | 38 - lustre/portals/unals/pqtimer.c | 226 - lustre/portals/unals/pqtimer.h | 25 - lustre/portals/unals/procapi.c | 283 - lustre/portals/unals/procbridge.h | 40 - lustre/portals/unals/proclib.c | 270 - lustre/portals/unals/select.c | 165 - lustre/portals/unals/table.c | 264 - lustre/portals/unals/table.h | 39 - lustre/portals/unals/tcpnal.c | 198 - lustre/portals/unals/timer.h | 30 - lustre/portals/unals/utypes.h | 12 - lustre/portals/utils/.cvsignore | 8 - lustre/portals/utils/Makefile.am | 27 - lustre/portals/utils/acceptor.c | 466 - lustre/portals/utils/debug.c | 618 - lustre/portals/utils/debugctl.c | 66 - lustre/portals/utils/l_ioctl.c | 281 - lustre/portals/utils/parser.c | 703 - lustre/portals/utils/parser.h | 73 - lustre/portals/utils/portals.c | 985 - lustre/portals/utils/ptlctl.c | 65 - lustre/portals/utils/routerstat.c | 99 - lustre/portals/utils/wirecheck.c | 141 - lustre/ptlbd/.cvsignore | 3 - lustre/ptlbd/Makefile.am | 14 - lustre/ptlbd/blk.c | 301 - lustre/ptlbd/client.c | 232 - lustre/ptlbd/main.c | 71 - lustre/ptlbd/rpc.c | 385 - lustre/ptlbd/server.c | 113 - lustre/ptlrpc/.cvsignore | 9 - lustre/ptlrpc/Makefile.am | 22 - lustre/ptlrpc/client.c | 1605 - lustre/ptlrpc/connection.c | 186 - lustre/ptlrpc/events.c | 497 - lustre/ptlrpc/lproc_ptlrpc.c | 159 - lustre/ptlrpc/niobuf.c | 745 - lustre/ptlrpc/pack_generic.c | 1092 - lustre/ptlrpc/pinger.c | 316 - lustre/ptlrpc/ptlrpc_internal.h | 98 - lustre/ptlrpc/ptlrpc_lib.c | 125 - lustre/ptlrpc/ptlrpc_module.c | 242 - lustre/ptlrpc/recover.c | 596 - lustre/ptlrpc/service.c | 550 - lustre/scripts/.cvsignore | 9 - lustre/scripts/Makefile.am | 10 - lustre/scripts/dodiff.sh | 5 - lustre/scripts/license-status | 26 - lustre/scripts/llite-group.sh | 67 - lustre/scripts/lustre | 95 - lustre/scripts/lustre.spec.in | 263 - lustre/scripts/maketags.sh | 8 - 
lustre/scripts/nodelustre | 46 - lustre/scripts/system-profile.sh | 233 - lustre/scripts/version_tag.pl | 174 - lustre/tests/.cvsignore | 45 - lustre/tests/Makefile.am | 60 - lustre/tests/README | 85 - lustre/tests/acceptance-metadata-double.sh | 140 - lustre/tests/acceptance-metadata-single.sh | 146 - lustre/tests/acceptance-small.sh | 121 - lustre/tests/ba-echo.sh | 38 - lustre/tests/ba-mount.sh | 53 - lustre/tests/busy.sh | 7 - lustre/tests/checkstack.pl | 92 - lustre/tests/checkstat.c | 317 - lustre/tests/client-echo.cfg | 3 - lustre/tests/client-mount.cfg | 6 - lustre/tests/client-mount2.cfg | 10 - lustre/tests/cobd.sh | 31 - lustre/tests/common.sh | 713 - lustre/tests/compile.sh | 15 - lustre/tests/crash-mod.sh | 11 - lustre/tests/create.pl | 78 - lustre/tests/createdestroy.c | 224 - lustre/tests/createmany.c | 98 - lustre/tests/createtest.c | 142 - lustre/tests/directio.c | 79 - lustre/tests/echo.sh | 52 - lustre/tests/elan-client.cfg | 5 - lustre/tests/elan-server.cfg | 5 - lustre/tests/ext2_10000.gz | Bin 10228 -> 0 bytes lustre/tests/ext2_25000.gz | Bin 25136 -> 0 bytes lustre/tests/ext3_10000.gz | Bin 12172 -> 0 bytes lustre/tests/fchdir_test.c | 41 - lustre/tests/fs.sh | 27 - lustre/tests/fsx.c | 1228 - lustre/tests/intent-test.sh | 122 - lustre/tests/intent-test2.sh | 70 - lustre/tests/ldaptest.c | 27 - lustre/tests/ldlm.cfg | 3 - lustre/tests/leak_finder.pl | 69 - lustre/tests/lkcdmap | 13 - lustre/tests/llcleanup.sh | 19 - lustre/tests/lldlm.sh | 40 - lustre/tests/llecho.sh | 21 - lustre/tests/llechocleanup.sh | 15 - lustre/tests/llext3.sh | 10 - lustre/tests/llmodules.sh | 41 - lustre/tests/llmount-client.sh | 9 - lustre/tests/llmount-server.sh | 9 - lustre/tests/llmount.sh | 34 - lustre/tests/llmountcleanup.sh | 54 - lustre/tests/llrext3.sh | 10 - lustre/tests/llrmount.sh | 28 - lustre/tests/llrsetup.sh | 15 - lustre/tests/llsetup.sh | 15 - lustre/tests/llsimple.sh | 11 - lustre/tests/local.sh | 30 - lustre/tests/lov.sh | 34 - 
lustre/tests/lstiming.sh | 51 - lustre/tests/lustre.cfg | 49 - lustre/tests/mcr-individual-ost-nogw-config.sh | 46 - lustre/tests/mcr-mds-failover-config.sh | 50 - lustre/tests/mcr-routed-config.sh | 93 - lustre/tests/mcr.sh | 45 - lustre/tests/mcreate.c | 23 - lustre/tests/mcrlov.sh | 52 - lustre/tests/mdcreq.sh | 37 - lustre/tests/mdcreqcleanup.sh | 34 - lustre/tests/mds.cfg | 7 - lustre/tests/mkdirdeep.c | 257 - lustre/tests/mkdirmany.c | 40 - lustre/tests/mlink.c | 25 - lustre/tests/modules.cfg | 3 - lustre/tests/mount2.sh | 30 - lustre/tests/mount2fs.sh | 43 - lustre/tests/multifstat.c | 62 - lustre/tests/munlink.c | 23 - lustre/tests/net-client.cfg | 6 - lustre/tests/net-local.cfg | 6 - lustre/tests/net-server.cfg | 6 - lustre/tests/o_directory.c | 51 - lustre/tests/obddisk.cfg | 6 - lustre/tests/obdecho.cfg | 4 - lustre/tests/obdfilter.cfg | 7 - lustre/tests/open_delay.c | 27 - lustre/tests/openclose.c | 142 - lustre/tests/opendevunlink.c | 111 - lustre/tests/opendirunlink.c | 122 - lustre/tests/openfile.c | 153 - lustre/tests/openme.c | 23 - lustre/tests/openunlink.c | 147 - lustre/tests/ostreq.sh | 37 - lustre/tests/recovery-cleanup.sh | 141 - lustre/tests/recovery-small-upcall.sh | 3 - lustre/tests/recovery-small.sh | 169 - lustre/tests/rename.pl | 78 - lustre/tests/runas.c | 124 - lustre/tests/rundbench | 13 - lustre/tests/runfailure-client-mds-recover.sh | 103 - lustre/tests/runfailure-mds | 63 - lustre/tests/runfailure-net | 66 - lustre/tests/runfailure-ost | 51 - lustre/tests/runiozone | 17 - lustre/tests/runobdstat | 7 - lustre/tests/runregression-brw.sh | 111 - lustre/tests/runregression-mds.sh | 67 - lustre/tests/runregression-net.sh | 99 - lustre/tests/runslabinfo | 5 - lustre/tests/runtests | 130 - lustre/tests/runvmstat | 6 - lustre/tests/sanity-ldlm.sh | 61 - lustre/tests/sanity.sh | 881 - lustre/tests/sanityN.sh | 131 - lustre/tests/snaprun.sh | 36 - lustre/tests/stat.c | 24 - lustre/tests/statmany.c | 215 - lustre/tests/statone.c | 60 - 
lustre/tests/tbox.sh | 116 - lustre/tests/tchmod.c | 18 - lustre/tests/test.c | 101 - lustre/tests/test2.c | 60 - lustre/tests/test_brw.c | 221 - lustre/tests/testreq.c | 141 - lustre/tests/toexcl.c | 77 - lustre/tests/trivial.sh | 11 - lustre/tests/truncate.c | 24 - lustre/tests/uml.sh | 96 - lustre/tests/unlinkmany.c | 74 - lustre/tests/utime.c | 87 - lustre/tests/wantedi.c | 49 - lustre/tests/writeme.c | 32 - lustre/utils/.cvsignore | 18 - lustre/utils/Lustre/.cvsignore | 4 - lustre/utils/Lustre/Makefile.am | 2 - lustre/utils/Lustre/__init__.py | 7 - lustre/utils/Lustre/cmdline.py | 178 - lustre/utils/Lustre/error.py | 10 - lustre/utils/Lustre/lustredb.py | 413 - lustre/utils/Makefile.am | 22 - lustre/utils/automatic-reconnect-sample | 34 - lustre/utils/ha_assist.sh | 5 - lustre/utils/ha_assist2.sh | 35 - lustre/utils/lactive | 97 - lustre/utils/lconf | 2285 -- lustre/utils/lctl.c | 260 - lustre/utils/lfind.c | 296 - lustre/utils/llanalyze | 278 - lustre/utils/llobdstat.pl | 160 - lustre/utils/llstat.pl | 124 - lustre/utils/lmc | 986 - lustre/utils/load_ldap.sh | 50 - lustre/utils/lstripe.c | 116 - lustre/utils/mds-failover-sample | 20 - lustre/utils/obd.c | 2060 - lustre/utils/obdbarrier.c | 224 - lustre/utils/obdctl.c | 104 - lustre/utils/obdctl.h | 73 - lustre/utils/obdio.c | 305 - lustre/utils/obdiolib.c | 464 - lustre/utils/obdiolib.h | 69 - lustre/utils/parser.c | 725 - lustre/utils/parser.h | 74 - lustre/utils/wirecheck.c | 588 - 913 files changed, 265344 deletions(-) delete mode 100644 lnet/.cvsignore delete mode 100644 lnet/AUTHORS delete mode 100644 lnet/ChangeLog delete mode 100644 lnet/Kernelenv.in delete mode 100644 lnet/Kernelenv.mk delete mode 100644 lnet/Makefile.am delete mode 100644 lnet/Makefile.mk delete mode 100644 lnet/NEWS delete mode 100644 lnet/README delete mode 100644 lnet/Rules.linux delete mode 100644 lnet/archdep.m4 delete mode 100644 lnet/autogen.sh delete mode 100644 lnet/build.m4 delete mode 100644 lnet/configure.in delete mode 
100644 lnet/doc/.cvsignore delete mode 100644 lnet/doc/Data-structures delete mode 100644 lnet/doc/Makefile.am delete mode 100644 lnet/doc/Message-life-cycle delete mode 100644 lnet/doc/NAL-HOWTO delete mode 100644 lnet/doc/file.fig delete mode 100644 lnet/doc/flow_new.fig delete mode 100644 lnet/doc/get.fig delete mode 100644 lnet/doc/ieee.bst delete mode 100644 lnet/doc/mpi.fig delete mode 100644 lnet/doc/portals.fig delete mode 100644 lnet/doc/portals3.bib delete mode 100644 lnet/doc/portals3.lyx delete mode 100644 lnet/doc/put.fig delete mode 100644 lnet/include/.cvsignore delete mode 100644 lnet/include/Makefile.am delete mode 100644 lnet/include/config.h.in delete mode 100644 lnet/include/linux/Makefile.am delete mode 100644 lnet/include/linux/kp30.h delete mode 100644 lnet/include/linux/portals_compat25.h delete mode 100644 lnet/include/linux/portals_lib.h delete mode 100644 lnet/include/lnet/Makefile.am delete mode 100644 lnet/include/lnet/api-support.h delete mode 100644 lnet/include/lnet/api.h delete mode 100644 lnet/include/lnet/arg-blocks.h delete mode 100644 lnet/include/lnet/defines.h delete mode 100644 lnet/include/lnet/errno.h delete mode 100644 lnet/include/lnet/internal.h delete mode 100644 lnet/include/lnet/lib-dispatch.h delete mode 100644 lnet/include/lnet/lib-lnet.h delete mode 100644 lnet/include/lnet/lib-nal.h delete mode 100644 lnet/include/lnet/lib-p30.h delete mode 100644 lnet/include/lnet/lib-types.h delete mode 100644 lnet/include/lnet/list.h delete mode 100644 lnet/include/lnet/lltrace.h delete mode 100644 lnet/include/lnet/lnet.h delete mode 100644 lnet/include/lnet/lnetctl.h delete mode 100644 lnet/include/lnet/myrnal.h delete mode 100644 lnet/include/lnet/nal.h delete mode 100644 lnet/include/lnet/nalids.h delete mode 100644 lnet/include/lnet/p30.h delete mode 100644 lnet/include/lnet/ppid.h delete mode 100644 lnet/include/lnet/ptlctl.h delete mode 100644 lnet/include/lnet/stringtab.h delete mode 100644 lnet/include/lnet/types.h 
delete mode 100644 lnet/klnds/.cvsignore delete mode 100644 lnet/klnds/Makefile.am delete mode 100644 lnet/klnds/Makefile.mk delete mode 100644 lnet/klnds/gmlnd/.cvsignore delete mode 100644 lnet/klnds/gmlnd/Makefile.am delete mode 100644 lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch delete mode 100644 lnet/klnds/gmlnd/gmlnd.h delete mode 100644 lnet/klnds/gmlnd/gmlnd_cb.c delete mode 100644 lnet/klnds/gmlnd/gmnal.c delete mode 100644 lnet/klnds/qswlnd/.cvsignore delete mode 100644 lnet/klnds/qswlnd/Makefile.am delete mode 100644 lnet/klnds/qswlnd/qswlnd.c delete mode 100644 lnet/klnds/qswlnd/qswlnd.h delete mode 100644 lnet/klnds/qswlnd/qswlnd_cb.c delete mode 100644 lnet/klnds/scimaclnd/.cvsignore delete mode 100644 lnet/klnds/scimaclnd/Makefile.am delete mode 100644 lnet/klnds/scimaclnd/README.scimacnal delete mode 100644 lnet/klnds/scimaclnd/scimac.conf delete mode 100644 lnet/klnds/scimaclnd/scimacnal.c delete mode 100644 lnet/klnds/scimaclnd/scimacnal.h delete mode 100644 lnet/klnds/scimaclnd/scimacnal_cb.c delete mode 100644 lnet/klnds/socklnd/.cvsignore delete mode 100644 lnet/klnds/socklnd/Makefile.am delete mode 100644 lnet/klnds/socklnd/Makefile.mk delete mode 100644 lnet/klnds/socklnd/socklnd.c delete mode 100644 lnet/klnds/socklnd/socklnd.h delete mode 100644 lnet/klnds/socklnd/socklnd_cb.c delete mode 100644 lnet/klnds/toelnd/.cvsignore delete mode 100644 lnet/klnds/toelnd/Makefile.am delete mode 100644 lnet/klnds/toelnd/toenal.c delete mode 100644 lnet/klnds/toelnd/toenal.h delete mode 100644 lnet/klnds/toelnd/toenal_cb.c delete mode 100644 lnet/libcfs/.cvsignore delete mode 100644 lnet/libcfs/Makefile.am delete mode 100644 lnet/libcfs/Makefile.mk delete mode 100644 lnet/libcfs/debug.c delete mode 100644 lnet/libcfs/module.c delete mode 100644 lnet/libcfs/proc.c delete mode 100644 lnet/lnet/.cvsignore delete mode 100644 lnet/lnet/Makefile.am delete mode 100644 lnet/lnet/Makefile.mk delete mode 100644 lnet/lnet/api-eq.c delete mode 100644 
lnet/lnet/api-errno.c delete mode 100644 lnet/lnet/api-init.c delete mode 100644 lnet/lnet/api-me.c delete mode 100644 lnet/lnet/api-ni.c delete mode 100644 lnet/lnet/api-wrap.c delete mode 100644 lnet/lnet/lib-dispatch.c delete mode 100644 lnet/lnet/lib-eq.c delete mode 100644 lnet/lnet/lib-init.c delete mode 100644 lnet/lnet/lib-md.c delete mode 100644 lnet/lnet/lib-me.c delete mode 100644 lnet/lnet/lib-move.c delete mode 100644 lnet/lnet/lib-msg.c delete mode 100644 lnet/lnet/lib-ni.c delete mode 100644 lnet/lnet/lib-pid.c delete mode 100644 lnet/packaging/.cvsignore delete mode 100644 lnet/packaging/Makefile.am delete mode 100644 lnet/packaging/portals.spec.in delete mode 100644 lnet/router/.cvsignore delete mode 100644 lnet/router/Makefile.am delete mode 100644 lnet/router/Makefile.mk delete mode 100644 lnet/router/proc.c delete mode 100644 lnet/router/router.c delete mode 100644 lnet/router/router.h delete mode 100644 lnet/tests/.cvsignore delete mode 100644 lnet/tests/Makefile.am delete mode 100644 lnet/tests/ping.h delete mode 100644 lnet/tests/ping_cli.c delete mode 100644 lnet/tests/ping_srv.c delete mode 100644 lnet/tests/sping_cli.c delete mode 100644 lnet/tests/sping_srv.c delete mode 100644 lnet/tests/startclient.sh delete mode 100644 lnet/tests/startserver.sh delete mode 100644 lnet/tests/stopclient.sh delete mode 100644 lnet/tests/stopserver.sh delete mode 100644 lnet/ulnds/.cvsignore delete mode 100644 lnet/ulnds/Makefile.am delete mode 100644 lnet/ulnds/README delete mode 100644 lnet/ulnds/address.c delete mode 100644 lnet/ulnds/bridge.h delete mode 100644 lnet/ulnds/connection.c delete mode 100644 lnet/ulnds/connection.h delete mode 100644 lnet/ulnds/debug.c delete mode 100644 lnet/ulnds/dispatch.h delete mode 100644 lnet/ulnds/ipmap.h delete mode 100644 lnet/ulnds/pqtimer.c delete mode 100644 lnet/ulnds/pqtimer.h delete mode 100644 lnet/ulnds/procapi.c delete mode 100644 lnet/ulnds/procbridge.h delete mode 100644 lnet/ulnds/proclib.c delete mode 
100644 lnet/ulnds/select.c delete mode 100644 lnet/ulnds/socklnd/Makefile.am delete mode 100644 lnet/ulnds/socklnd/README delete mode 100644 lnet/ulnds/socklnd/address.c delete mode 100644 lnet/ulnds/socklnd/bridge.h delete mode 100644 lnet/ulnds/socklnd/connection.c delete mode 100644 lnet/ulnds/socklnd/connection.h delete mode 100644 lnet/ulnds/socklnd/debug.c delete mode 100644 lnet/ulnds/socklnd/dispatch.h delete mode 100644 lnet/ulnds/socklnd/ipmap.h delete mode 100644 lnet/ulnds/socklnd/pqtimer.c delete mode 100644 lnet/ulnds/socklnd/pqtimer.h delete mode 100644 lnet/ulnds/socklnd/procapi.c delete mode 100644 lnet/ulnds/socklnd/procbridge.h delete mode 100644 lnet/ulnds/socklnd/proclib.c delete mode 100644 lnet/ulnds/socklnd/select.c delete mode 100644 lnet/ulnds/socklnd/table.c delete mode 100644 lnet/ulnds/socklnd/table.h delete mode 100644 lnet/ulnds/socklnd/tcplnd.c delete mode 100644 lnet/ulnds/socklnd/timer.h delete mode 100644 lnet/ulnds/socklnd/utypes.h delete mode 100644 lnet/ulnds/table.c delete mode 100644 lnet/ulnds/table.h delete mode 100644 lnet/ulnds/tcplnd.c delete mode 100644 lnet/ulnds/timer.h delete mode 100644 lnet/ulnds/utypes.h delete mode 100644 lnet/utils/.cvsignore delete mode 100644 lnet/utils/Makefile.am delete mode 100644 lnet/utils/acceptor.c delete mode 100644 lnet/utils/debug.c delete mode 100644 lnet/utils/debugctl.c delete mode 100644 lnet/utils/l_ioctl.c delete mode 100644 lnet/utils/parser.c delete mode 100644 lnet/utils/parser.h delete mode 100644 lnet/utils/portals.c delete mode 100644 lnet/utils/ptlctl.c delete mode 100644 lnet/utils/routerstat.c delete mode 100644 lnet/utils/wirecheck.c delete mode 100644 lustre/.cvsignore delete mode 100644 lustre/BUGS delete mode 100644 lustre/BUILDING delete mode 100644 lustre/COPYING delete mode 100644 lustre/ChangeLog delete mode 100644 lustre/FDL delete mode 100644 lustre/Makefile.am delete mode 100644 lustre/Makefile.mk delete mode 100644 lustre/README delete mode 100644 
lustre/Rules delete mode 100644 lustre/autogen.sh delete mode 100644 lustre/cobd/.cvsignore delete mode 100644 lustre/cobd/Makefile.am delete mode 100644 lustre/cobd/cache_obd.c delete mode 100644 lustre/cobd/lproc_cache.c delete mode 100644 lustre/conf/.cvsignore delete mode 100644 lustre/conf/Makefile.am delete mode 100644 lustre/conf/lustre.dtd delete mode 100644 lustre/conf/lustre2ldif.xsl delete mode 100644 lustre/conf/slapd-lustre.conf delete mode 100644 lustre/conf/top.ldif delete mode 100644 lustre/configure.in delete mode 100644 lustre/doc/.cvsignore delete mode 100644 lustre/doc/Makefile.am delete mode 100644 lustre/doc/VERSIONING delete mode 100755 lustre/doc/chbar.sh delete mode 100644 lustre/doc/lconf.lyx delete mode 100644 lustre/doc/lctl.lyx delete mode 100644 lustre/doc/lmc.lyx delete mode 100755 lustre/doc/postbar delete mode 100755 lustre/doc/tex2pdf delete mode 100644 lustre/include/.cvsignore delete mode 100644 lustre/include/config.h.in delete mode 100644 lustre/include/ioctl.h delete mode 100644 lustre/include/liblustre.h delete mode 100644 lustre/include/linux/.cvsignore delete mode 100644 lustre/include/linux/Makefile delete mode 100644 lustre/include/linux/lprocfs_status.h delete mode 100644 lustre/include/linux/lustre_compat25.h delete mode 100644 lustre/include/linux/lustre_debug.h delete mode 100644 lustre/include/linux/lustre_dlm.h delete mode 100644 lustre/include/linux/lustre_export.h delete mode 100644 lustre/include/linux/lustre_fsfilt.h delete mode 100644 lustre/include/linux/lustre_ha.h delete mode 100644 lustre/include/linux/lustre_handles.h delete mode 100644 lustre/include/linux/lustre_idl.h delete mode 100644 lustre/include/linux/lustre_import.h delete mode 100644 lustre/include/linux/lustre_lib.h delete mode 100644 lustre/include/linux/lustre_lite.h delete mode 100644 lustre/include/linux/lustre_mds.h delete mode 100644 lustre/include/linux/lustre_net.h delete mode 100644 lustre/include/linux/lustre_otree.h delete mode 100644 
lustre/include/linux/obd.h delete mode 100644 lustre/include/linux/obd_cache.h delete mode 100644 lustre/include/linux/obd_class.h delete mode 100644 lustre/include/linux/obd_echo.h delete mode 100644 lustre/include/linux/obd_ext2.h delete mode 100644 lustre/include/linux/obd_filter.h delete mode 100644 lustre/include/linux/obd_lov.h delete mode 100644 lustre/include/linux/obd_ost.h delete mode 100644 lustre/include/linux/obd_ptlbd.h delete mode 100644 lustre/include/linux/obd_snap.h delete mode 100644 lustre/include/linux/obd_snap_support.h delete mode 100644 lustre/include/linux/obd_support.h delete mode 100644 lustre/include/linux/obd_trace.h delete mode 100755 lustre/install-sh delete mode 100644 lustre/kernel_patches/README delete mode 100644 lustre/kernel_patches/kernel_configs/config-linux-2.4.18-i386 delete mode 100644 lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos delete mode 100644 lustre/kernel_patches/kernel_configs/config-linux-2.4.18-uml delete mode 100644 lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh delete mode 100644 lustre/kernel_patches/kernel_configs/config-linux-2.4.20-uml delete mode 100644 lustre/kernel_patches/kernel_configs/jdike-2.5.69-uml.config delete mode 100644 lustre/kernel_patches/patches/dev_read_only.patch delete mode 100644 lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch delete mode 100644 lustre/kernel_patches/patches/dev_read_only_2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/dev_read_only_hp_2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/dsp.patch delete mode 100644 lustre/kernel_patches/patches/export-truncate-2.5.63.patch delete mode 100644 lustre/kernel_patches/patches/export-truncate.patch delete mode 100644 lustre/kernel_patches/patches/exports.patch delete mode 100644 lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch delete mode 100644 lustre/kernel_patches/patches/exports_2.4.20.patch delete mode 100644 
lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-1.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-2.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-3.patch delete mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-4.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4-ino_t.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4.18-fixes.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.5-noread.patch delete mode 100644 lustre/kernel_patches/patches/ext3-2.5.63.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/ext3-largefile.patch delete mode 100644 lustre/kernel_patches/patches/ext3-noread-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/ext3-orphan_lock.patch delete mode 100644 lustre/kernel_patches/patches/ext3-san-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/ext3-truncate_blocks-chaos.patch.patch delete mode 100644 lustre/kernel_patches/patches/ext3-truncate_blocks.patch delete mode 100644 lustre/kernel_patches/patches/ext3-unmount_sync.patch delete mode 100644 lustre/kernel_patches/patches/ext3-use-after-free.patch delete mode 100644 lustre/kernel_patches/patches/ext3_orphan_lock-2.4.20-rh.patch delete mode 100644 lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch delete mode 100644 lustre/kernel_patches/patches/extN-delete_thread.patch delete mode 100644 lustre/kernel_patches/patches/extN-iget-debug.patch delete mode 100644 lustre/kernel_patches/patches/extN-misc-fixup.patch delete mode 100644 lustre/kernel_patches/patches/extN-noread.patch delete mode 
100644 lustre/kernel_patches/patches/extN-san.patch delete mode 100644 lustre/kernel_patches/patches/extN-wantedi.patch delete mode 100644 lustre/kernel_patches/patches/htree-ext3-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch delete mode 100644 lustre/kernel_patches/patches/invalidate_show.patch delete mode 100644 lustre/kernel_patches/patches/iod-rmap-exports-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/iod-rmap-exports.patch delete mode 100644 lustre/kernel_patches/patches/iod-stock-24-exports.patch delete mode 100644 lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.4.18.patch delete mode 100644 lustre/kernel_patches/patches/iopen-2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/jbd-transno-cb.patch delete mode 100644 lustre/kernel_patches/patches/kmem_cache_validate.patch delete mode 100644 lustre/kernel_patches/patches/kmem_cache_validate_2.4.20-rh.patch delete mode 100644 lustre/kernel_patches/patches/kmem_cache_validate_2.4.20.patch delete mode 100644 lustre/kernel_patches/patches/kmem_cache_validate_hp.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch delete mode 100644 lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch delete mode 100644 lustre/kernel_patches/patches/lustre-2.5.63.patch delete mode 100644 lustre/kernel_patches/patches/lustre_version.patch delete mode 100644 lustre/kernel_patches/patches/mcore-2.4.20-8.patch delete mode 100644 lustre/kernel_patches/patches/netconsole_sysrq.patch delete mode 100644 lustre/kernel_patches/patches/tcp-zero-copy.patch delete mode 100644 lustre/kernel_patches/patches/tg3_netconsole.patch delete mode 100644 lustre/kernel_patches/patches/uml-patch-2.4.20-4.patch 
delete mode 100644 lustre/kernel_patches/patches/uml_check_get_page.patch delete mode 100644 lustre/kernel_patches/patches/uml_compile_fixes.patch delete mode 100644 lustre/kernel_patches/patches/uml_no_panic.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.18-18.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch delete mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch delete mode 100644 lustre/kernel_patches/pc/dev_read_only.pc delete mode 100644 lustre/kernel_patches/pc/dev_read_only_2.4.20-rh.pc delete mode 100644 lustre/kernel_patches/pc/dev_read_only_2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/dev_read_only_hp_2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/dsp.pc delete mode 100644 lustre/kernel_patches/pc/export-truncate-2.5.63.pc delete mode 100644 lustre/kernel_patches/pc/export-truncate.pc delete mode 100644 lustre/kernel_patches/pc/exports.pc delete mode 100644 lustre/kernel_patches/pc/exports_2.4.20-rh-hp.pc delete mode 100644 lustre/kernel_patches/pc/exports_2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/exports_hp_2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/ext-2.4-patch-1-chaos.pc delete mode 100644 lustre/kernel_patches/pc/ext-2.4-patch-1.pc delete mode 100644 lustre/kernel_patches/pc/ext-2.4-patch-2.pc delete mode 100644 lustre/kernel_patches/pc/ext-2.4-patch-3.pc delete mode 100644 lustre/kernel_patches/pc/ext-2.4-patch-4.pc delete mode 100644 lustre/kernel_patches/pc/ext3-2.4-ino_t.pc delete mode 100644 lustre/kernel_patches/pc/ext3-2.4.18-fixes.pc delete mode 100644 lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro.pc delete mode 100644 lustre/kernel_patches/pc/ext3-2.4.20-fixes.pc delete mode 100644 lustre/kernel_patches/pc/ext3-2.5-noread.pc delete mode 100644 lustre/kernel_patches/pc/ext3-2.5.63.pc delete mode 100644 lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc delete mode 100644 
lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/ext3-largefile.pc delete mode 100644 lustre/kernel_patches/pc/ext3-noread-2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/ext3-orphan_lock.pc delete mode 100644 lustre/kernel_patches/pc/ext3-san-2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/ext3-truncate_blocks-chaos.patch.pc delete mode 100644 lustre/kernel_patches/pc/ext3-truncate_blocks.pc delete mode 100644 lustre/kernel_patches/pc/ext3-unmount_sync.pc delete mode 100644 lustre/kernel_patches/pc/ext3-use-after-free.pc delete mode 100644 lustre/kernel_patches/pc/ext3_orphan_lock-2.4.20-rh.pc delete mode 100644 lustre/kernel_patches/pc/extN-2.4.18-ino_sb_fixup.pc delete mode 100644 lustre/kernel_patches/pc/extN-delete_thread.pc delete mode 100644 lustre/kernel_patches/pc/extN-iget-debug.pc delete mode 100644 lustre/kernel_patches/pc/extN-misc-fixup.pc delete mode 100644 lustre/kernel_patches/pc/extN-noread.pc delete mode 100644 lustre/kernel_patches/pc/extN-san.pc delete mode 100644 lustre/kernel_patches/pc/extN-wantedi.pc delete mode 100644 lustre/kernel_patches/pc/htree-ext3-2.4.18.pc delete mode 100644 lustre/kernel_patches/pc/invalidate_show-2.4.20-rh.pc delete mode 100644 lustre/kernel_patches/pc/invalidate_show.pc delete mode 100644 lustre/kernel_patches/pc/iod-rmap-exports-2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/iod-rmap-exports.pc delete mode 100644 lustre/kernel_patches/pc/iod-stock-24-exports.pc delete mode 100644 lustre/kernel_patches/pc/iod-stock-24-exports_hp.pc delete mode 100644 lustre/kernel_patches/pc/iopen-2.4.18.pc delete mode 100644 lustre/kernel_patches/pc/iopen-2.4.20.pc delete mode 100644 lustre/kernel_patches/pc/jbd-transno-cb.pc delete mode 100644 lustre/kernel_patches/pc/kmem_cache_validate.pc delete mode 100644 lustre/kernel_patches/pc/kmem_cache_validate_2.4.20-rh.pc delete mode 100644 lustre/kernel_patches/pc/kmem_cache_validate_2.4.20.pc delete mode 100644 
lustre/kernel_patches/pc/kmem_cache_validate_hp.pc delete mode 100644 lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc delete mode 100644 lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-chaos.pc delete mode 100644 lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-hp.pc delete mode 100644 lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54.pc delete mode 100644 lustre/kernel_patches/pc/lustre-2.5.63.pc delete mode 100644 lustre/kernel_patches/pc/lustre-2.5.pc delete mode 100644 lustre/kernel_patches/pc/lustre_version.pc delete mode 100644 lustre/kernel_patches/pc/mcore-2.4.20-8.pc delete mode 100644 lustre/kernel_patches/pc/netconsole_sysrq.pc delete mode 100644 lustre/kernel_patches/pc/tcp-zero-copy.pc delete mode 100644 lustre/kernel_patches/pc/tg3_netconsole.pc delete mode 100644 lustre/kernel_patches/pc/uml-patch-2.4.20-4.pc delete mode 100644 lustre/kernel_patches/pc/uml_check_get_page.pc delete mode 100644 lustre/kernel_patches/pc/uml_compile_fixes.pc delete mode 100644 lustre/kernel_patches/pc/uml_no_panic.pc delete mode 100644 lustre/kernel_patches/pc/vfs_intent-2.4.18-18.pc delete mode 100644 lustre/kernel_patches/pc/vfs_intent-2.4.20-rh.pc delete mode 100644 lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc delete mode 100755 lustre/kernel_patches/prepare_tree.sh delete mode 100755 lustre/kernel_patches/scripts/added-by-patch delete mode 100755 lustre/kernel_patches/scripts/apatch delete mode 100755 lustre/kernel_patches/scripts/cat-series delete mode 100755 lustre/kernel_patches/scripts/combine-applied delete mode 100755 lustre/kernel_patches/scripts/combine-series delete mode 100755 lustre/kernel_patches/scripts/cvs-take-patch delete mode 100755 lustre/kernel_patches/scripts/export_patch delete mode 100755 lustre/kernel_patches/scripts/extract_description delete mode 100755 lustre/kernel_patches/scripts/forkpatch delete mode 100755 lustre/kernel_patches/scripts/fpatch delete mode 100755 lustre/kernel_patches/scripts/import_patch delete mode 
100755 lustre/kernel_patches/scripts/inpatch delete mode 100755 lustre/kernel_patches/scripts/join-patch delete mode 100755 lustre/kernel_patches/scripts/linus-patch delete mode 100755 lustre/kernel_patches/scripts/mpatch delete mode 100755 lustre/kernel_patches/scripts/new-kernel delete mode 100755 lustre/kernel_patches/scripts/p0-2-p1 delete mode 100755 lustre/kernel_patches/scripts/p_diff delete mode 100755 lustre/kernel_patches/scripts/patchdesc delete mode 100644 lustre/kernel_patches/scripts/patchfns delete mode 100755 lustre/kernel_patches/scripts/pcpatch delete mode 100755 lustre/kernel_patches/scripts/poppatch delete mode 100755 lustre/kernel_patches/scripts/prep-patch delete mode 100755 lustre/kernel_patches/scripts/pstatus delete mode 100755 lustre/kernel_patches/scripts/ptkdiff delete mode 100755 lustre/kernel_patches/scripts/pushpatch delete mode 100755 lustre/kernel_patches/scripts/refpatch delete mode 100755 lustre/kernel_patches/scripts/removed-by-patch delete mode 100755 lustre/kernel_patches/scripts/rename-patch delete mode 100755 lustre/kernel_patches/scripts/rolled-up-patch delete mode 100755 lustre/kernel_patches/scripts/rpatch delete mode 100755 lustre/kernel_patches/scripts/split-patch delete mode 100755 lustre/kernel_patches/scripts/sum-series delete mode 100755 lustre/kernel_patches/scripts/tag-series delete mode 100755 lustre/kernel_patches/scripts/toppatch delete mode 100755 lustre/kernel_patches/scripts/touched-by-patch delete mode 100755 lustre/kernel_patches/scripts/trypatch delete mode 100755 lustre/kernel_patches/scripts/unitdiff.py delete mode 100755 lustre/kernel_patches/scripts/unused-patches delete mode 100644 lustre/kernel_patches/series/chaos delete mode 100644 lustre/kernel_patches/series/hp-pnnl-2.4.20 delete mode 100644 lustre/kernel_patches/series/rh-2.4.18-18 delete mode 100644 lustre/kernel_patches/series/rh-2.4.20 delete mode 100644 lustre/kernel_patches/series/vanilla-2.4.20 delete mode 100644 
lustre/kernel_patches/series/vanilla-2.5 delete mode 100644 lustre/kernel_patches/txt/dev_read_only.txt delete mode 100644 lustre/kernel_patches/txt/exports.txt delete mode 100644 lustre/kernel_patches/txt/exports_hp.txt delete mode 100644 lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt delete mode 100644 lustre/kernel_patches/txt/invalidate_show.txt delete mode 100644 lustre/kernel_patches/txt/kmem_cache_validate.txt delete mode 100644 lustre/kernel_patches/txt/lustre_version.txt delete mode 100644 lustre/kernel_patches/txt/uml_check_get_page.txt delete mode 100644 lustre/kernel_patches/txt/uml_compile_fixes.txt delete mode 100644 lustre/kernel_patches/txt/uml_no_panic.txt delete mode 100644 lustre/kernel_patches/which_patch delete mode 100644 lustre/ldlm/.cvsignore delete mode 100644 lustre/ldlm/Makefile.am delete mode 100644 lustre/ldlm/l_lock.c delete mode 100644 lustre/ldlm/ldlm_extent.c delete mode 100644 lustre/ldlm/ldlm_internal.h delete mode 100644 lustre/ldlm/ldlm_lib.c delete mode 100644 lustre/ldlm/ldlm_lock.c delete mode 100644 lustre/ldlm/ldlm_lockd.c delete mode 100644 lustre/ldlm/ldlm_request.c delete mode 100644 lustre/ldlm/ldlm_resource.c delete mode 100644 lustre/ldlm/ldlm_test.c delete mode 100644 lustre/liblustre/.cvsignore delete mode 100644 lustre/liblustre/Makefile.am delete mode 100644 lustre/liblustre/file.c delete mode 100644 lustre/liblustre/libtest.c delete mode 100644 lustre/liblustre/llite_lib.c delete mode 100644 lustre/liblustre/llite_lib.h delete mode 100644 lustre/liblustre/lltest.c delete mode 100644 lustre/liblustre/rw.c delete mode 100644 lustre/liblustre/super.c delete mode 100644 lustre/llite/.cvsignore delete mode 100644 lustre/llite/Makefile.am delete mode 100644 lustre/llite/commit_callback.c delete mode 100644 lustre/llite/dcache.c delete mode 100644 lustre/llite/dir.c delete mode 100644 lustre/llite/file.c delete mode 100644 lustre/llite/iod.c delete mode 100644 lustre/llite/llite_internal.h delete mode 100644 
lustre/llite/lproc_llite.c delete mode 100644 lustre/llite/namei.c delete mode 100644 lustre/llite/rw.c delete mode 100644 lustre/llite/super.c delete mode 100644 lustre/llite/super25.c delete mode 100644 lustre/llite/symlink.c delete mode 100644 lustre/llite/sysctl.c delete mode 100644 lustre/lov/.cvsignore delete mode 100644 lustre/lov/Makefile.am delete mode 100644 lustre/lov/lov_obd.c delete mode 100644 lustre/lov/lov_pack.c delete mode 100644 lustre/lov/lproc_lov.c delete mode 100644 lustre/mdc/.cvsignore delete mode 100644 lustre/mdc/Makefile.am delete mode 100644 lustre/mdc/lproc_mdc.c delete mode 100644 lustre/mdc/mdc_internal.h delete mode 100644 lustre/mdc/mdc_lib.c delete mode 100644 lustre/mdc/mdc_reint.c delete mode 100644 lustre/mdc/mdc_request.c delete mode 100644 lustre/mds/.cvsignore delete mode 100644 lustre/mds/Makefile.am delete mode 100644 lustre/mds/Makefile.mk delete mode 100644 lustre/mds/handler.c delete mode 100644 lustre/mds/lproc_mds.c delete mode 100644 lustre/mds/mds_fs.c delete mode 100644 lustre/mds/mds_internal.h delete mode 100644 lustre/mds/mds_lib.c delete mode 100644 lustre/mds/mds_lov.c delete mode 100644 lustre/mds/mds_open.c delete mode 100644 lustre/mds/mds_reint.c delete mode 100755 lustre/missing delete mode 100755 lustre/mkinstalldirs delete mode 100644 lustre/nodist delete mode 100644 lustre/obdclass/.cvsignore delete mode 100644 lustre/obdclass/Makefile.am delete mode 100644 lustre/obdclass/class_obd.c delete mode 100644 lustre/obdclass/debug.c delete mode 100644 lustre/obdclass/fsfilt.c delete mode 100644 lustre/obdclass/fsfilt_ext3.c delete mode 100644 lustre/obdclass/fsfilt_extN.c delete mode 100644 lustre/obdclass/fsfilt_reiserfs.c delete mode 100644 lustre/obdclass/genops.c delete mode 100644 lustre/obdclass/lprocfs_status.c delete mode 100644 lustre/obdclass/lustre_handles.c delete mode 100644 lustre/obdclass/lustre_peer.c delete mode 100644 lustre/obdclass/otree.c delete mode 100644 lustre/obdclass/simple.c 
delete mode 100644 lustre/obdclass/statfs_pack.c delete mode 100644 lustre/obdclass/sysctl.c delete mode 100644 lustre/obdclass/uuid.c delete mode 100644 lustre/obdecho/.cvsignore delete mode 100644 lustre/obdecho/Makefile.am delete mode 100644 lustre/obdecho/echo.c delete mode 100644 lustre/obdecho/echo_client.c delete mode 100644 lustre/obdecho/lproc_echo.c delete mode 100644 lustre/obdfilter/.cvsignore delete mode 100644 lustre/obdfilter/Makefile.am delete mode 100644 lustre/obdfilter/filter.c delete mode 100644 lustre/obdfilter/lproc_obdfilter.c delete mode 100644 lustre/osc/.cvsignore delete mode 100644 lustre/osc/Makefile.am delete mode 100644 lustre/osc/lproc_osc.c delete mode 100644 lustre/osc/osc_lib.c delete mode 100644 lustre/osc/osc_request.c delete mode 100644 lustre/ost/.cvsignore delete mode 100644 lustre/ost/Makefile.am delete mode 100644 lustre/ost/lproc_ost.c delete mode 100644 lustre/ost/ost_handler.c delete mode 100644 lustre/portals/.cvsignore delete mode 100644 lustre/portals/AUTHORS delete mode 100644 lustre/portals/ChangeLog delete mode 100644 lustre/portals/Kernelenv.in delete mode 100644 lustre/portals/Kernelenv.mk delete mode 100644 lustre/portals/Makefile.am delete mode 100644 lustre/portals/Makefile.mk delete mode 100644 lustre/portals/NEWS delete mode 100644 lustre/portals/README delete mode 100644 lustre/portals/Rules.linux delete mode 100644 lustre/portals/archdep.m4 delete mode 100755 lustre/portals/autogen.sh delete mode 100644 lustre/portals/build.m4 delete mode 100644 lustre/portals/configure.in delete mode 100644 lustre/portals/doc/.cvsignore delete mode 100644 lustre/portals/doc/Data-structures delete mode 100644 lustre/portals/doc/Makefile.am delete mode 100644 lustre/portals/doc/Message-life-cycle delete mode 100644 lustre/portals/doc/NAL-HOWTO delete mode 100644 lustre/portals/doc/file.fig delete mode 100644 lustre/portals/doc/flow_new.fig delete mode 100644 lustre/portals/doc/get.fig delete mode 100644 
lustre/portals/doc/ieee.bst delete mode 100644 lustre/portals/doc/mpi.fig delete mode 100644 lustre/portals/doc/portals.fig delete mode 100644 lustre/portals/doc/portals3.bib delete mode 100644 lustre/portals/doc/portals3.lyx delete mode 100644 lustre/portals/doc/put.fig delete mode 100644 lustre/portals/include/.cvsignore delete mode 100644 lustre/portals/include/Makefile.am delete mode 100644 lustre/portals/include/config.h.in delete mode 100644 lustre/portals/include/linux/Makefile.am delete mode 100644 lustre/portals/include/linux/kp30.h delete mode 100644 lustre/portals/include/linux/portals_compat25.h delete mode 100644 lustre/portals/include/linux/portals_lib.h delete mode 100644 lustre/portals/include/portals/Makefile.am delete mode 100644 lustre/portals/include/portals/api-support.h delete mode 100644 lustre/portals/include/portals/api.h delete mode 100644 lustre/portals/include/portals/arg-blocks.h delete mode 100644 lustre/portals/include/portals/defines.h delete mode 100644 lustre/portals/include/portals/errno.h delete mode 100644 lustre/portals/include/portals/internal.h delete mode 100644 lustre/portals/include/portals/lib-dispatch.h delete mode 100644 lustre/portals/include/portals/lib-nal.h delete mode 100644 lustre/portals/include/portals/lib-p30.h delete mode 100644 lustre/portals/include/portals/lib-types.h delete mode 100644 lustre/portals/include/portals/list.h delete mode 100644 lustre/portals/include/portals/lltrace.h delete mode 100644 lustre/portals/include/portals/myrnal.h delete mode 100644 lustre/portals/include/portals/nal.h delete mode 100644 lustre/portals/include/portals/nalids.h delete mode 100644 lustre/portals/include/portals/p30.h delete mode 100644 lustre/portals/include/portals/ppid.h delete mode 100644 lustre/portals/include/portals/ptlctl.h delete mode 100644 lustre/portals/include/portals/stringtab.h delete mode 100644 lustre/portals/include/portals/types.h delete mode 100644 lustre/portals/knals/.cvsignore delete mode 
100644 lustre/portals/knals/Makefile.am delete mode 100644 lustre/portals/knals/Makefile.mk delete mode 100644 lustre/portals/knals/gmnal/.cvsignore delete mode 100644 lustre/portals/knals/gmnal/Makefile.am delete mode 100644 lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch delete mode 100644 lustre/portals/knals/gmnal/gmnal.c delete mode 100644 lustre/portals/knals/gmnal/gmnal.h delete mode 100644 lustre/portals/knals/gmnal/gmnal_cb.c delete mode 100644 lustre/portals/knals/qswnal/.cvsignore delete mode 100644 lustre/portals/knals/qswnal/Makefile.am delete mode 100644 lustre/portals/knals/qswnal/qswnal.c delete mode 100644 lustre/portals/knals/qswnal/qswnal.h delete mode 100644 lustre/portals/knals/qswnal/qswnal_cb.c delete mode 100644 lustre/portals/knals/scimacnal/.cvsignore delete mode 100644 lustre/portals/knals/scimacnal/Makefile.am delete mode 100644 lustre/portals/knals/scimacnal/README.scimacnal delete mode 100644 lustre/portals/knals/scimacnal/scimac.conf delete mode 100644 lustre/portals/knals/scimacnal/scimacnal.c delete mode 100644 lustre/portals/knals/scimacnal/scimacnal.h delete mode 100644 lustre/portals/knals/scimacnal/scimacnal_cb.c delete mode 100644 lustre/portals/knals/socknal/.cvsignore delete mode 100644 lustre/portals/knals/socknal/Makefile.am delete mode 100644 lustre/portals/knals/socknal/Makefile.mk delete mode 100644 lustre/portals/knals/socknal/socknal.c delete mode 100644 lustre/portals/knals/socknal/socknal.h delete mode 100644 lustre/portals/knals/socknal/socknal_cb.c delete mode 100644 lustre/portals/knals/toenal/.cvsignore delete mode 100644 lustre/portals/knals/toenal/Makefile.am delete mode 100644 lustre/portals/knals/toenal/toenal.c delete mode 100644 lustre/portals/knals/toenal/toenal.h delete mode 100644 lustre/portals/knals/toenal/toenal_cb.c delete mode 100644 lustre/portals/libcfs/.cvsignore delete mode 100644 lustre/portals/libcfs/Makefile.am delete mode 100644 lustre/portals/libcfs/Makefile.mk delete mode 100644 
lustre/portals/libcfs/debug.c delete mode 100644 lustre/portals/libcfs/module.c delete mode 100644 lustre/portals/libcfs/proc.c delete mode 100644 lustre/portals/packaging/.cvsignore delete mode 100644 lustre/portals/packaging/Makefile.am delete mode 100644 lustre/portals/packaging/portals.spec.in delete mode 100644 lustre/portals/portals/.cvsignore delete mode 100644 lustre/portals/portals/Makefile.am delete mode 100644 lustre/portals/portals/Makefile.mk delete mode 100644 lustre/portals/portals/api-eq.c delete mode 100644 lustre/portals/portals/api-errno.c delete mode 100644 lustre/portals/portals/api-init.c delete mode 100644 lustre/portals/portals/api-me.c delete mode 100644 lustre/portals/portals/api-ni.c delete mode 100644 lustre/portals/portals/api-wrap.c delete mode 100644 lustre/portals/portals/lib-dispatch.c delete mode 100644 lustre/portals/portals/lib-eq.c delete mode 100644 lustre/portals/portals/lib-init.c delete mode 100644 lustre/portals/portals/lib-md.c delete mode 100644 lustre/portals/portals/lib-me.c delete mode 100644 lustre/portals/portals/lib-move.c delete mode 100644 lustre/portals/portals/lib-msg.c delete mode 100644 lustre/portals/portals/lib-ni.c delete mode 100644 lustre/portals/portals/lib-pid.c delete mode 100644 lustre/portals/router/.cvsignore delete mode 100644 lustre/portals/router/Makefile.am delete mode 100644 lustre/portals/router/Makefile.mk delete mode 100644 lustre/portals/router/proc.c delete mode 100644 lustre/portals/router/router.c delete mode 100644 lustre/portals/router/router.h delete mode 100644 lustre/portals/tests/.cvsignore delete mode 100644 lustre/portals/tests/Makefile.am delete mode 100644 lustre/portals/tests/ping.h delete mode 100644 lustre/portals/tests/ping_cli.c delete mode 100644 lustre/portals/tests/ping_srv.c delete mode 100644 lustre/portals/tests/sping_cli.c delete mode 100644 lustre/portals/tests/sping_srv.c delete mode 100755 lustre/portals/tests/startclient.sh delete mode 100755 
lustre/portals/tests/startserver.sh delete mode 100755 lustre/portals/tests/stopclient.sh delete mode 100644 lustre/portals/tests/stopserver.sh delete mode 100644 lustre/portals/unals/.cvsignore delete mode 100644 lustre/portals/unals/Makefile.am delete mode 100644 lustre/portals/unals/README delete mode 100644 lustre/portals/unals/address.c delete mode 100644 lustre/portals/unals/bridge.h delete mode 100644 lustre/portals/unals/connection.c delete mode 100644 lustre/portals/unals/connection.h delete mode 100644 lustre/portals/unals/debug.c delete mode 100644 lustre/portals/unals/dispatch.h delete mode 100644 lustre/portals/unals/ipmap.h delete mode 100644 lustre/portals/unals/pqtimer.c delete mode 100644 lustre/portals/unals/pqtimer.h delete mode 100644 lustre/portals/unals/procapi.c delete mode 100644 lustre/portals/unals/procbridge.h delete mode 100644 lustre/portals/unals/proclib.c delete mode 100644 lustre/portals/unals/select.c delete mode 100644 lustre/portals/unals/table.c delete mode 100644 lustre/portals/unals/table.h delete mode 100644 lustre/portals/unals/tcpnal.c delete mode 100644 lustre/portals/unals/timer.h delete mode 100644 lustre/portals/unals/utypes.h delete mode 100644 lustre/portals/utils/.cvsignore delete mode 100644 lustre/portals/utils/Makefile.am delete mode 100644 lustre/portals/utils/acceptor.c delete mode 100644 lustre/portals/utils/debug.c delete mode 100644 lustre/portals/utils/debugctl.c delete mode 100644 lustre/portals/utils/l_ioctl.c delete mode 100644 lustre/portals/utils/parser.c delete mode 100644 lustre/portals/utils/parser.h delete mode 100644 lustre/portals/utils/portals.c delete mode 100644 lustre/portals/utils/ptlctl.c delete mode 100644 lustre/portals/utils/routerstat.c delete mode 100644 lustre/portals/utils/wirecheck.c delete mode 100644 lustre/ptlbd/.cvsignore delete mode 100644 lustre/ptlbd/Makefile.am delete mode 100644 lustre/ptlbd/blk.c delete mode 100644 lustre/ptlbd/client.c delete mode 100644 lustre/ptlbd/main.c 
delete mode 100644 lustre/ptlbd/rpc.c delete mode 100644 lustre/ptlbd/server.c delete mode 100644 lustre/ptlrpc/.cvsignore delete mode 100644 lustre/ptlrpc/Makefile.am delete mode 100644 lustre/ptlrpc/client.c delete mode 100644 lustre/ptlrpc/connection.c delete mode 100644 lustre/ptlrpc/events.c delete mode 100644 lustre/ptlrpc/lproc_ptlrpc.c delete mode 100644 lustre/ptlrpc/niobuf.c delete mode 100644 lustre/ptlrpc/pack_generic.c delete mode 100644 lustre/ptlrpc/pinger.c delete mode 100644 lustre/ptlrpc/ptlrpc_internal.h delete mode 100644 lustre/ptlrpc/ptlrpc_lib.c delete mode 100644 lustre/ptlrpc/ptlrpc_module.c delete mode 100644 lustre/ptlrpc/recover.c delete mode 100644 lustre/ptlrpc/service.c delete mode 100644 lustre/scripts/.cvsignore delete mode 100644 lustre/scripts/Makefile.am delete mode 100755 lustre/scripts/dodiff.sh delete mode 100755 lustre/scripts/license-status delete mode 100644 lustre/scripts/llite-group.sh delete mode 100755 lustre/scripts/lustre delete mode 100644 lustre/scripts/lustre.spec.in delete mode 100755 lustre/scripts/maketags.sh delete mode 100755 lustre/scripts/nodelustre delete mode 100755 lustre/scripts/system-profile.sh delete mode 100644 lustre/scripts/version_tag.pl delete mode 100644 lustre/tests/.cvsignore delete mode 100644 lustre/tests/Makefile.am delete mode 100644 lustre/tests/README delete mode 100644 lustre/tests/acceptance-metadata-double.sh delete mode 100644 lustre/tests/acceptance-metadata-single.sh delete mode 100755 lustre/tests/acceptance-small.sh delete mode 100644 lustre/tests/ba-echo.sh delete mode 100644 lustre/tests/ba-mount.sh delete mode 100644 lustre/tests/busy.sh delete mode 100644 lustre/tests/checkstack.pl delete mode 100644 lustre/tests/checkstat.c delete mode 100644 lustre/tests/client-echo.cfg delete mode 100644 lustre/tests/client-mount.cfg delete mode 100644 lustre/tests/client-mount2.cfg delete mode 100755 lustre/tests/cobd.sh delete mode 100644 lustre/tests/common.sh delete mode 100644 
lustre/tests/compile.sh delete mode 100644 lustre/tests/crash-mod.sh delete mode 100644 lustre/tests/create.pl delete mode 100644 lustre/tests/createdestroy.c delete mode 100644 lustre/tests/createmany.c delete mode 100644 lustre/tests/createtest.c delete mode 100644 lustre/tests/directio.c delete mode 100755 lustre/tests/echo.sh delete mode 100644 lustre/tests/elan-client.cfg delete mode 100644 lustre/tests/elan-server.cfg delete mode 100644 lustre/tests/ext2_10000.gz delete mode 100644 lustre/tests/ext2_25000.gz delete mode 100644 lustre/tests/ext3_10000.gz delete mode 100644 lustre/tests/fchdir_test.c delete mode 100644 lustre/tests/fs.sh delete mode 100644 lustre/tests/fsx.c delete mode 100755 lustre/tests/intent-test.sh delete mode 100644 lustre/tests/intent-test2.sh delete mode 100644 lustre/tests/ldaptest.c delete mode 100644 lustre/tests/ldlm.cfg delete mode 100644 lustre/tests/leak_finder.pl delete mode 100755 lustre/tests/lkcdmap delete mode 100755 lustre/tests/llcleanup.sh delete mode 100755 lustre/tests/lldlm.sh delete mode 100644 lustre/tests/llecho.sh delete mode 100755 lustre/tests/llechocleanup.sh delete mode 100755 lustre/tests/llext3.sh delete mode 100644 lustre/tests/llmodules.sh delete mode 100644 lustre/tests/llmount-client.sh delete mode 100644 lustre/tests/llmount-server.sh delete mode 100755 lustre/tests/llmount.sh delete mode 100755 lustre/tests/llmountcleanup.sh delete mode 100755 lustre/tests/llrext3.sh delete mode 100755 lustre/tests/llrmount.sh delete mode 100644 lustre/tests/llrsetup.sh delete mode 100644 lustre/tests/llsetup.sh delete mode 100755 lustre/tests/llsimple.sh delete mode 100755 lustre/tests/local.sh delete mode 100755 lustre/tests/lov.sh delete mode 100644 lustre/tests/lstiming.sh delete mode 100644 lustre/tests/lustre.cfg delete mode 100755 lustre/tests/mcr-individual-ost-nogw-config.sh delete mode 100755 lustre/tests/mcr-mds-failover-config.sh delete mode 100755 lustre/tests/mcr-routed-config.sh delete mode 100755 
lustre/tests/mcr.sh delete mode 100644 lustre/tests/mcreate.c delete mode 100755 lustre/tests/mcrlov.sh delete mode 100644 lustre/tests/mdcreq.sh delete mode 100755 lustre/tests/mdcreqcleanup.sh delete mode 100644 lustre/tests/mds.cfg delete mode 100644 lustre/tests/mkdirdeep.c delete mode 100755 lustre/tests/mkdirmany.c delete mode 100755 lustre/tests/mlink.c delete mode 100755 lustre/tests/modules.cfg delete mode 100644 lustre/tests/mount2.sh delete mode 100644 lustre/tests/mount2fs.sh delete mode 100644 lustre/tests/multifstat.c delete mode 100755 lustre/tests/munlink.c delete mode 100644 lustre/tests/net-client.cfg delete mode 100644 lustre/tests/net-local.cfg delete mode 100644 lustre/tests/net-server.cfg delete mode 100644 lustre/tests/o_directory.c delete mode 100644 lustre/tests/obddisk.cfg delete mode 100644 lustre/tests/obdecho.cfg delete mode 100644 lustre/tests/obdfilter.cfg delete mode 100644 lustre/tests/open_delay.c delete mode 100644 lustre/tests/openclose.c delete mode 100644 lustre/tests/opendevunlink.c delete mode 100644 lustre/tests/opendirunlink.c delete mode 100644 lustre/tests/openfile.c delete mode 100644 lustre/tests/openme.c delete mode 100644 lustre/tests/openunlink.c delete mode 100644 lustre/tests/ostreq.sh delete mode 100755 lustre/tests/recovery-cleanup.sh delete mode 100755 lustre/tests/recovery-small-upcall.sh delete mode 100755 lustre/tests/recovery-small.sh delete mode 100644 lustre/tests/rename.pl delete mode 100644 lustre/tests/runas.c delete mode 100755 lustre/tests/rundbench delete mode 100755 lustre/tests/runfailure-client-mds-recover.sh delete mode 100755 lustre/tests/runfailure-mds delete mode 100755 lustre/tests/runfailure-net delete mode 100755 lustre/tests/runfailure-ost delete mode 100755 lustre/tests/runiozone delete mode 100644 lustre/tests/runobdstat delete mode 100644 lustre/tests/runregression-brw.sh delete mode 100755 lustre/tests/runregression-mds.sh delete mode 100644 lustre/tests/runregression-net.sh delete 
mode 100755 lustre/tests/runslabinfo delete mode 100755 lustre/tests/runtests delete mode 100755 lustre/tests/runvmstat delete mode 100644 lustre/tests/sanity-ldlm.sh delete mode 100644 lustre/tests/sanity.sh delete mode 100644 lustre/tests/sanityN.sh delete mode 100755 lustre/tests/snaprun.sh delete mode 100644 lustre/tests/stat.c delete mode 100644 lustre/tests/statmany.c delete mode 100644 lustre/tests/statone.c delete mode 100644 lustre/tests/tbox.sh delete mode 100644 lustre/tests/tchmod.c delete mode 100755 lustre/tests/test.c delete mode 100755 lustre/tests/test2.c delete mode 100644 lustre/tests/test_brw.c delete mode 100644 lustre/tests/testreq.c delete mode 100644 lustre/tests/toexcl.c delete mode 100755 lustre/tests/trivial.sh delete mode 100644 lustre/tests/truncate.c delete mode 100644 lustre/tests/uml.sh delete mode 100644 lustre/tests/unlinkmany.c delete mode 100644 lustre/tests/utime.c delete mode 100644 lustre/tests/wantedi.c delete mode 100644 lustre/tests/writeme.c delete mode 100644 lustre/utils/.cvsignore delete mode 100644 lustre/utils/Lustre/.cvsignore delete mode 100644 lustre/utils/Lustre/Makefile.am delete mode 100644 lustre/utils/Lustre/__init__.py delete mode 100644 lustre/utils/Lustre/cmdline.py delete mode 100644 lustre/utils/Lustre/error.py delete mode 100644 lustre/utils/Lustre/lustredb.py delete mode 100644 lustre/utils/Makefile.am delete mode 100755 lustre/utils/automatic-reconnect-sample delete mode 100755 lustre/utils/ha_assist.sh delete mode 100755 lustre/utils/ha_assist2.sh delete mode 100644 lustre/utils/lactive delete mode 100755 lustre/utils/lconf delete mode 100644 lustre/utils/lctl.c delete mode 100644 lustre/utils/lfind.c delete mode 100644 lustre/utils/llanalyze delete mode 100755 lustre/utils/llobdstat.pl delete mode 100755 lustre/utils/llstat.pl delete mode 100755 lustre/utils/lmc delete mode 100755 lustre/utils/load_ldap.sh delete mode 100644 lustre/utils/lstripe.c delete mode 100755 lustre/utils/mds-failover-sample 
delete mode 100644 lustre/utils/obd.c delete mode 100644 lustre/utils/obdbarrier.c delete mode 100644 lustre/utils/obdctl.c delete mode 100644 lustre/utils/obdctl.h delete mode 100644 lustre/utils/obdio.c delete mode 100644 lustre/utils/obdiolib.c delete mode 100644 lustre/utils/obdiolib.h delete mode 100644 lustre/utils/parser.c delete mode 100644 lustre/utils/parser.h delete mode 100644 lustre/utils/wirecheck.c diff --git a/lnet/.cvsignore b/lnet/.cvsignore deleted file mode 100644 index 99ac885..0000000 --- a/lnet/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -Kernelenv -Makefile -Makefile.in -aclocal.m4 -autom4te.cache -config.log -config.status -configure diff --git a/lnet/AUTHORS b/lnet/AUTHORS deleted file mode 100644 index e69de29..0000000 diff --git a/lnet/ChangeLog b/lnet/ChangeLog deleted file mode 100644 index e69de29..0000000 diff --git a/lnet/Kernelenv.in b/lnet/Kernelenv.in deleted file mode 100644 index 29a713f..0000000 --- a/lnet/Kernelenv.in +++ /dev/null @@ -1 +0,0 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include diff --git a/lnet/Kernelenv.mk b/lnet/Kernelenv.mk deleted file mode 100644 index 29a713f..0000000 --- a/lnet/Kernelenv.mk +++ /dev/null @@ -1 +0,0 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include diff --git a/lnet/Makefile.am b/lnet/Makefile.am deleted file mode 100644 index 1a223f2..0000000 --- a/lnet/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -EXTRA_DIST = Rules.linux archdep.m4 include -DIST_SUBDIRS = libcfs portals knals unals utils tests doc router -if LIBLUSTRE -SUBDIRS = portals unals utils -else -SUBDIRS = libcfs portals knals unals utils tests doc router -endif diff --git a/lnet/Makefile.mk b/lnet/Makefile.mk deleted file mode 100644 index be0e51a..0000000 --- a/lnet/Makefile.mk +++ /dev/null @@ -1,6 +0,0 @@ -include fs/lustre/portals/Kernelenv - -obj-y += portals/ -obj-y += libcfs/ -obj-y += knals/ -obj-y += router/ diff --git a/lnet/NEWS b/lnet/NEWS deleted file mode 100644 index e69de29..0000000 diff --git a/lnet/README b/lnet/README deleted file mode 100644 index e69de29..0000000 diff --git a/lnet/Rules.linux b/lnet/Rules.linux deleted file mode 100644 index 93943b7..0000000 --- a/lnet/Rules.linux +++ /dev/null @@ -1,25 +0,0 @@ -# included in Linux kernel directories -# Rules for module building - -if LINUX25 - -basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g') -AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) - -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS) - -else - -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS) - -endif - -tags: - rm -f $(top_srcdir)/TAGS - rm -f $(top_srcdir)/tags - find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs etags -a - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a - find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs ctags -a - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs ctags -a diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 deleted file mode 100644 index 7a4e05c..0000000 --- a/lnet/archdep.m4 +++ /dev/null @@ -1,317 +0,0 @@ - -# -------- in kernel compilation? 
(2.5 only) ------------- -AC_ARG_ENABLE(inkernel, [ --enable-inkernel set up 2.5 kernel makefiles]) -AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes) -echo "Makefile for in kernel build: $INKERNEL" - -# -------- liblustre compilation -------------- -AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib") - -# -------- set linuxdir ------------ - -AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux) -AC_SUBST(LINUX) - -# --------- UML? -------------------- -AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...) -if test $host_cpu = "lib" ; then - host_cpu="lib" - AC_MSG_RESULT(no building Lustre library) -else - if test -e $LINUX/include/asm-um ; then - if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then - host_cpu="um"; - AC_MSG_RESULT(yes) - else - AC_MSG_RESULT(no (asm doesn't point at asm-um)) - fi - - else - AC_MSG_RESULT(no (asm-um missing)) - fi -fi - -# --------- Linux 25 ------------------ - -AC_MSG_CHECKING(if you are running linux 2.5) -if test -e $LINUX/include/linux/namei.h ; then - linux25="yes" - AC_MSG_RESULT(yes) -else - linux25="no" - AC_MSG_RESULT(no) -fi -AM_CONDITIONAL(LINUX25, test x$linux25 = xyes) -echo "Makefiles for in linux 2.5 build: $LINUX25" - -# ------- Makeflags ------------------ - -AC_MSG_CHECKING(setting make flags system architecture: ) -case ${host_cpu} in - lib ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -Wall ' - KCPPFLAGS='-D__arch_lib__ ' - libdir='${exec_prefix}/lib/lustre' - MOD_LINK=elf_i386 -;; - um ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common ' - case ${linux25} in - yes ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include 
-I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) ' - ;; - * ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include ' - ;; - esac - - MOD_LINK=elf_i386 -;; - i*86 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -pipe' - case ${linux25} in - yes ) - KCPPFLAGS='-D__KERNEL__ -DMODULE -march=i686 -I$(LINUX)/include/asm-i386/mach-default -nostdinc -iwithprefix include ' - ;; - * ) - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - ;; - esac - MOD_LINK=elf_i386 -;; - - alphaev6 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - alphaev67 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - alpha* ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev5' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - ia64 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step' - KCPPFLAGS='-D__KERNEL__ -DMODULE' - MOD_LINK=elf64_ia64 -;; - - sparc64 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common 
-Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs' - KCPPFLAGS='-D__KERNEL__' - MOD_LINK=elf64_sparc - -;; - - powerpc ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring' - KCPPFLAGS='-D__KERNEL__' - MOD_LINK=elf32ppclinux -;; - - *) - AC_ERROR("Unknown Linux Platform: $host_cpu") -;; -esac - -# ----------- make dep run? ------------------ - -if test $host_cpu != "lib" ; then - AC_MSG_CHECKING(if make dep has been run in kernel source (host $host_cpu) ) - if test -f $LINUX/include/linux/config.h ; then - AC_MSG_RESULT(yes) - else - AC_MSG_ERROR(** cannot find $LINUX/include/linux/config.h. Run make dep in $LINUX.) - fi -fi - -# ------------ include paths ------------------ - -if test $host_cpu != "lib" ; then - KINCFLAGS="-I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include -I$LINUX/include" -else - KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include' -fi -CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS" - -if test $host_cpu != "lib" ; then -# ------------ autoconf.h ------------------ - AC_MSG_CHECKING(if autoconf.h is in kernel source) - if test -f $LINUX/include/linux/autoconf.h ; then - AC_MSG_RESULT(yes) - else - AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.) - fi - -# ------------ RELEASE and moduledir ------------------ - AC_MSG_CHECKING(for Linux release) - - dnl We need to rid ourselves of the nasty [ ] quotes. 
- changequote(, ) - dnl Get release from version.h - RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`" - changequote([, ]) - - moduledir='$(libdir)/modules/'$RELEASE/kernel - AC_SUBST(moduledir) - - modulefsdir='$(moduledir)/fs/$(PACKAGE)' - AC_SUBST(modulefsdir) - - AC_MSG_RESULT($RELEASE) - AC_SUBST(RELEASE) - -# ---------- modversions? -------------------- - AC_MSG_CHECKING(for MODVERSIONS) - if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1; - then - MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB" - AC_MSG_RESULT(yes) - else - MFLAGS= - AC_MSG_RESULT(no) - fi -fi - -# ---------- Portals flags -------------------- - -#AC_PREFIX_DEFAULT([]) -#if test "x$prefix" = xNONE || test "x$prefix" = x; then -# usrprefix=/usr -#else -# usrprefix='${prefix}' -#fi -#AC_SUBST(usrprefix) - -AC_MSG_CHECKING(if kernel has CPU affinity support) -if test "$target_cpu" != ia64 ; then - enable_affinity_temp="-DCPU_AFFINITY=1" - AC_MSG_RESULT(yes) -else - enable_affinity_temp="" - AC_MSG_RESULT(no) -fi - -AC_MSG_CHECKING(if kernel has zero-copy TCP support) -ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`" -if test "$ZCCD" != 0 ; then - enable_zerocopy_temp="-DSOCKNAL_ZC=1" - AC_MSG_RESULT(yes) -else - enable_zerocopy_temp="" - AC_MSG_RESULT(no) -fi - -AC_ARG_ENABLE(zerocopy, [ --enable-zerocopy enable socknal zerocopy],enable_zerocopy=$enable_zerocopy_temp, enable_zerocopy="") - -AC_ARG_ENABLE(affinity, [ --enable-affinity enable process/irq affinity],enable_affinity="-DCPU_AFFINITY=1", enable_affinity=$enable_affinity_temp) -##################################### - -AC_MSG_CHECKING(if quadrics kernel headers are present) -if test -d $LINUX/drivers/net/qsnet ; then - AC_MSG_RESULT(yes) - QSWNAL="qswnal" - with_quadrics="-I$LINUX/drivers/net/qsnet/include" - : -elif test -d $LINUX/drivers/qsnet1 ; then - AC_MSG_RESULT(yes) - QSWNAL="qswnal" - 
with_quadrics="-I$LINUX/drivers/qsnet1/include -DPROPRIETARY_ELAN" - : -elif test -d $LINUX/drivers/quadrics ; then - AC_MSG_RESULT(yes) - QSWNAL="qswnal" - with_quadrics="-I$LINUX/drivers/quadrics/include -DPROPRIETARY_ELAN" - : -#elif test -d /usr/include/elan3 ; then -# AC_MSG_RESULT(yes) -# QSWNAL="qswnal" -# with_quadrics="" -# : -else - AC_MSG_RESULT(no) - QSWNAL="" - with_quadrics="" - : -fi -AC_SUBST(with_quadrics) -AC_SUBST(QSWNAL) - -# R. Read 5/02 -GMNAL="" -echo "checking with-gm=" ${with_gm} -if test "${with_gm+set}" = set; then - if test "${with_gm}" = yes; then - with_gm="-I/usr/local/gm/include" - else - with_gm=-I"$with_gm/include" - fi - GMNAL="gmnal" -else -# default case - no GM - with_gm="" -fi -AC_SUBST(with_gm) -AC_SUBST(GMNAL) - - -def_scamac=/opt/scali/include -AC_ARG_WITH(scamac, [ --with-scamac=[yes/no/path] Path to ScaMAC includes (default=/opt/scali/include)], with_scamac=$withval, with_scamac=$def_scamac) -AC_MSG_CHECKING(if ScaMAC headers are present) -if test "$with_scamac" = yes; then - with_scamac=$def_scamac -fi -if test "$with_scamac" != no -a -f ${with_scamac}/scamac.h; then - AC_MSG_RESULT(yes) - SCIMACNAL="scimacnal" - with_scamac="-I${with_scamac} -I${with_scamac}/icm" -else - AC_MSG_RESULT(no) - SCIMACNAL="" - with_scamac="" -fi - -AC_SUBST(with_scamac) -AC_SUBST(SCIMACNAL) - -CFLAGS="$KCFLAGS" -CPPFLAGS="$KINCFLAGS $KCPPFLAGS $MFLAGS $enable_zerocopy $enable_affinity $with_quadrics $with_gm $with_scamac " - -AC_SUBST(MOD_LINK) -AC_SUBST(LINUX25) -AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib) - -# ---------- Red Hat 2.4.20 backports some 2.5 bits -------- -# This needs to run after we've defined the KCPPFLAGS - -AC_MSG_CHECKING(for kernel version) -AC_TRY_LINK([#define __KERNEL__ - #include ], - [struct task_struct p; - p.sighand = NULL;], - [RH_2_4_20=1], - [RH_2_4_20=0]) - -if test $RH_2_4_20 = 1; then - AC_MSG_RESULT(redhat-2.4.20) - CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20" -else - AC_MSG_RESULT($RELEASE) -fi diff 
--git a/lnet/autogen.sh b/lnet/autogen.sh deleted file mode 100644 index 9deed73..0000000 --- a/lnet/autogen.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -aclocal && -automake --add-missing && -${AUTOCONF:-autoconf} diff --git a/lnet/build.m4 b/lnet/build.m4 deleted file mode 100644 index 025f243..0000000 --- a/lnet/build.m4 +++ /dev/null @@ -1,95 +0,0 @@ -# ---------- other tests and settings --------- - - -# --------- unsigned long long sane? ------- - -AC_CHECK_SIZEOF(unsigned long long, 0) -echo "---> size SIZEOF $SIZEOF_unsigned_long_long" -echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long" -if test $ac_cv_sizeof_unsigned_long_long != 8 ; then - AC_MSG_ERROR([** we assume that sizeof(long long) == 8. Tell phil@clusterfs.com]) -fi - -# directories for binaries -ac_default_prefix= -bindir='${exec_prefix}/usr/bin' -sbindir='${exec_prefix}/usr/sbin' -includedir='${prefix}/usr/include' - -# Directories for documentation and demos. -docdir='${prefix}/usr/share/doc/$(PACKAGE)' -AC_SUBST(docdir) -demodir='$(docdir)/demo' -AC_SUBST(demodir) -pkgexampledir='${prefix}/usr/lib/$(PACKAGE)/examples' -AC_SUBST(pkgexampledir) -pymoddir='${prefix}/usr/lib/${PACKAGE}/python/Lustre' -AC_SUBST(pymoddir) -modulenetdir='$(moduledir)/net/$(PACKAGE)' -AC_SUBST(modulenetdir) - - -# ---------- BAD gcc? ------------ -AC_PROG_RANLIB -AC_PROG_CC -AC_MSG_CHECKING(for buggy compiler) -CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"` -bad_cc() { - echo - echo " '$CC_VERSION'" - echo " has been known to generate bad code, " - echo " please get an updated compiler." 
- AC_MSG_ERROR(sorry) -} -TMP_VERSION=`echo $CC_VERSION | cut -c 1-16` -if test "$TMP_VERSION" = "gcc version 2.95"; then - bad_cc -fi -case "$CC_VERSION" in - # ost_pack_niobuf putting 64bit NTOH temporaries on the stack - # without "sub $0xc,%esp" to protect the stack from being - # stomped on by interrupts (bug 606) - "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)") - bad_cc - ;; - # mandrake's similar sub 0xc compiler bug - # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2 - "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)") - bad_cc - ;; - *) - AC_MSG_RESULT(no known problems) - ;; -esac -# end ------ BAD gcc? ------------ - -# -------- Check for required packages -------------- - -# this doesn't seem to work on older autoconf -# AC_CHECK_LIB(readline, readline,,) -AC_ARG_ENABLE(readline, [ --enable-readline use readline library],, - enable_readline="yes") - -if test "$enable_readline" = "yes" ; then - LIBREADLINE="-lreadline -lncurses" - HAVE_LIBREADLINE="-DHAVE_LIBREADLINE=1" -else - LIBREADLINE="" - HAVE_LIBREADLINE="" -fi -AC_SUBST(LIBREADLINE) -AC_SUBST(HAVE_LIBREADLINE) - -AC_ARG_ENABLE(efence, [ --enable-efence use efence library],, - enable_efence="no") - -if test "$enable_efence" = "yes" ; then - LIBEFENCE="-lefence" - HAVE_LIBEFENCE="-DHAVE_LIBEFENCE=1" -else - LIBEFENCE="" - HAVE_LIBEFENCE="" -fi -AC_SUBST(LIBEFENCE) -AC_SUBST(HAVE_LIBEFENCE) - diff --git a/lnet/configure.in b/lnet/configure.in deleted file mode 100644 index 31d3492..0000000 --- a/lnet/configure.in +++ /dev/null @@ -1,34 +0,0 @@ -# This version is here to make autoconf happy; the name is a file which is -# "unique" to this directory so that configure knows where it should run. -AC_INIT(knals/Makefile.am, 3.0) -AC_CANONICAL_SYSTEM -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -# Automake variables. 
Steal the version number from packaging/intersync.spec -AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c])) -# AM_MAINTAINER_MODE - -sinclude(build.m4) -sinclude(archdep.m4) - -if test x$enable_inkernel = xyes ; then -cp Kernelenv.mk Kernelenv.in -cp Makefile.mk Makefile.in -cp libcfs/Makefile.mk libcfs/Makefile.in -cp portals/Makefile.mk portals/Makefile.in -cp knals/Makefile.mk knals/Makefile.in -cp knals/socknal/Makefile.mk knals/socknal/Makefile.in -cp router/Makefile.mk router/Makefile.in -fi - -AM_CONFIG_HEADER(include/config.h) - -AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \ - unals/Makefile knals/Makefile router/Makefile \ - knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \ - knals/scimacnal/Makefile knals/toenal/Makefile \ - utils/Makefile tests/Makefile doc/Makefile ]) - diff --git a/lnet/doc/.cvsignore b/lnet/doc/.cvsignore deleted file mode 100644 index 827dca4..0000000 --- a/lnet/doc/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -Makefile -Makefile.in -*.eps -*.pdf diff --git a/lnet/doc/Data-structures b/lnet/doc/Data-structures deleted file mode 100644 index b5532b1..0000000 --- a/lnet/doc/Data-structures +++ /dev/null @@ -1,65 +0,0 @@ -In this document I will try to draw the data structures and how they -interrelate in the Portals 3 reference implementation. It is probably -best shown with a drawing, so there may be an additional xfig or -Postscript figure. - - -MEMORY POOLS: ------------- - -First, a digression on memory allocation in the library. As mentioned -in the NAL Writer's Guide, the library does not link against any -standard C libraries and as such is unable to dynamically allocate -memory on its own. It requires that the NAL implement a method -for allocation that is appropriate for the protection domain in -which the library lives. This is only called when a network -interface is initialized to allocate the Portals object pools. 
- -These pools are preallocate blocks of objects that the library -can rapidly make active and manage with a minimum of overhead. -It is also cuts down on overhead for setting up structures -since the NAL->malloc() callback does not need to be called -for each object. - -The objects are maintained on a per-object type singly linked free -list and contain a pointer to the next free object. This pointer -is NULL if the object is not on the free list and is non-zero -if it is on the list. The special sentinal value of 0xDEADBEEF -is used to mark the end of the free list since NULL could -indicate that the last object in the list is not free. - -When one of the lib_*_alloc() functions is called, the library -returns the head of the free list and advances the head pointer -to the next item on the list. The special case of 0xDEADBEEF is -checked and a NULL pointer is returned if there are no more -objects of this type available. The lib_*_free() functions -are even simpler -- check to ensure that the object is not already -free, set its next pointer to the current head and then set -the head to be this newly freed object. - -Since C does not have templates, I did the next best thing and wrote -the memory pool allocation code as a macro that expands based on the -type of the argument. The mk_alloc(T) macro expands to -write the _lib_T_alloc() and lib_T_free() functions. -It requires that the object have a pointer of the type T named -"next_free". There are also functions that map _lib_T_alloc() -to lib_T_alloc() so that the library can add some extra -functionality to the T constructor. - - - -LINKED LISTS: ------------- - -Many of the active Portals objects are stored in doubly linked lists -when they are active. These are always implemented with the pointer -to the next object and a pointer to the next pointer of the -previous object. This avoids the "dummy head" object or -special cases for inserting at the beginning or end of the list. 
-The pointer manipulations are a little hairy at times, but -I hope that they are understandable. - -The actual linked list code is implemented as macros in , -although the object has to know about - - diff --git a/lnet/doc/Makefile.am b/lnet/doc/Makefile.am deleted file mode 100644 index 7c65e6c..0000000 --- a/lnet/doc/Makefile.am +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -LYX2PDF = lyx --export pdf -LYX2TXT = lyx --export text -LYX2HTML = lyx --export html -SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps - -DOCS = portals3.pdf -IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps -LYXFILES= portals3.lyx - -MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED) -GENERATED = -EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES) - -all: $(DOCS) - -# update date and version in document -date := $(shell date +%x) -tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/') -addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g' - -# Regenerate when the $(VERSION) or $Name: $ changes. 
-.INTERMEDIATE: $(GENERATED) -$(GENERATED) : %.lyx: %.lin Makefile - $(addversion) $< > $@ - -.lyx.pdf: - @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n" - -.lyx.txt: - @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n" -.lyx.html: - @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n" -.fig.eps: - -fig2dev -L eps $< > $@ - -portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx - -syncweb: portals3.pdf -# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf -# ( cd /usr/src/www ; make lustre ; make synclustre ) - diff --git a/lnet/doc/Message-life-cycle b/lnet/doc/Message-life-cycle deleted file mode 100644 index e8cc7e2..0000000 --- a/lnet/doc/Message-life-cycle +++ /dev/null @@ -1,118 +0,0 @@ -This documents the life cycle of message as it arrives and is handled by -a basic async, packetized NAL. There are four types of messages that have -slightly different life cycles, so they are addressed independently. - - -Put request ------------ - -1. NAL notices that there is a incoming message header on the network -and reads an ptl_hdr_t in from the wire. - -2. It may store additional NAL specific data that provides context -for this event in a void* that it will interpret in some fashion -later. - -3. The NAL calls lib_parse() with a pointer to the header and its -private data structure. - -4. The library decodes the header and may build a message state -object that describes the event to be written and the ACK to be -sent, if any. It then calls nal->recv() with the private data -that the NAL passed in, a pointer to the message state object -and a translated user address. - - The NAL will have been given a chance to pretranslate - all user addresses when the buffers are created. This - process is described in the NAL-HOWTO. - -5. 
The NAL should restore what ever context it required from the -private data pointer, begin receiving the bytes and possibly store -some extra state of its own. It should return at this point. - - - -Get request ------------ - -1. As with a Put, the NAL notices the incoming message header and -passes it to lib_parse(). - -2. The library decodes the header and calls nal->recv() with a -zero byte length, offset and destination to instruct it to clean -up the wire after reading the header. The private data will -be passed in as well, allowing the NAL to retrieve any state -or context that it requires. - -3. The library may build a message state object to possibly -write an event log or invalidate a memory region. - -4. The library will build a ptl_msg_t header that specifies the -Portals protocol information for delivery at the remote end. - -5. The library calls nal->send() with the pre-built header, -the optional message state object, the four part address -component, a translated user pointer + offset, and some -other things. - -6. The NAL is to put the header on the wire or copy it at -this point (since it off the stack). It should store some -amount of state about its current position in the message and -the destination address. - -7. And then return to the library. - - -Reply request -------------- - -1. Starting at "The library decodes the header..." - -2. The library decodes the header and calls nal->recv() -to bring in the rest of the message. Flow continues in -exactly the same fashion as with all other receives. - - -Ack request ------------ - -1. The library decodes the header, builds the appropriate data -structures for the event in a message state object and calls nal->recv() -with a zero byte length, etc. - - -Packet arrival --------------- - -1. 
The NAL should notice the arrival of a packet, retrieve whatever -state it needs from the message ID or other NAL specific header data -and place the data bytes directly into the user address that were -given to nal->recv(). - - How this happens is outside the scope of the Portals library - and soley determined by the NAL... - -2. If this is the last packet in a message, the NAL should retrieve -the lib_msg_t *cookie that it was given in the call to nal->recv() -and pass it to lib_finalize(). lib_finalize() may call nal->send() -to send an ACK, nal->write() to record an entry in the event log, -nal->invalidate() to unregister a region of memory or do nothing at all. - -3. It should then clean up any remaining NAL specific state about -the message and go back into the main loop. - - -Outgoing packets ----------------- - -1. When the NAL has pending output, it should put the packets on -the wire wrapped with whatever implementation specified wrappers. - -2. Once it has output all the packets of a message it should -call lib_finalize() with the message state object that was -handed to nal->send(). This will allows the library to clean -up its state regarding the message and write any pending event -entries. - - - diff --git a/lnet/doc/NAL-HOWTO b/lnet/doc/NAL-HOWTO deleted file mode 100644 index ea38aed..0000000 --- a/lnet/doc/NAL-HOWTO +++ /dev/null @@ -1,293 +0,0 @@ -This document is a first attempt at describing how to write a NAL -for the Portals 3 library. It also defines the library architecture -and the abstraction of protection domains. - - -First, an overview of the architecture: - - Application - -----|----+-------- - | - API === NAL (User space) - | ----------+---|----- - | - LIB === NAL (Library space) - | ----------+---|----- - - Physical wire (NIC space) - - -Application - API -API-side NAL ------------- -LIB-side NAL - LIB -LIB-side NAL - wire - -Communication is through the indicated paths via well defined -interfaces. 
The API and LIB portions are written to be portable -across platforms and do not depend on the network interface. - -Communcation between the application and the API code is -defined in the Portals 3 API specification. This is the -user-visible portion of the interface and should be the most -stable. - - - -API-side NAL: ------------- - -The user space NAL needs to implement only a few functions -that are stored in a nal_t data structure and called by the -API-side library: - - int forward( nal_t *nal, - int index, - void *args, - size_t arg_len, - void *ret, - size_t ret_len - ); - -Most of the data structures in the portals library are held in -the LIB section of the code, so it is necessary to forward API -calls across the protection domain to the library. This is -handled by the NAL's forward method. Once the argument and return -blocks are on the remote side the NAL should call lib_dispatch() -to invoke the appropriate API function. - - int validate( nal_t *nal, - void *base, - size_t extent, - void **trans_base, - void **trans_data - ); - -The validate method provides a means for the NAL to prevalidate -and possibly pretranslate user addresses into a form suitable -for fast use by the network card or kernel module. The trans_base -pointer will be used by the library everytime it needs to -refer to the block of memory. The trans_data result is a -cookie that will be handed to the NAL along with the trans_base. - -The library never performs calculations on the trans_base value; -it only computes offsets that are then handed to the NAL. - - - int shutdown( nal_t *nal, int interface ); - -Brings down the network interface. The remote NAL side should -call lib_fini() to bring down the library side of the network. - - void yield( nal_t *nal ); - -This allows the user application to gracefully give up the processor -while busy waiting. 
Performance critical applications may not -want to take the time to call this function, so it should be an -option to the PtlEQWait call. Right now it is not implemented as such. - -Lastly, the NAL must implement a function named PTL_IFACE_*, where -* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR. -This initialization function is to set up communication with the -library-side NAL, which should call lib_init() to bring up the -network interface. - - - -LIB-side NAL: ------------- - -On the library-side, the NAL has much more responsibility. It -is responsible for calling lib_dispatch() on behalf of the user, -it is also responsible for bringing packets off the wire and -pushing bits out. As on the user side, the methods are stored -in a nal_cb_t structure that is defined on a per network -interface basis. - -The calls to lib_dispatch() need to be examined. The prototype: - - void lib_dispatch( - nal_cb_t *nal, - void *private, - int index, - void *arg_block, - void *ret_block - ); - -has two complications. The private field is a NAL-specific -value that will be passed to any callbacks produced as a result -of this API call. Kernel module implementations may use this -for task structures, or perhaps network card data. It is ignored -by the library. - -Secondly, the arg_block and ret_block must be in the same protection -domain as the library. The NAL's two halves must communicate the -sizes and perform the copies. After the call, the buffer pointed -to by ret_block will be filled in and should be copied back to -the user space. How this is to be done is NAL specific. - - int lib_parse( - nal_cb_t *nal, - ptl_hdr_t *hdr, - void *private - ); - -This is the only other entry point into the library from the NAL. -When the NAL detects an incoming message on the wire it should read -sizeof(ptl_hdr_t) bytes and pass a pointer to the header to -lib_parse(). 
It may set private to be anything that it needs to -tie the incoming message to callbacks that are made as a result -of this event. - -The method calls are: - - int (*send)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int nid, - int pid, - int gid, - int rid, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t len - ); - -This is a tricky function -- it must support async output -of messages as well as properly syncronized event log writing. -The private field is the same that was passed into lib_dispatch() -or lib_parse() and may be used to tie this call to the event -that initiated the entry to the library. - -The cookie is a pointer to a library private value that must -be passed to lib_finalize() once the message has been completely -sent. It should not be examined by the NAL for any meaning. - -The four ID fields are passed in, although some implementations -may not use all of them. - -The single base pointer has been replaced with the translated -address that the API NAL generated in the api_nal->validate() -call. The trans_data is unchanged and the offset is in bytes. - - - int (*recv)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t mlen, - size_t rlen - ); - -This callback will only be called in response to lib_parse(). -The cookie, trans_addr and trans_data are as discussed in send(). -The NAL should read mlen bytes from the wire, deposit them into -trans_base + offset and then discard (rlen - mlen) bytes. -Once the entire message has been received the NAL should call -lib_finalize() with the lib_msg_t *cookie. - -The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0 -is used to indicate that the NAL should clean up the wire. This could -be implemented as a blocking call, although having it return as quickly -as possible is desirable. 
- - int (*write)( - nal_cb_t *nal, - void *private, - user_ptr trans_addr, - user_ptr trans_data, - size_t offset, - - void *src_addr, - size_t len - ); - -This is essentially a cross-protection domain memcpy(). The user address -has been pretranslated by the api_nal->translate() call. - - void *(*malloc)( - nal_cb_t *nal, - size_t len - ); - - void (*free)( - nal_cb_t *nal, - void *buf - ); - -Since the NAL may be in a non-standard hosted environment it can -not call malloc(). This allows the library side NAL to implement -the system specific malloc(). In the current reference implementation -the libary only calls nal->malloc() when the network interface is -initialized and then calls free when it is brought down. The library -maintains its own pool of objects for allocation so only one call to -malloc is made per object type. - - void (*invalidate)( - nal_cb_t *nal, - user_ptr trans_base, - user_ptr trans_data, - size_t extent - ); - -User addresses are validated/translated at the user-level API NAL -method, which is likely to push them to this level. Meanwhile, -the library NAL will be notified when the library no longer -needs the buffer. Overlapped buffers are not detected by the -library, so the NAL should ref count each page involved. - -Unfortunately we have a few bugs when the invalidate method is -called. It is still in progress... - - void (*printf)( - nal_cb_t *nal, - const char *fmt, - ... - ); - -As with malloc(), the library does not have any way to do printf -or printk. It is not necessary for the NAL to implement the this -call, although it will make debugging difficult. - - void (*cli)( - nal_cb_t *nal, - unsigned long *flags - ); - - void (*sti)( - nal_cb_t *nal, - unsigned long *flags - ); - -These are used by the library to mark critical sections. 
- - int (*gidrid2nidpid)( - nal_cb_t *nal, - ptl_id_t gid, - ptl_id_t rid, - ptl_id_t *nid, - ptl_id_t *pid - ); - - - int (*nidpid2gidrid)( - nal_cb_t *nal, - ptl_id_t nid, - ptl_id_t pid, - ptl_id_t *gid, - ptl_id_t *rid - ); - -Rolf added these. I haven't looked at how they have to work yet. diff --git a/lnet/doc/file.fig b/lnet/doc/file.fig deleted file mode 100644 index 914c294..0000000 --- a/lnet/doc/file.fig +++ /dev/null @@ -1,111 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1200 750 1650 1050 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1050 1650 750 1200 750 1200 1050 1650 1050 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001 --6 -6 1200 2325 1650 2625 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001 --6 -6 1200 1800 1650 2100 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001 --6 -6 1200 1275 1650 1575 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001 --6 -6 450 750 900 1200 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 825 450 1050 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1050 900 825 --6 -6 450 2325 900 2775 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 2400 450 2625 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2625 900 2400 --6 -6 450 1800 900 2250 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800 -2 1 0 1 0 7 100 0 20 0.000 0 
0 -1 0 0 2 - 450 1875 450 2100 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2100 900 1875 --6 -6 450 1275 900 1725 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 1350 450 1575 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1575 900 1350 --6 -6 2250 750 3450 2625 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1200 3150 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1500 3150 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1800 3150 1800 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2100 3150 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 975 3150 975 3150 2625 2550 2625 2550 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2400 3150 2400 -4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2400 2550 1350 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1875 2550 1050 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1425 2550 1950 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 900 2550 1650 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 900 1200 900 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1425 1200 1425 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1950 1200 1950 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2475 1200 2475 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2025 2550 2250 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2550 2550 2475 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1875 2850 1875 600 225 600 225 2850 1875 2850 -4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001 diff --git a/lnet/doc/flow_new.fig 
b/lnet/doc/flow_new.fig deleted file mode 100644 index d828dea..0000000 --- a/lnet/doc/flow_new.fig +++ /dev/null @@ -1,213 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 525 2175 1575 2925 -6 675 2287 1425 2812 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001 -4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 2550 1050 2175 525 2550 1050 2925 1575 2550 --6 -6 3450 1275 4350 1725 -6 3600 1312 4200 1687 -4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001 -4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275 --6 -6 4650 1275 5550 1725 -6 4725 1312 5475 1687 -4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001 -4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275 --6 -6 1350 525 2250 975 -6 1350 562 2250 937 -4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001 -4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 525 2250 525 2250 975 1350 975 1350 525 --6 -6 525 1125 1575 1875 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 1500 1050 1125 525 1500 1050 1875 1575 1500 -4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001 --6 -6 2340 1237 2940 1687 -6 2340 1237 2940 1687 -4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001 -4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001 -4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001 --6 --6 -6 525 3225 1575 3975 -6 675 3375 1425 3750 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001 --6 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 525 3600 1050 3225 1575 3600 1050 3975 525 3600 --6 -6 3300 3375 4350 3825 -6 3300 3412 4350 3787 -4 1 0 50 0 
0 10 0.0000 4 105 735 3825 3562 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375 --6 -6 1950 3225 3000 3975 -6 2250 3450 2700 3750 -4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600 --6 -6 3150 4500 4200 4950 -6 3150 4537 4200 4912 -4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500 --6 -6 600 4500 1500 4950 -6 675 4537 1425 4912 -4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001 -4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 600 4500 1500 4500 1500 4950 600 4950 600 4500 --6 -6 4650 4350 5700 5100 -6 4950 4537 5400 4912 -6 4950 4537 5400 4912 -4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001 -4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001 --6 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725 --6 -6 6000 4500 6900 4950 -6 6225 4575 6675 4875 -4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001 -4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500 --6 -6 1800 4350 2850 5100 -6 2100 4575 2550 4875 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 1875 1050 2175 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1575 1500 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 
0 0 1.00 60.00 120.00 - 1050 450 1050 1125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1350 750 1050 750 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 2925 1050 3225 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3150 1500 3450 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4350 1500 4650 1500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1575 3600 1950 3600 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 3975 1050 4500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 3600 3300 3600 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 4725 1800 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 5700 4725 6000 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2850 4725 3150 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4200 4725 4650 4725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 6900 4725 7950 4725 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 2250 750 2475 750 2625 750 2625 900 2625 1125 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 2475 3225 2475 2400 2475 2250 2325 2250 1800 2250 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 60.00 120.00 - 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 
4425 4125 - 4425 4275 4425 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 60.00 120.00 - 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125 - 7275 4275 7275 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001 -4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001 diff --git a/lnet/doc/get.fig b/lnet/doc/get.fig deleted file mode 100644 index 28db949..0000000 --- a/lnet/doc/get.fig +++ /dev/null @@ -1,33 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 2775 900 3525 1200 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001 -4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001 --6 -6 1350 1725 2175 2025 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 750 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 825 2700 1275 -2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1350 900 1950 -2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 
0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 diff --git a/lnet/doc/ieee.bst b/lnet/doc/ieee.bst deleted file mode 100644 index 4df7c50..0000000 --- a/lnet/doc/ieee.bst +++ /dev/null @@ -1,1112 +0,0 @@ -% --------------------------------------------------------------- -% -% by Paolo.Ienne@di.epfl.ch -% -% --------------------------------------------------------------- -% -% no guarantee is given that the format corresponds perfectly to -% IEEE 8.5" x 11" Proceedings, but most features should be ok. -% -% --------------------------------------------------------------- -% -% `ieee' from BibTeX standard bibliography style `abbrv' -% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. -% Copyright (C) 1985, all rights reserved. -% Copying of this file is authorized only if either -% (1) you make absolutely no changes to your copy, including name, or -% (2) if you do make changes, you name it something other than -% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. -% This restriction helps ensure that all standard styles are identical. -% The file btxbst.doc has the documentation for this style. 
- -ENTRY - { address - author - booktitle - chapter - edition - editor - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - volume - year - } - {} - { label } - -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} - -STRINGS { s t } - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} - -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} - -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} - -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} - -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {field.or.null} -{ duplicate$ empty$ 
- { pop$ "" } - 'skip$ - if$ -} - -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "{\em " swap$ * "}" * } - if$ -} - -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 's := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := - nameptr #1 > - { namesleft #1 > - { ", " * t * } - { numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {format.authors} -{ author empty$ - { "" } - { author format.names } - if$ -} - -FUNCTION {format.editors} -{ editor empty$ - { "" } - { editor format.names - editor num.names$ #1 > - { ", editors" * } - { ", editor" * } - if$ - } - if$ -} - -FUNCTION {format.title} -{ title empty$ - { "" } - { title "t" change.case$ } - if$ -} - -FUNCTION {n.dashify} -{ 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {format.date} -{ year empty$ - { month empty$ - { "" } - { "there's a month but no year in " cite$ * warning$ - month - } - if$ - } - { month empty$ - 'year - { month " " * year * } - if$ - } - if$ -} - -FUNCTION {format.btitle} -{ title emphasize -} - -FUNCTION {tie.or.space.connect} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ * * -} - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} - -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { "volume" volume tie.or.space.connect - series empty$ - 'skip$ - { " of " * series emphasize * } - if$ - "volume and number" number 
either.or.check - } - if$ -} - -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { "number" } - { "Number" } - if$ - number tie.or.space.connect - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { " in " * series * } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition empty$ - { "" } - { output.state mid.sentence = - { edition "l" change.case$ " edition" * } - { edition "t" change.case$ " edition" * } - if$ - } - if$ -} - -INTEGERS { multiresult } - -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages empty$ - { "" } - { pages multi.page.check - { "pages" pages n.dashify tie.or.space.connect } - { "page" pages tie.or.space.connect } - if$ - } - if$ -} - -FUNCTION {format.vol.num.pages} -{ volume field.or.null - number empty$ - 'skip$ - { "(" number * ")" * * - volume empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - } - if$ - pages empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.pages } - { ":" * pages n.dashify * } - if$ - } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { "chapter" } - { type "l" change.case$ } - if$ - chapter tie.or.space.connect - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.in.ed.booktitle} -{ booktitle empty$ - { "" } - { editor empty$ - { "In " booktitle emphasize * } - { "In " format.editors * ", " * booktitle emphasize * } - if$ - } - if$ -} - -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - key empty$ not and - { 
"all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} - -FUNCTION {format.thesis.type} -{ type empty$ - 'skip$ - { pop$ - type "t" change.case$ - } - if$ -} - -FUNCTION {format.tr.number} -{ type empty$ - { "Technical Report" } - 'type - if$ - number empty$ - { "t" change.case$ } - { number tie.or.space.connect } - if$ -} - -FUNCTION {format.article.crossref} -{ key empty$ - { journal empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * - warning$ - "" - } - { "In {\em " journal * "\/}" * } - if$ - } - { "In " key * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - editor num.names$ duplicate$ - #2 > - { pop$ " et~al." * } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { " et~al." * } - { " and " * editor #2 "{vv~}{ll}" format.name$ * } - if$ - } - if$ - } - if$ -} - -FUNCTION {format.book.crossref} -{ volume empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - "In " - } - { "Volume" volume tie.or.space.connect - " of " * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { "{\em " * series * "\/}" * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.incoll.inproc.crossref} -{ editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - "" - } - { "In {\em " booktitle * "\/}" * } - if$ - } - { "In " key * } - if$ - } - { "In " format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref 
missing$ - { journal emphasize "journal" output.check - format.vol.num.pages output - format.date "year" output.check - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {booklet} -{ output.bibitem - format.authors output - new.block - format.title "title" output.check - howpublished address new.block.checkb - howpublished output - address output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { 
format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - publisher "publisher" output.check - address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.pages output - address empty$ - { organization publisher new.sentence.checkb - organization output - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - organization output - publisher output - } - if$ - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {conference} { inproceedings } - -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization empty$ - 'skip$ - { organization output.nonnull - address output - } - if$ - } - { format.authors output.nonnull } - if$ - new.block - format.btitle "title" output.check - author empty$ - { organization empty$ - { address new.block.checka - address output - } - 'skip$ - if$ - } - { organization address new.block.checkb - organization output - address output - } - if$ - format.edition output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - "Master's thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {misc} -{ 
output.bibitem - format.authors output - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished output - format.date output - new.block - note output - fin.entry - empty.misc.check -} - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.btitle "title" output.check - new.block - "PhD thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization output } - { format.editors output.nonnull } - if$ - new.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - address empty$ - { editor empty$ - { publisher new.sentence.checka } - { organization publisher new.sentence.checkb - organization output - } - if$ - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - editor empty$ - 'skip$ - { organization output } - if$ - publisher output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - note "note" output.check - format.date output - fin.entry -} - -FUNCTION {default.type} { misc } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sept."} - -MACRO {oct} {"Oct."} - -MACRO {nov} 
{"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Comput. Surv."} - -MACRO {acta} {"Acta Inf."} - -MACRO {cacm} {"Commun. ACM"} - -MACRO {ibmjrd} {"IBM J. Res. Dev."} - -MACRO {ibmsj} {"IBM Syst.~J."} - -MACRO {ieeese} {"IEEE Trans. Softw. Eng."} - -MACRO {ieeetc} {"IEEE Trans. Comput."} - -MACRO {ieeetcad} - {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} - -MACRO {ipl} {"Inf. Process. Lett."} - -MACRO {jacm} {"J.~ACM"} - -MACRO {jcss} {"J.~Comput. Syst. Sci."} - -MACRO {scp} {"Sci. Comput. Programming"} - -MACRO {sicomp} {"SIAM J. Comput."} - -MACRO {tocs} {"ACM Trans. Comput. Syst."} - -MACRO {tods} {"ACM Trans. Database Syst."} - -MACRO {tog} {"ACM Trans. Gr."} - -MACRO {toms} {"ACM Trans. Math. Softw."} - -MACRO {toois} {"ACM Trans. Office Inf. Syst."} - -MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} - -MACRO {tcs} {"Theoretical Comput. Sci."} - -READ - -FUNCTION {sortify} -{ purify$ - "l" change.case$ -} - -INTEGERS { len } - -FUNCTION {chop.word} -{ 's := - 'len := - s #1 len substring$ = - { s len #1 + global.max$ substring$ } - 's - if$ -} - -FUNCTION {sort.format.names} -{ 's := - #1 'nameptr := - "" - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { nameptr #1 > - { " " * } - 'skip$ - if$ - s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := - nameptr numnames = t "others" = and - { "et al" * } - { t sortify * } - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {sort.format.title} -{ 't := - "A " #2 - "An " #3 - "The " #4 t chop.word - chop.word - chop.word - sortify - #1 global.max$ substring$ -} - -FUNCTION {author.sort} -{ author empty$ - { key empty$ - { "to sort, need author or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.editor.sort} -{ author empty$ - { editor empty$ - { key empty$ - { "to sort, need author, editor, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ 
- } - { editor sort.format.names } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.organization.sort} -{ author empty$ - { organization empty$ - { key empty$ - { "to sort, need author, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {editor.organization.sort} -{ editor empty$ - { organization empty$ - { key empty$ - { "to sort, need editor, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { editor sort.format.names } - if$ -} - -FUNCTION {presort} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.sort - { type$ "proceedings" = - 'editor.organization.sort - { type$ "manual" = - 'author.organization.sort - 'author.sort - if$ - } - if$ - } - if$ - " " - * - year field.or.null sortify - * - " " - * - title field.or.null - sort.format.title - * - #1 entry.max$ substring$ - 'sort.key$ := -} - -ITERATE {presort} - -SORT - -STRINGS { longest.label } - -INTEGERS { number.label longest.label.width } - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} - -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} - -EXECUTE {initialize.longest.label} - -ITERATE {longest.label.pass} - -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * - "}\setlength{\itemsep}{-1ex}\small" * write$ newline$ -} - -EXECUTE {begin.bib} - -EXECUTE {init.state.consts} - -ITERATE {call.type$} - -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} - -EXECUTE {end.bib} - -% end of file ieee.bst -% 
--------------------------------------------------------------- diff --git a/lnet/doc/mpi.fig b/lnet/doc/mpi.fig deleted file mode 100644 index e1a91b5..0000000 --- a/lnet/doc/mpi.fig +++ /dev/null @@ -1,117 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 150 1650 900 2025 -4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001 -4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001 --6 -6 150 150 900 525 -4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001 -4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001 --6 -6 2550 4125 3150 4725 -4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001 -4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001 -4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001 --6 -6 1050 1575 1950 1875 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001 --6 -6 5400 1575 6300 2175 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001 --6 -6 5400 2400 6300 3000 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001 --6 -6 1050 2400 1950 2700 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001 --6 -6 1050 825 1950 1125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 825 1950 825 1950 1125 1050 1125 1050 825 -4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1575 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2025 4050 3375 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 150 675 6600 675 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 150 1350 6600 1350 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 
4125 3300 4125 3300 4725 2400 4725 2400 4125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 4500 4050 3675 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 1725 5400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2550 5400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2850 4050 3450 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1800 1500 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 825 3300 825 3300 1275 2400 1275 2400 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 2625 1500 4125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 300 1500 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 975 2400 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 1725 2400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 2550 2400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 4275 2400 4275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300 -4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001 -4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001 -4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001 -4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001 -4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001 -4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 150 840 2850 1875 n - 
short_len\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001 -4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001 diff --git a/lnet/doc/portals.fig b/lnet/doc/portals.fig deleted file mode 100644 index 9b1271b..0000000 --- a/lnet/doc/portals.fig +++ /dev/null @@ -1,68 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 900 1650 900 1650 1200 1350 1200 1350 900 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 4200 375 4200 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 525 600 1125 600 1125 2100 525 2100 525 600 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 1425 4425 1425 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3600 825 3750 825 3750 1125 3600 1125 3600 825 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2025 1425 2550 1425 -2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 4425 750 4875 750 4875 1125 4425 1125 4425 750 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3675 975 4425 975 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2 - 0 0 1.00 60.00 120.00 - 825 1050 1350 1050 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1950 1575 1950 1800 1950 1950 2100 1950 2250 
1950 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 975 1125 975 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 1125 1125 1125 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7 - 0 0 1.00 60.00 120.00 - 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975 - 3600 975 - 0.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001 -4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001 -4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001 -4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001 -4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001 -4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001 -4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001 diff --git a/lnet/doc/portals3.bib b/lnet/doc/portals3.bib deleted file mode 100644 index 323b99f..0000000 --- a/lnet/doc/portals3.bib +++ /dev/null @@ -1,124 +0,0 @@ -@Article{ Cplant, - title = { {M}assively {P}arallel {C}omputing with - {C}ommodity {C}omponents }, - author = { Ron Brightwell and David S. Greenberg and Arthur - B. 
Maccabe and Rolf Riesen }, - journal = { Parallel Computing }, - volume = { 26 }, - month = { February }, - pages = { 243-266 }, - year = { 2000 } -} - -@Manual{ Portals, - organization = { Sandia National Laboratories }, - title = { {P}uma {P}ortals }, - note = { http://www.cs.sandia.gov/puma/portals }, - year = { 1997 } -} - -@Techreport{ VIA, - title = { {V}irtual {I}nterface {A}rchitecture - {S}pecification {V}ersion 1.0 }, - author = { {Compaq, Microsoft, and Intel} }, - institution = { Compaq, Microsoft, and Intel }, - month = { December }, - year = { 1997 } -} - -@Techreport{ ST, - title = { {I}nformation {T}echnology - {S}cheduled - {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 }, - author = { {Task Group of Technical Committee T11} }, - institution = { Accredited Standards Committee NCITS }, - month = { July }, - year = { 1998 } -} - -@Manual{ TFLOPS, - organization = { Sandia National Laboratories }, - title = { ASCI Red }, - note = { http://www.sandia.gov/ASCI/TFLOP }, - year = { 1996 } -} - -@Techreport{ GM, - title = { The {GM} {M}essage {P}assing {S}ystem }, - author = { {Myricom, Inc.} }, - institution = { {Myricom, Inc.} }, - year = { 1997 }, -} - -@Article{ MPIstandard, - title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard }, - author = { {Message Passing Interface Forum} }, - journal = { The International Journal of Supercomputer Applications - and High Performance Computing }, - volume = { 8 }, - year = { 1994 } -} - -@Inproceedings{ PumaOS, - author = "Lance Shuler and Chu Jong and Rolf Riesen and - David van Dresser and Arthur B. Maccabe and - Lee Ann Fisk and T. Mack Stallcup", - booktitle = "Proceeding of the 1995 Intel Supercomputer - User's Group Conference", - title = "The {P}uma Operating System for Massively Parallel Computers", - organization = "Intel Supercomputer User's Group", - year = 1995 -} - -@InProceedings{ SUNMOS, -author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and - Stephen R. 
Wheat", -title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide", -booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994 - Annual North America Users' Conference.", -year = 1994, -pages = "245--251", -month = "June", -location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps" -} - -@InProceedings { PumaMPI, - title = { Design and Implementation of {MPI} on {P}uma Portals }, - author = { Ron Brightwell and Lance Shuler }, - booktitle = { Proceedings of the Second MPI Developer's Conference }, - pages = { 18-25 }, - month = { July }, - year = { 1996 } -} - -@Inproceedings{ FM2, - author = { Mario Lauria and Scott Pakin and Andrew Chien }, - title = { {E}fficient {L}ayering for {H}igh {S}peed - {C}ommunication: {F}ast {M}essages 2.x }, - Booktitle = { Proceedings of the IEEE International Symposium - on High Performance Distributed Computing }, - year = { 1998 } -} - -@Manual { CraySHMEM, - title = "SHMEM Technical Note for C, SG-2516 2.3", - organization = "Cray Research, Inc.", - month = "October", - year = 1994 -} - -@Manual { MPI2, - title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface", - organization = "Message Passing Interface Forum", - note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html", - month = "July", - year = 1997 -} - -@InProceedings { PMMPI, - title = { {The Design and Implementation of Zero Copy MPI Using - Commodity Hardware with a High Performance Network} }, - author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori - and Yutaka Ishikawa }, - booktitle = { Proceedings of the ICS }, - year = { 1998 } -} diff --git a/lnet/doc/portals3.lyx b/lnet/doc/portals3.lyx deleted file mode 100644 index 8429280..0000000 --- a/lnet/doc/portals3.lyx +++ /dev/null @@ -1,15944 +0,0 @@ -#LyX 1.2 created this file. 
For more info see http://www.lyx.org/ -\lyxformat 220 -\textclass report -\begin_preamble -\usepackage{fullpage} -\renewenvironment{comment}% -{\begin{quote}\textbf{Discussion}: \slshape}% -{\end{quote}} -\pagestyle{myheadings} -\end_preamble -\language american -\inputencoding auto -\fontscheme pslatex -\graphics default -\paperfontsize 10 -\spacing single -\papersize letterpaper -\paperpackage a4 -\use_geometry 0 -\use_amsmath 0 -\use_natbib 0 -\use_numerical_citations 0 -\paperorientation portrait -\secnumdepth 2 -\tocdepth 2 -\paragraph_separation indent -\defskip medskip -\quotes_language english -\quotes_times 2 -\papercolumns 1 -\papersides 2 -\paperpagestyle headings - -\layout Title - -The Portals 3.2 Message Passing Interface -\newline - Revision 1.1 -\layout Author - -Ron Brightwell -\begin_inset Foot -collapsed true - -\layout Standard - -R. - Brightwell and R. - Riesen are with the Scalable Computing Systems Department, Sandia National - Laboratories, P.O. - Box 5800, Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov. -\end_inset - -, Arthur B. - Maccabe -\begin_inset Foot -collapsed true - -\layout Standard - -A. - B. - Maccabe is with the Computer Science Department, University of New Mexico, - Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87131-1386, maccabe@cs.unm.edu. -\end_inset - -, Rolf Riesen and Trammell Hudson -\layout Abstract - -This report presents a specification for the Portals 3.2 message passing - interface. - Portals 3.2 is intended to allow scalable, high-performance network communicatio -n between nodes of a parallel computing system. - Specifically, it is designed to support a parallel computing platform composed - of clusters of commodity workstations connected by a commodity system area - network fabric. - In addition, Portals 3.2 is well suited to massively parallel processing - and embedded systems. 
- Portals 3.2 represents an adaption of the data movement layer developed - for massively parallel processing platforms, such as the 4500-node Intel - TeraFLOPS machine. - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -clearpage -\backslash -pagenumbering{roman} -\backslash -setcounter{page}{3} -\end_inset - - -\layout Standard - - -\begin_inset LatexCommand \tableofcontents{} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList figure - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList table - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Chapter* - -Summary of Changes for Revision 1.1 -\layout Enumerate - -Updated version number to 3.2 throughout the document -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sub:PtlGetId} - -\end_inset - -: added -\family typewriter -PTL_SEGV -\family default - to error list for -\shape italic -PtlGetId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: added -\family typewriter -PTL_ML_TOOLONG -\family default - to error list for -\shape italic -PtlMEAttach -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meunlink} - -\end_inset - -: removed text referring to a list of associated memory descriptors. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added text to describe unlinking a free-floating memory descriptor. 
-\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added entry for -\family typewriter -ptl_seq_t -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added definition of -\family typewriter -max_offset -\family default -. -\layout Enumerate - -added text to clarify -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default -. -\end_deeper -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: modified text for -\family typewriter -unlink_op -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: added text to clarify multiple calls to -\shape italic -PtlNIInit -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: added text to clarify -\family typewriter -unlink_nofit -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:receiving} - -\end_inset - -: removed text indicating that an MD will reject a message if the associated - EQ is full. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - error code and text to indicate that only MDs with no pending operations - can be unlinked. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - return code. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added user id field, MD handle field, and NI specific failure field to - the -\family typewriter -ptl_event_t -\family default - structure. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added -\family typewriter -ptl_ni_fail_t -\family default -. 
-\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added -\family typewriter -PTL_EVENT_UNLINK -\family default - event type. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: removed -\shape slanted -PtlTransId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -: listed allowable constants with relevant fields. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: added -\shape italic -PtlMEAttachAny -\shape default - function. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_PT_FULL -\family default - return code for -\shape italic -PtlMEAttachAny -\shape default -. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - -: updated to reflect new event types. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: added -\family typewriter -ptl_nid_t -\family default -, -\family typewriter -ptl_pid_t -\family default -, and -\family typewriter -ptl_uid_t -\family default -. -\layout Chapter* - -Summary of Changes for Version 3.1 -\layout Section* - -Thread Issues -\layout Standard - -The most significant change to the interface from version 3.0 to 3.1 involves - the clarification of how the interface interacts with multi-threaded applicatio -ns. - We adopted a generic thread model in which processes define an address - space and threads share the address space. 
- Consideration of the API in the light of threads led to several clarifications - throughout the document: -\layout Enumerate - -Glossary: -\begin_deeper -\layout Enumerate - -added a definition for -\emph on -thread -\emph default -, -\layout Enumerate - -reworded the definition for -\emph on -process -\emph default -. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - -: added section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:threads} - -\end_inset - - to describe the multi-threading model used by the Portals API. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlinit} - -\end_inset - -: -\emph on -PtlInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlfini} - -\end_inset - -: -\emph on -PtlFini -\emph default - should be called once as the process is terminating and not as each thread - terminates. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -: Portals does not define thread ids. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - -: network interfaces are associated with processes, not threads. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: -\emph on -PtlNIInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqget} - -\end_inset - -: -\emph on -PtlEQGet -\emph default - returns -\family typewriter -PTL_EQ_EMPTY -\family default - if a thread is blocked on -\emph on -PtlEQWait -\emph default -. 
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqwait} - -\end_inset - -: waiting threads are awakened in FIFO order. - -\layout Standard - -Two functions, -\emph on -PtlNIBarrier -\emph default - and -\emph on -PtlEQCount -\emph default - were removed from the API. - -\emph on -PtlNIBarrier -\emph default - was defined to block the calling process until all of the processes in - the application group had invoked -\emph on -PtlNIBarrier -\emph default -. - We now consider this functionality, along with the concept of groups (see - the discussion under -\begin_inset Quotes eld -\end_inset - -other changes -\begin_inset Quotes erd -\end_inset - -), to be part of the runtime system, not part of the Portals API. - -\emph on -PtlEQCount -\emph default - was defined to return the number of events in an event queue. - Because external operations may lead to new events being added and other - threads may remove events, the value returned by -\emph on -PtlEQCount -\emph default - would have to be a hint about the number of events in the event queue. -\layout Section* - -Handling small, unexpected messages -\layout Standard - -Another set of changes relates to handling small unexpected messages in - MPI. - In designing version 3.0, we assumed that each unexpected message would - be placed in a unique memory descriptor. - To avoid the need to process a long list of memory descriptors, we moved - the memory descriptors out of the match list and hung them off of a single - match list entry. - In this way, large unexpected messages would only encounter a single -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - match list entry before encountering the -\begin_inset Quotes eld -\end_inset - -long message -\begin_inset Quotes erd -\end_inset - - match list entry. - Experience with this strategy identified resource management problems with - this approach. 
- In particular, a long sequence of very short (or zero length) messages - could quickly exhaust the memory descriptors constructed for handling unexpecte -d messages. - Our new strategy involves the use of several very large memory descriptors - for small unexpected messages. - Consecutive unexpected messages will be written into the first of these - memory descriptors until the memory descriptor fills up. - When the first of the -\begin_inset Quotes eld -\end_inset - -small memory -\begin_inset Quotes erd -\end_inset - - descriptors fills up, it will be unlinked and subsequent short messages - will be written into the next -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor. - In this case, a -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor will be declared full when it does not have sufficient - space for the largest small unexpected message. -\layout Standard - -This led to two significant changes. - First, each match list entry now has a single memory descriptor rather - than a list of memory descriptors. - Second, in addition to exceeding the operation threshold, a memory descriptor - can be unlinked when the local offset exceeds a specified value. - These changes have led to several changes in this document: -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{subsec:paddress} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed references to the memory descriptor list, -\layout Enumerate - -changed the portals address translation description to indicate that unlinking - a memory descriptor implies unlinking the associated match list entry--match - list entries can no longer be unlinked independently from the memory descriptor. 
- -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed unlink from argument list, -\layout Enumerate - -removed description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -changed wording of the error condition when the Portal table index already - has an associated match list. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -: removed unlink from argument list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: added -\family typewriter -max_offset -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -removed reference to memory descriptor lists, -\layout Enumerate - -changed wording of the error condition when match list entry already has - an associated memory descriptor, -\layout Enumerate - -changed the description of the -\family typewriter -unlink -\family default - argument. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -: removed -\family typewriter -PtlMDInsert -\family default - operation. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: removed references to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: removed references to PtlMDInsert. 
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - -: revised the MPI example to reflect the changes to the interface. - -\layout Standard - -Several changes have been made to improve the general documentation of the - interface. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_EQ_NONE -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_ID_ANY -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: documented the return value -\family typewriter -PTL_INV_EQ -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - -: clarified the description of the -\emph on -PtlMDUpdate -\emph default - function. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:implvals} - -\end_inset - -: introduced a new section to document the implementation defined values. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: modified Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - - to indicate where each constant is introduced and where it is used. - -\layout Section* - -Other changes -\layout Subsection* - -Implementation defined limits (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -The earlier version provided implementation defined limits for the maximum - number of match entries, the maximum number of memory descriptors, etc. 
- Rather than spanning the entire implementation, these limits are now associated - with individual network interfaces. -\layout Subsection* - -Added User Ids (Section -\begin_inset LatexCommand \ref{sec:uid} - -\end_inset - -) -\layout Standard - -Group Ids had been used to simplify access control entries. - In particular, a process could allow access for all of the processes in - a group. - User Ids have been introduced to regain this functionality. - We use user ids to fill this role. -\layout Subsection* - -Removed Group Ids and Rank Ids (Section -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -) -\layout Standard - -The earlier version of Portals had two forms for addressing processes: (node id, process id) and (group id, rank id). - A process group was defined as the collection of processes created during - application launch. - Each process in the group was given a unique rank id in the range 0 to - -\begin_inset Formula $n-1$ -\end_inset - - where -\begin_inset Formula $n$ -\end_inset - - was the number of processes in the group. - We removed groups because they are better handled in the runtime system. -\layout Subsection* - -Match lists (Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -) -\layout Standard - -It is no longer illegal to have an existing match entry when calling PtlMEAttach. - A position argument was added to the list of arguments supplied to -\emph on -PtlMEAttach -\emph default - to specify whether the new match entry is prepended or appended to the - existing list. - If there is no existing match list, the position argument is ignored. -\layout Subsection* - -Unlinking Memory Descriptors (Section -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -) -\layout Standard - -Previously, a memory descriptor could be unlinked if the offset exceeded - a threshold upon the completion of an operation. - In this version, the unlinking is delayed until there is a matching operation - which requires more memory than is currently available in the descriptor. 
- In addition to changes in section, this led to a revision of Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - -. -\layout Subsection* - -Split Phase Operations and Events (Section -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - -) -\layout Standard - -Previously, there were five types of events: -\family typewriter -PTL_EVENT_PUT -\family default -, -\family typewriter -PTL_EVENT_GET -\family default -, -\family typewriter -PTL_EVENT_REPLY -\family default -, -\family typewriter -PTL_EVENT_SENT -\family default -, and -\family typewriter -PTL_EVENT_ACK. - -\family default -The first four of these reflected the completion of potentially long operations. - We have introduced new event types to reflect the fact that long operations - have a distinct starting point and a distinct completion point. - Moreover, the completion may be successful or unsuccessful. -\layout Standard - -In addition to providing a mechanism for reporting failure to higher levels - of software, this split provides an opportunity for improved ordering - semantics. - Previously, if one process initiated two operations (e.g., two put operations) - on a remote process, these operations were guaranteed to complete in the - same order that they were initiated. - Now, we only guarantee that the initiation events are delivered in the - same order. - In particular, the operations do not need to complete in the order that - they were initiated. -\layout Subsection* - -Well known process ids (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -To support the notion of -\begin_inset Quotes eld -\end_inset - -well known process ids, -\begin_inset Quotes erd -\end_inset - - we added a process id argument to the arguments for PtlNIInit. -\layout Chapter* - -Glossary -\layout Description - -API Application Programming Interface. - A definition of the functions and semantics provided by a library of functions. 
- -\layout Description - -Initiator A -\emph on -process -\emph default - that initiates a message operation. - -\layout Description - -Message An application-defined unit of data that is exchanged between -\emph on -processes -\emph default -. - -\layout Description - -Message\SpecialChar ~ -Operation Either a put operation, which writes data, or a get operation, - which reads data. - -\layout Description - -Network A network provides point-to-point communication between -\emph on -nodes -\emph default -. - Internally, a network may provide multiple routes between endpoints (to - improve fault tolerance or to improve performance characteristics); however, - multiple paths will not be exposed outside of the network. - -\layout Description - -Node A node is an endpoint in a -\emph on -network -\emph default -. - Nodes provide processing capabilities and memory. - A node may provide multiple processors (an SMP node) or it may act as a - -\emph on -gateway -\emph default - between networks. - -\layout Description - -Process A context of execution. - A process defines a virtual memory (VM) context. - This context is not shared with other processes. - Several threads may share the VM context defined by a process. - -\layout Description - -Target A -\emph on -process -\emph default - that is acted upon by a message operation. - -\layout Description - -Thread A context of execution that shares a VM context with other threads. - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\layout Standard - -\backslash -setcounter{page}{1} -\backslash -pagenumbering{arabic} -\end_inset - - -\layout Chapter - -Introduction -\begin_inset LatexCommand \label{sec:intro} - -\end_inset - - -\layout Section - -Overview -\layout Standard - -This document describes an application programming interface for message - passing between nodes in a system area network. 
- The goal of this interface is to improve the scalability and performance - of network communication by defining the functions and semantics of message - passing required for scaling a parallel computing system to ten thousand - nodes. - This goal is achieved by providing an interface that will allow a quality - implementation to take advantage of the inherently scalable design of Portals. -\layout Standard - -This document is divided into several sections: -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:intro} - -\end_inset - ----Introduction This section describes the purpose and scope of the Portals - API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - ----An\SpecialChar ~ -Overview\SpecialChar ~ -of\SpecialChar ~ -the\SpecialChar ~ -Portals\SpecialChar ~ -3.1\SpecialChar ~ -API This section gives a brief overview of the - Portals API. - The goal is to introduce the key concepts and terminology used in the descripti -on of the API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:api} - -\end_inset - ----The\SpecialChar ~ -Portals\SpecialChar ~ -3.2\SpecialChar ~ -API This section describes the functions and semantics of - the Portals application programming interface. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - ----The\SpecialChar ~ -Semantics\SpecialChar ~ -of\SpecialChar ~ -Message\SpecialChar ~ -Transmission This section describes the semantics - of message transmission. - In particular, the information transmitted in each type of message and - the processing of incoming messages. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:examples} - -\end_inset - ----Examples This section presents several examples intended to illustrate - the use of the Portals API. 
- -\layout Section - -Purpose -\layout Standard - -Existing message passing technologies available for commodity cluster networking - hardware do not meet the scalability goals required by the Cplant\SpecialChar ~ - -\begin_inset LatexCommand \cite{Cplant} - -\end_inset - - project at Sandia National Laboratories. - The goal of the Cplant project is to construct a commodity cluster that - can scale to the order of ten thousand nodes. - This number greatly exceeds the capacity for which existing message passing - technologies have been designed and implemented. -\layout Standard - -In addition to the scalability requirements of the network, these technologies - must also be able to support a scalable implementation of the Message Passing - Interface (MPI)\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPIstandard} - -\end_inset - - standard, which has become the -\shape italic -de facto -\shape default - standard for parallel scientific computing. - While MPI does not impose any scalability limitations, existing message - passing technologies do not provide the functionality needed to allow implement -ations of MPI to meet the scalability requirements of Cplant. -\layout Standard - -The following are properties of a network architecture that do not impose - any inherent scalability limitations: -\layout Itemize - -Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - - and TCP/IP sockets, have limitations on the number of peer connections - that can be established. - -\layout Itemize - -Network independence - Many communication systems depend on the host processor - to perform operations in order for messages in the network to be consumed. - Message consumption from the network should not be dependent on host processor - activity, such as the operating system scheduler or user-level thread scheduler. 
- -\layout Itemize - -User-level flow control - Many communication systems manage flow control - internally to avoid depleting resources, which can significantly impact - performance as the number of communicating processes increases. - -\layout Itemize - -OS Bypass - High performance network communication should not involve memory - copies into or out of a kernel-managed protocol stack. - -\layout Standard - -The following are properties of a network architecture that do not impose - scalability limitations for an implementation of MPI: -\layout Itemize - -Receiver-managed - Sender-managed message passing implementations require - a persistent block of memory to be available for every process, requiring - memory resources to increase with job size and requiring user-level flow - control mechanisms to manage these resources. - -\layout Itemize - -User-level Bypass - While OS Bypass is necessary for high-performance, it - alone is not sufficient to support the Progress Rule of MPI asynchronous - operations. - -\layout Itemize - -Unexpected messages - Few communication systems have support for receiving - messages for which there is no prior notification. - Support for these types of messages is necessary to avoid flow control - and protocol overhead. - -\layout Section - -Background -\layout Standard - -Portals was originally designed for and implemented on the nCube machine - as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~ - -\begin_inset LatexCommand \cite{SUNMOS} - -\end_inset - - and Puma\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaOS} - -\end_inset - - lightweight kernel development projects. - Portals went through two design phases, the latter of which is used on - the 4500-node Intel TeraFLOPS machine\SpecialChar ~ - -\begin_inset LatexCommand \cite{TFLOPS} - -\end_inset - -. 
- Portals have been very successful in meeting the needs of such a large - machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaMPI} - -\end_inset - -, but also for implementing the scalable run-time environment and parallel - I/O capabilities of the machine. -\layout Standard - -The second generation Portals implementation was designed to take full advantage - of the hardware architecture of large MPP machines. - However, efforts to implement this same design on commodity cluster technology - identified several limitations, due to the differences in network hardware - as well as to shortcomings in the design of Portals. -\layout Section - -Scalability -\layout Standard - -The primary goal in the design of Portals is scalability. - Portals are designed specifically for an implementation capable of supporting - a parallel job running on tens of thousands of nodes. - Performance is critical only in terms of scalability. - That is, the level of message passing performance is characterized by how - far it allows an application to scale and not by how it performs in micro-bench -marks (e.g., a two node bandwidth or latency test). -\layout Standard - -The Portals API is designed to allow for scalability, not to guarantee it. - Portals cannot overcome the shortcomings of a poorly designed application - program. - Applications that have inherent scalability limitations, either through - design or implementation, will not be transformed by Portals into scalable - applications. - Scalability must be addressed at all levels. - Portals do not inhibit scalability, but do not guarantee it either. -\layout Standard - -To support scalability, the Portals interface maintains a minimal amount - of state. - Portals provide reliable, ordered delivery of messages between pairs of - processes. 
- They are connectionless: a process is not required to explicitly establish - a point-to-point connection with another process in order to communicate. - Moreover, all buffers used in the transmission of messages are maintained - in user space. - The target process determines how to respond to incoming messages, and - messages for which there are no buffers are discarded. -\layout Section - -Communication Model -\layout Standard - -Portals combine the characteristics of both one-sided and two-sided communication. - They define a -\begin_inset Quotes eld -\end_inset - -matching put -\begin_inset Quotes erd -\end_inset - - operation and a -\begin_inset Quotes eld -\end_inset - -matching get -\begin_inset Quotes erd -\end_inset - - operation. - The destination of a put (or send) is not an explicit address; instead, - each message contains a set of match bits that allow the receiver to determine - where incoming messages should be placed. - This flexibility allows Portals to support both traditional one-sided operation -s and two-sided send/receive operations. -\layout Standard - -Portals allows the target to determine whether incoming messages are acceptable. - A target process can choose to accept message operations from any specific - process or can choose to ignore message operations from any specific process. -\layout Section - -Zero Copy, OS Bypass and Application Bypass -\layout Standard - -In traditional system architectures, network packets arrive at the network - interface card (NIC), are passed through one or more protocol layers in - the operating system, and eventually copied into the address space of the - application. - As network bandwidth began to approach memory copy rates, reduction of - memory copies became a critical concern. - This concern led to the development of zero-copy message passing protocols - in which message copies are eliminated or pipelined to avoid the loss of - bandwidth. 
-\layout Standard - -A typical zero-copy protocol has the NIC generate an interrupt for the CPU - when a message arrives from the network. - The interrupt handler then controls the transfer of the incoming message - into the address space of the appropriate application. - The interrupt latency, the time from the initiation of an interrupt until - the interrupt handler is running, is fairly significant. - To avoid this cost, some modern NICs have processors that can be programmed - to implement part of a message passing protocol. - Given a properly designed protocol, it is possible to program the NIC to - control the transfer of incoming messages, without needing to interrupt - the CPU. - Because this strategy does not need to involve the OS on every message - transfer, it is frequently called -\begin_inset Quotes eld -\end_inset - -OS Bypass. -\begin_inset Quotes erd -\end_inset - - ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - -, FM\SpecialChar ~ - -\begin_inset LatexCommand \cite{FM2} - -\end_inset - -, GM\SpecialChar ~ - -\begin_inset LatexCommand \cite{GM} - -\end_inset - -, and Portals are examples of OS Bypass protocols. -\layout Standard - -Many protocols that support OS Bypass still require that the application - actively participate in the protocol to ensure progress. - As an example, the long message protocol of PM requires that the application - receive and reply to a request to put or get a long message. - This complicates the runtime environment, requiring a thread to process - incoming requests, and significantly increases the latency required to - initiate a long message protocol. - The Portals message passing protocol does not require activity on the part - of the application to ensure progress. 
- We use the term -\begin_inset Quotes eld -\end_inset - -Application Bypass -\begin_inset Quotes erd -\end_inset - - to refer to this aspect of the Portals protocol. -\layout Section - -Faults -\layout Standard - -Given the number of components that we are dealing with and the fact that - we are interested in supporting applications that run for very long times, - failures are inevitable. - The Portals API recognizes that the underlying transport may not be able - to successfully complete an operation once it has been initiated. - This is reflected in the fact that the Portals API reports three types - of events: events indicating the initiation of an operation, events indicating - the successful completion of an operation, and events indicating the unsuccessf -ul completion of an operation. - Every initiation event is eventually followed by a successful completion - event or an unsuccessful completion event. -\layout Standard - -Between the time an operation is started and the time that the operation - completes (successfully or unsuccessfully), any memory associated with - the operation should be considered volatile. - That is, the memory may be changed in unpredictable ways while the operation - is progressing. - Once the operation completes, the memory associated with the operation - will not be subject to further modification (from this operation). - Notice that unsuccessful operations may alter memory in an essentially - unpredictable fashion. -\layout Chapter - -An Overview of the Portals API -\begin_inset LatexCommand \label{sec:apiover} - -\end_inset - - -\layout Standard - -In this section, we give a conceptual overview of the Portals API. - The goal is to provide a context for understanding the detailed description - of the API presented in the next section. -\layout Section - -Data Movement -\begin_inset LatexCommand \label{sec:dmsemantics} - -\end_inset - - -\layout Standard - -A Portal represents an opening in the address space of a process. 
- Other processes can use a Portal to read (get) or write (put) the memory - associated with the portal. - Every data movement operation involves two processes, the -\series bold -initiator -\series default - and the -\series bold -target -\series default -. - The initiator is the process that initiates the data movement operation. - The target is the process that responds to the operation by either accepting - the data for a put operation, or replying with the data for a get operation. -\layout Standard - -In this discussion, activities attributed to a process may refer to activities - that are actually performed by the process or -\emph on -on behalf of the process -\emph default -. - The inclusiveness of our terminology is important in the context of -\emph on -application bypass -\emph default -. - In particular, when we note that the target sends a reply in the case of - a get operation, it is possible that the reply will be generated by another - component in the system, bypassing the application. -\layout Standard - -Figures\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:put} - -\end_inset - - and -\begin_inset LatexCommand \ref{fig:get} - -\end_inset - - present graphical interpretations of the Portal data movement operations: - put and get. - In the case of a put operation, the initiator sends a put request message - containing the data to the target. - The target translates the Portal addressing information in the request - using its local Portal structures. - When the request has been processed, the target optionally sends an acknowledge -ment message. 
-\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename put.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Put (Send) -\begin_inset LatexCommand \label{fig:put} - -\end_inset - - -\end_inset - - -\layout Standard - -In the case of a get operation, the initiator sends a get request to the - target. - As with the put operation, the target translates the Portal addressing - information in the request using its local Portal structures. - Once it has translated the Portal addressing information, the target sends - a reply that includes the requested data. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename get.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Get -\begin_inset LatexCommand \label{fig:get} - -\end_inset - - -\end_inset - - -\layout Standard - -We should note that Portal address translations are only performed on nodes - that respond to operations initiated by other nodes. - Acknowledgements and replies to get operations bypass the portals address - translation structures. -\layout Section - -Portal Addressing -\begin_inset LatexCommand \label{subsec:paddress} - -\end_inset - - -\layout Standard - -One-sided data movement models (e.g., shmem\SpecialChar ~ - -\begin_inset LatexCommand \cite{CraySHMEM} - -\end_inset - -, ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, MPI-2\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPI2} - -\end_inset - -) typically use a triple to address memory on a remote node. - This triple consists of a process id, memory buffer id, and offset. 
- The process id identifies the target process, the memory buffer id specifies - the region of memory to be used for the operation, and the offset specifies - an offset within the memory buffer. -\layout Standard - -In addition to the standard address components (process id, memory buffer - id, and offset), a Portal address includes a set of match bits. - This addressing model is appropriate for supporting one-sided operations - as well as traditional two-sided message passing operations. - Specifically, the Portals API provides the flexibility needed for an efficient - implementation of MPI-1, which defines two-sided operations with one-sided - completion semantics. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:portals} - -\end_inset - - presents a graphical representation of the structures used by a target - in the interpretation of a Portal address. - The process id is used to route the message to the appropriate node and - is not reflected in this diagram. - The memory buffer id, called the -\series bold -portal id -\series default -, is used as an index into the Portal table. - Each element of the Portal table identifies a match list. - Each element of the match list specifies two bit patterns: a set of -\begin_inset Quotes eld -\end_inset - -don't care -\begin_inset Quotes erd -\end_inset - - bits, and a set of -\begin_inset Quotes eld -\end_inset - -must match -\begin_inset Quotes erd -\end_inset - - bits. - In addition to the two sets of match bits, each match list element has - at most one memory descriptor. - Each memory descriptor identifies a memory region and an optional event - queue. - The memory region specifies the memory to be used in the operation and - the event queue is used to record information about these operations. 
-\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename portals.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 305pt - lyxheight 106pt -\end_inset - - -\layout Caption - -Portal Addressing Structures -\begin_inset LatexCommand \label{fig:portals} - -\end_inset - - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - - illustrates the steps involved in translating a Portal address, starting - from the first element in a match list. - If the match criteria specified in the match list entry are met and the - memory descriptor list accepts the operation -\begin_inset Foot -collapsed true - -\layout Standard - -Memory descriptors can reject operations because a threshold has been exceeded - or because the memory region does not have sufficient space, see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - -, the operation (put or get) is performed using the memory region specified - in the memory descriptor. - If the memory descriptor specifies that it is to be unlinked when a threshold - has been exceeded, the match list entry is removed from the match list - and the resources associated with the memory descriptor and match list - entry are reclaimed. - Finally, if there is an event queue specified in the memory descriptor, - the operation is logged in the event queue. 
-\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename flow_new.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 447pt - lyxheight 282pt -\end_inset - - -\layout Caption - -Portals Address Translation -\begin_inset LatexCommand \label{fig:flow} - -\end_inset - - -\end_inset - - -\layout Standard - -If the match criteria specified in the match list entry are not met, or - there is no memory descriptor associated with the match list entry, or - the memory descriptor associated with the match list entry rejects the - operation, the address translation continues with the next match list entry. - If the end of the match list has been reached, the address translation - is aborted and the incoming request is discarded. -\layout Section - -Access Control -\layout Standard - -A process can control access to its portals using an access control list. - Each entry in the access control list specifies a process id and a Portal - table index. - The access control list is actually an array of entries. - Each incoming request includes an index into the access control list (i.e., - a -\begin_inset Quotes eld -\end_inset - -cookie -\begin_inset Quotes erd -\end_inset - - or hint). - If the id of the process issuing the request doesn't match the id specified - in the access control list entry or the Portal table index specified in - the request doesn't match the Portal table index specified in the access - control list entry, the request is rejected. - Process identifiers and Portal table indexes may include wild card values - to increase the flexibility of this mechanism. - -\layout Standard - -Two aspects of this design merit further discussion. - First, the model assumes that the information in a message header, the - sender's id in particular, is trustworthy.
- In most contexts, we assume that the entity that constructs the header - is trustworthy; however, using cryptographic techniques, we could easily - devise a protocol that would ensure the authenticity of the sender. -\layout Standard - -Second, because the access check is performed by the receiver, it is possible - that a malicious process will generate thousands of messages that will - be denied by the receiver. - This could saturate the network and/or the receiver, resulting in a -\emph on -denial of service -\emph default - attack. - Moving the check to the sender using capabilities, would remove the potential - for this form of attack. - However, the solution introduces the complexities of capability management - (exchange of capabilities, revocation, protections, etc). -\layout Section - -Multi-threaded Applications -\begin_inset LatexCommand \label{sec:threads} - -\end_inset - - -\layout Standard - -The Portals API supports a generic view of multi-threaded applications. - From the perspective of the Portals API, an application program is defined - by a set of processes. - Each process defines a unique address space. - The Portals API defines access to this address space from other processes - (using portals addressing and the data movement operations). - A process may have one or more -\emph on -threads -\emph default - executing in its address space. - -\layout Standard - -With the exception of -\emph on -PtlEQWait -\emph default - every function in the Portals API is non-blocking and atomic with respect - to both other threads and external operations that result from data movement - operations. - While individual operations are atomic, sequences of these operations may - be interleaved between different threads and with external operations. - The Portals API does not provide any mechanisms to control this interleaving. - It is expected that these mechanisms will be provided by the API used to - create threads. 
-\layout Chapter - -The Portals API -\begin_inset LatexCommand \label{sec:api} - -\end_inset - - -\layout Section - -Naming Conventions -\begin_inset LatexCommand \label{sec:conv} - -\end_inset - - -\layout Standard - -The Portals API defines two types of entities: functions and types. - Function names always start with -\emph on -Ptl -\emph default - and use mixed upper and lower case. - When used in the body of this report, function names appear in italic face, - e.g., -\emph on -PtlInit -\emph default -. - The functions associated with an object type will have names that start - with -\emph on -Ptl -\emph default -, followed by the two letter object type code shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - As an example, the function -\emph on -PtlEQAlloc -\emph default - allocates resources for an event queue. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Object Type Codes -\begin_inset LatexCommand \label{tab:objcodes} - -\end_inset - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\newline - -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\emph on -xx -\end_inset - - -\begin_inset Text - -\layout Standard - - Name -\end_inset - - -\begin_inset Text - -\layout Standard - - Section -\end_inset - - - - -\begin_inset Text - -\layout Standard - -EQ -\end_inset - - -\begin_inset Text - -\layout Standard - - Event Queue -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - MD -\end_inset - - -\begin_inset Text - -\layout Standard - - Memory Descriptor -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - - - - -\begin_inset
Text - -\layout Standard - - ME -\end_inset - - -\begin_inset Text - -\layout Standard - - Match list Entry -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - NI -\end_inset - - -\begin_inset Text - -\layout Standard - - Network Interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Standard - -Type names use lower case with underscores to separate words. - Each type name starts with -\family typewriter -ptl -\family default -_ and ends with -\family typewriter -_t -\family default -. - When used in the body of this report, type names appear in a fixed font, - e.g., -\family typewriter -ptl_match_bits_t -\family default -. -\layout Standard - -Names for constants use upper case with underscores to separate words. - Each constant name starts with -\family typewriter -PTL_ -\family default -. - When used in the body of this report, constant names appear in a fixed font, - e.g., -\family typewriter -PTL_OK -\family default -. -\layout Section - -Base Types -\layout Standard - -The Portals API defines a variety of base types. - These types represent a simple renaming of the base types provided by the - C programming language. - In most cases these new type names have been introduced to improve type - safety and to avoid issues arising from differences in representation sizes - (e.g., 16-bit or 32-bit integers). -\layout Subsection - -Sizes -\begin_inset LatexCommand \label{sec:size-t} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_size_t -\family default - is an unsigned 64-bit integral type used for representing sizes.
-\layout Subsection - -Handles -\begin_inset LatexCommand \label{sec:handle-type} - -\end_inset - - -\layout Standard - -Objects maintained by the API are accessed through handles. - Handle types have names of the form -\family typewriter -ptl_handle_ -\emph on -xx -\emph default -_t -\family default -, where -\emph on -xx -\emph default - is one of the two letter object type codes shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - For example, the type -\family typewriter -ptl_handle_ni_t -\family default - is used for network interface handles. -\layout Standard - -Each type of object is given a unique handle type to enhance type checking. - The type, -\family typewriter -ptl_handle_any_t -\family default -, can be used when a generic handle is needed. - Every handle value can be converted into a value of type -\family typewriter -ptl_handle_any_t -\family default - without loss of information. -\layout Standard - -Handles are not simple values. - Every portals object is associated with a specific network interface and - an identifier for this interface (along with an object identifier) is part - of the handle for the object. -\layout Standard - -The special value -\family typewriter -PTL_EQ_NONE -\family default -, of type -\family typewriter -ptl_handle_eq_t -\family default -, is used to indicate the absence of an event queue. - See sections -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - for uses of this value. -\layout Subsection - -Indexes -\begin_inset LatexCommand \label{sec:index-type} - -\end_inset - - -\layout Standard - -The types -\family typewriter -ptl_pt_index_t -\family default - and -\family typewriter -ptl_ac_index_t -\family default - are integral types used for representing Portal table indexes and access - control tables indexes, respectively. 
- See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - - for limits on values of these types. -\layout Subsection - -Match Bits -\begin_inset LatexCommand \label{sec:mb-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_match_bits_t -\family default - is capable of holding unsigned 64-bit integer values. -\layout Subsection - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_interface_t -\family default - is an integral type used for identifying different network interfaces. - Users will need to consult the local documentation to determine appropriate - values for the interfaces available. - The special value -\family typewriter -PTL_IFACE_DEFAULT -\family default - identifies the default interface. -\layout Subsection - -Identifiers -\begin_inset LatexCommand \label{sec:id-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_nid_t -\family default - is an integral type used for representing node ids -\family typewriter -, ptl_pid_t -\family default - is an integral type for representing process ids, and -\family typewriter -ptl_uid_t -\family default - is an integral type for representing user ids. -\layout Standard - -The special value -\family typewriter -PTL_PID_ANY -\family default - matches any process identifier, PTL_NID_ANY matches any node identifier, - and -\family typewriter -PTL_UID_ANY -\family default - matches any user identifier. - See sections -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - for uses of these values.
-\layout Subsection - -Status Registers -\begin_inset LatexCommand \label{sec:stat-type} - -\end_inset - - -\layout Standard - -Each network interface maintains an array of status registers that can be - accessed using the -\family typewriter -PtlNIStatus -\family default - function (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - The type -\family typewriter -ptl_sr_index_t -\family default - defines the types of indexes that can be used to access the status registers. - The only index defined for all implementations is -\family typewriter -PTL_SR_DROP_COUNT -\family default - which identifies the status register that counts the dropped requests for - the interface. - Other indexes (and registers) may be defined by the implementation. -\layout Standard - -The type -\family typewriter -ptl_sr_value_t -\family default - defines the types of values held in status registers. - This is a signed integer type. - The size is implementation dependent, but must be at least 32 bits. -\layout Section - -Initialization and Cleanup -\begin_inset LatexCommand \label{sec:init} - -\end_inset - - -\layout Standard - -The Portals API includes a function, -\emph on -PtlInit -\emph default -, to initialize the library and a function, -\emph on -PtlFini -\emph default -, to cleanup after the application is done using the library. -\layout Subsection - -PtlInit -\begin_inset LatexCommand \label{sec:ptlinit} - -\end_inset - - -\layout LyX-Code - -int PtlInit( int *max_interfaces ); -\layout Standard -\noindent -The -\emph on -PtlInit -\emph default - function initializes the Portals library. - PtlInit must be called at least once by a process before any thread makes - a Portals function call, but may be safely called more than once. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_FAIL Indicates an error during initialization. 
- -\layout Description - -PTL_SEGV Indicates that -\family typewriter -max_interfaces -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -max_interfaces -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the maximum number of interfaces - that can be initialized. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlFini -\begin_inset LatexCommand \label{sec:ptlfini} - -\end_inset - - -\layout LyX-Code - -void PtlFini( void ); -\layout Standard -\noindent -The -\emph on -PtlFini -\emph default - function cleans up after the Portals library is no longer needed by a process. - After this function is called, calls to any of the functions defined by - the Portal API or use of the structures set up by the Portals API will - result in undefined behavior. - This function should be called once and only once during termination by - a process. - Typically, this function will be called in the exit sequence of a process. - Individual threads should not call PtlFini when they terminate. -\layout Section - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni} - -\end_inset - - -\layout Standard - -The Portals API supports the use of multiple network interfaces. - However, each interface is treated as an independent entity. - Combining interfaces (e.g., -\begin_inset Quotes eld -\end_inset - -bonding -\begin_inset Quotes erd -\end_inset - - to create a higher bandwidth connection) must be implemented by the application - or embedded in the underlying network. - Interfaces are treated as independent entities to make it easier to cache - information on individual network interface cards. 
-\layout Standard - -Once initialized, each interface provides a Portal table, an access control - table, and a collection of status registers. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for a discussion of updating Portal table entries using the -\emph on -PtlMEAttach -\emph default - function. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - for a discussion of the initialization and updating of entries in the access - control table. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - for a discussion of the -\emph on -PtlNIStatus -\emph default - function which can be used to determine the value of a status register. -\layout Standard - -Every other type of Portal object (e.g., memory descriptor, event queue, or - match list entry) is associated with a specific network interface. - The association to a network interface is established when the object is - created and is encoded in the handle for the object. -\layout Standard - -Each network interface is initialized and shutdown independently. - The initialization routine, -\emph on -PtlNIInit -\emph default -, returns a handle for an interface object which is used in all subsequent - Portal operations. - The -\emph on -PtlNIFini -\emph default - function is used to shutdown an interface and release any resources that - are associated with the interface. - Network interface handles are associated with processes, not threads. - All threads in a process share all of the network interface handles. 
-\layout Standard - -The Portals API also defines the -\emph on -PtlNIStatus -\emph default - function to query the status registers for a network interface, the -\emph on -PtlNIDist -\emph default - function to determine the -\begin_inset Quotes eld -\end_inset - -distance -\begin_inset Quotes erd -\end_inset - - to another process, and the -\emph on -PtlNIHandle -\emph default - function to determine the network interface that an object is associated - with. -\layout Subsection - -PtlNIInit -\begin_inset LatexCommand \label{sec:niinit} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - int max_match_entries; -\newline - int max_mem_descriptors; -\newline - int max_event_queues; -\newline - ptl_ac_index_t max_atable_index; -\newline - ptl_pt_index_t max_ptable_index; -\newline -} ptl_ni_limits_t; -\newline - -\newline -int PtlNIInit( ptl_interface_t interface, -\newline - ptl_pid_t pid, -\newline - ptl_ni_limits_t* desired, -\newline - ptl_ni_limits_t* actual, -\newline - ptl_handle_ni_t* handle ); -\layout Standard - -Values of type -\family typewriter -ptl_ni_limits_t -\family default - include the following members: -\layout Description - -max_match_entries Maximum number of match entries that can be allocated - at any one time. -\layout Description - -max_mem_descriptors Maximum number of memory descriptors that can be allocated - at any one time. -\layout Description - -max_event_queues Maximum number of event queues that can be allocated at - any one time. -\layout Description - -max_atable_index Largest access control table index for this interface, - valid indexes range from zero to -\family typewriter -max_atable_index -\family default -, inclusive. -\layout Description - -max_ptable_index Largest Portal table index for this interface, valid indexes - range from zero to -\family typewriter -max_ptable_index -\family default -, inclusive.
-\layout Standard -\noindent -The -\emph on -PtlNIInit -\emph default - function is used to initialize the Portals API for a network interface. - This function must be called at least once by each process before any other - operations that apply to the interface by any process or thread. - For subsequent calls to -\shape italic -PtlNIInit -\shape default - from within the same process (either by different threads or the same thread), - the desired limits will be ignored and the call will return the existing - NI handle. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INIT_DUP Indicates a duplicate initialization of -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INIT_INV Indicates that -\family typewriter -interface -\family default - is not a valid network interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to initialize the - interface. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -pid -\family default - is not a valid process id. -\layout Description - -PTL_SEGV Indicates that -\family typewriter -actual -\family default - or -\family typewriter - handle -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Identifies the network interface to be initialized. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - for a discussion of values used to identify network interfaces.)
-\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -pid -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Identifies the desired process id (for well known process ids). - The value -\family typewriter -PTL_PID_ANY -\family default - may be used to have the process id assigned by the underlying library. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -desired -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -If non-NULL, points to a structure that holds the desired limits. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -actual -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, the location pointed to by actual will hold the actual - limits. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the interface. -\end_inset - - - - -\end_inset - - -\layout Comment - -The use of desired is implementation dependent. - In particular, an implementation may choose to ignore this argument. -\layout Subsection - -PtlNIFini -\begin_inset LatexCommand \label{sec:nifini} - -\end_inset - - -\layout LyX-Code - -int PtlNIFini( ptl_handle_ni_t interface ); -\layout Standard -\noindent -The -\emph on -PtlNIFini -\emph default - function is used to release the resources allocated for a network interface. 
- Once the -\emph on -PtlNIFini -\emph default - operation has been started, the results of pending API operations (e.g., - operations initiated by another thread) for this interface are undefined. - Similarly, the effects of incoming operations (puts and gets) or return - values (acknowledgements and replies) for this interface are undefined. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard - -A handle for the interface to shutdown. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlNIStatus -\begin_inset LatexCommand \label{sec:nistatus} - -\end_inset - - -\layout LyX-Code - -int PtlNIStatus( ptl_handle_ni_t interface, -\newline - ptl_sr_index_t status_register, -\newline - ptl_sr_value_t* status ); -\layout Standard -\noindent -The -\emph on -PtlNIStatus -\emph default - function returns the value of a status register for the specified interface. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - for more information on status register indexes and status register values.) -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. 
- -\layout Description - -PTL_INV_SR_INDX Indicates that -\family typewriter -status_register -\family default - is not a valid status register. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -status -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -status_register -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -An index for the status register to read. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -status -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the current value of the status - register. -\end_inset - - - - -\end_inset - - -\layout Comment - -The only status register that must be defined is a drop count register ( -\family typewriter -PTL_SR_DROP_COUNT -\family default -). - Implementations may define additional status registers. - Identifiers for the indexes associated with these registers should start - with the prefix -\family typewriter -PTL_SR_ -\family default -. -\layout Subsection - -PtlNIDist -\layout LyX-Code - -int PtlNIDist( ptl_handle_ni_t interface, -\newline - ptl_process_id_t process, -\newline - unsigned long* distance ); -\layout Standard -\noindent -The -\emph on -PtlNIDist -\emph default - function returns the distance to another process using the specified interface. 
- Distances are only defined relative to an interface. - Distance comparisons between different interfaces on the same process may - be meaningless. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -process -\family default - is not a valid process identifier. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -distance -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -process -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -An identifier for the process whose distance is being requested. - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -distance -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the distance to the remote - process. -\end_inset - - - - -\end_inset - - -\layout Comment - -This function should return a static measure of distance. - Examples include minimum latency, the inverse of available bandwidth, or - the number of switches between the two endpoints. 
-\layout Subsection - -PtlNIHandle -\layout LyX-Code - -int PtlNIHandle( ptl_handle_any_t handle, -\newline - ptl_handle_ni_t* interface ); -\layout Standard -\noindent -The -\emph on -PtlNIHandle -\emph default - function returns a handle for the network interface with which the object - identified by -\family typewriter -handle -\family default - is associated. - If the object identified by -\family typewriter -handle -\family default - is a network interface, this function returns the same value it is passed. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_HANDLE Indicates that -\family typewriter -handle -\family default - is not a valid handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -interface -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the object. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the network interface - associated with -\family typewriter -handle -\family default -. -\end_inset - - - - -\end_inset - - -\layout Comment - -Every handle should encode the network interface and the object id relative - to this handle. - Both are presumably encoded using integer values. 
-\layout Section - -User Identification -\begin_inset LatexCommand \label{sec:uid} - -\end_inset - - -\layout Standard - -Every process runs on behalf of a user. - -\layout Subsection - -PtlGetUid -\layout LyX-Code - -int PtlGetUid( ptl_handle_ni_t ni_handle, -\newline - ptl_uid_t* uid ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -uid -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ni_handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A network interface handle. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -uid -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the user id for the calling - process. -\end_inset - - - - -\end_inset - - -\layout Comment - -Note that user identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, a process may have multiple - user identifiers. -\layout Section - -Process Identification -\begin_inset LatexCommand \label{sec:pid} - -\end_inset - - -\layout Standard - -Processes that use the Portals API can be identified using a node id and - process id.
- Every node accessible through a network interface has a unique node identifier - and every process running on a node has a unique process identifier. - As such, any process in the computing system can be identified by its node - id and process id. - -\layout Standard - -The Portals API defines a type, -\family typewriter -ptl_process_id_t -\family default - for representing process ids and a function, -\emph on -PtlGetId -\emph default -, which can be used to obtain the id of the current process. -\layout Comment - -The portals API does not include thread identifiers. - Messages are delivered to processes (address spaces) not threads (contexts - of execution). -\layout Subsection - -The Process Id Type -\begin_inset LatexCommand \label{sec:pid-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_nid_t nid; /* node id */ -\newline - ptl_pid_t pid; /* process id */ -\newline -} ptl_process_id_t; -\layout Standard -\noindent -The -\family typewriter -ptl_process_id_t -\family default - type uses two identifiers to represent a process id: a node id and a process - id. - -\layout Subsection - -PtlGetId -\begin_inset LatexCommand \label{sub:PtlGetId} - -\end_inset - - -\layout LyX-Code - -int PtlGetId( ptl_handle_ni_t ni_handle, -\newline - ptl_process_id_t* id ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -id -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A network interface handle. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -id -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the id for the calling process. -\end_inset - - - - -\end_inset - - -\layout Comment - -Note that process identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, it may have multiple - node identifiers. -\layout Section - -Match List Entries and Match Lists -\begin_inset LatexCommand \label{sec:me} - -\end_inset - - -\layout Standard - -A match list is a chain of match list entries. - Each match list entry includes a memory descriptor and a set of match criteria. - The match criteria can be used to reject incoming requests based on process - id or the match bits provided in the request. - A match list is created using the -\emph on -PtlMEAttach -\emph default - or -\shape italic -PtlMEAttachAny -\shape default - functions, which create a match list consisting of a single match list - entry, attaches the match list to the specified Portal index, and returns - a handle for the match list entry. - Match entries can be dynamically inserted and removed from a match list - using the -\emph on -PtlMEInsert -\emph default - and -\emph on -PtlMEUnlink -\emph default - functions. 
-\layout Subsection - -PtlMEAttach -\begin_inset LatexCommand \label{sec:meattach} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t; -\newline - -\layout LyX-Code - -typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t; -\newline - -\layout LyX-Code - -int PtlMEAttach( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ins_pos_t -\family default - are used to control where a new item is inserted. - The value -\family typewriter -PTL_INS_BEFORE -\family default - is used to insert the new item before the current item or before the head - of the list. - The value -\family typewriter -PTL_INS_AFTER -\family default - is used to insert the new item after the current item or after the last - item in the list. - -\layout Standard - -The -\emph on -PtlMEAttach -\emph default - function creates a match list consisting of a single entry and attaches - this list to the Portal table for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PTINDEX Indicates that -\family typewriter -index -\family default - is not a valid Portal table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. 
- -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The Portal table index where the match list should be attached. -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Specifies the match criteria for the process id of the requestor. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -match_bits, ignorebits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Specify the match criteria to apply to the match bits in the incoming request. - The -\family typewriter -ignorebits -\family default - are used to mask out insignificant bits in the incoming match bits. 
- The resulting bits are then compared to the match list entry's match - bits to determine if the incoming request meets the match criteria. -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -unlink -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Indicates the match list entry should be unlinked when the last memory descripto -r associated with this match list entry is unlinked. - (Note, the check for unlinking a match entry only occurs when a memory - descriptor is unlinked.) -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be prepended or appended to - the existing match list. - If there is no existing list, this argument is ignored and the new match - entry becomes the only entry in the list. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. 
-\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlMEAttachAny -\begin_inset LatexCommand \label{sec:attachany} - -\end_inset - - -\layout LyX-Code - -int PtlMEAttachAny( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t *index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEAttachAny -\emph default - function creates a match list consisting of a single entry and attaches - this list to an unused Portal table entry for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. - -\layout Description - -PTL_PT_FULL Indicates that there are no free entries in the Portal table. -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. 
- -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the Portal index where the - match list has been attached. -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid, match_bits, ignorebits, unlink -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\shape italic -PtlMEAttach -\shape default -. -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlMEInsert -\begin_inset LatexCommand \label{sec:meinsert} - -\end_inset - - -\layout LyX-Code - -int PtlMEInsert( ptl_handle_me_t current, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEInsert -\emph default - function creates a new match list entry and inserts this entry into the - match list containing -\family typewriter -current -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier.
- -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -current -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match entry. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -current -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for a match entry. - The new match entry will be inserted immediately before or immediately - after this match entry. -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\family default -, -\family typewriter -match_bits -\family default -, -\family typewriter -ignorebits -\family default -, -\family typewriter -unlink -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default - -\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be inserted before or after - the -\family typewriter -current -\family default - entry. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. 
-\end_inset - - - - -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default -. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlMEUnlink -\begin_inset LatexCommand \label{sec:meunlink} - -\end_inset - - -\layout LyX-Code - -int PtlMEUnlink( ptl_handle_me_t entry ); -\layout Standard -\noindent -The -\emph on -PtlMEUnlink -\emph default - function can be used to unlink a match entry from a match list. - This operation also releases any resources associated with the match entry - (including the associated memory descriptor). - It is an error to use the match entry handle after calling -\emph on -PtlMEUnlink -\emph default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -entry -\family default - is not a valid match entry handle. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -entry -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard - -A handle for the match entry to be unlinked. -\end_inset - - - - -\end_inset - - -\layout Section - -Memory Descriptors -\begin_inset LatexCommand \label{sec:md} - -\end_inset - - -\layout Standard - -A memory descriptor contains information about a region of an application - process' memory and an event queue where information about the operations - performed on the memory descriptor is recorded.
- The Portals API provides two operations to create memory descriptors: -\emph on -PtlMDAttach -\emph default -, and -\emph on -PtlMDBind -\emph default -; an operation to update a memory descriptor, -\emph on -PtlMDUpdate -\emph default -; and an operation to unlink and release the resources associated with a - memory descriptor, -\emph on -PtlMDUnlink -\emph default -. -\layout Subsection - -The Memory Descriptor Type -\begin_inset LatexCommand \label{sec:md-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - void* start; -\newline - ptl_size_t length; -\newline - int threshold; -\newline - unsigned int max_offset; -\newline - unsigned int options; -\newline - void* user_ptr; -\newline - ptl_handle_eq_t eventq; -\newline -} ptl_md_t; -\layout Standard -\noindent -The -\family typewriter -ptl_md_t -\family default - type defines the application view of a memory descriptor. - Values of this type are used to initialize and update the memory descriptors. -\layout Subsubsection - -Members -\layout Description - -start,\SpecialChar ~ -length Specify the memory region associated with the memory descriptor. - The -\family typewriter -start -\family default - member specifies the starting address for the memory region and the -\family typewriter -length -\family default - member specifies the length of the region. - The -\family typewriter -start member -\family default - can be NULL provided that the -\family typewriter -length -\family default - member is zero. - (Zero length buffers are useful to record events.) There are no alignment - restrictions on the starting address or the length of the region; although, - unaligned messages may be slower (i.e., lower bandwidth and/or longer latency) - on some implementations. - -\layout Description - -threshold Specifies the maximum number of operations that can be performed - on the memory descriptor. 
- An operation is any action that could possibly generate an event (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - for the different types of events). - In the usual case, the threshold value is decremented for each operation - on the memory descriptor. - When the threshold value is zero, the memory descriptor is -\emph on -inactive -\emph default -, and does not respond to operations. - A memory descriptor can have an initial threshold value of zero to allow - for manipulation of an inactive memory descriptor by the local process. - A threshold value of -\family typewriter -PTL_MD_THRESH_INF -\family default - indicates that there is no bound on the number of operations that may be - applied to a memory descriptor. - Note that local operations (e.g., -\emph on -PtlMDUpdate -\emph default -) are not applied to the threshold count. - -\layout Description - -max_offset Specifies the maximum local offset of a memory descriptor. - When the local offset of a memory descriptor exceeds this maximum, the - memory descriptor becomes -\shape italic -inactive -\shape default - and does not respond to further operations. -\layout Description - -options Specifies the behavior of the memory descriptor. - There are five options that can be selected: enable put operations (yes - or no), enable get operations (yes or no), offset management (local or - remote), message truncation (yes or no), and acknowledgement (yes or no). - Values for this argument can be constructed using a bitwise or of the following - values: -\begin_deeper -\begin_deeper -\layout Description - -PTL_MD_OP_PUT Specifies that the memory descriptor will respond to -\emph on -put -\emph default - operations. - By default, memory descriptors reject -\emph on -put -\emph default - operations. - -\layout Description - -PTL_MD_OP_GET Specifies that the memory descriptor will respond to -\emph on -get -\emph default - operations. 
- By default, memory descriptors reject -\emph on -get -\emph default - operations. - -\layout Description - -PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory - region is provided by the incoming request. - By default, the offset is maintained locally. - When the offset is maintained locally, the offset is incremented by the - length of the request so that the next operation (put and/or get) will - access the next part of the memory region. -\layout Description - -PTL_MD_TRUNCATE Specifies that the length provided in the incoming request - can be reduced to match the memory available in the region. - (The memory available in a memory region is determined by subtracting the - offset from the length of the memory region.) By default, if the length - in the incoming operation is greater than the amount of memory available, - the operation is rejected. - -\layout Description - -PTL_MD_ACK_DISABLE Specifies that an acknowledgement should -\emph on -not -\emph default - be sent for incoming -\emph on -put -\emph default - operations, even if requested. - By default, acknowledgements are sent for -\emph on -put -\emph default - operations that request an acknowledgement. - Acknowledgements are never sent for -\emph on -get -\emph default - operations. - The value sent in the reply serves as an implicit acknowledgement. - -\end_deeper -\layout Standard - - -\series bold -Note -\series default -: It is not considered an error to have a memory descriptor that does not - respond to either -\emph on -put -\emph default - or -\emph on -get -\emph default - operations: Every memory descriptor responds to -\emph on -reply -\emph default - operations. - Nor is it considered an error to have a memory descriptor that responds - to both -\emph on -put -\emph default - and -\emph on -get -\emph default - operations. - -\end_deeper -\layout Description - -user_ptr A user-specified value that is associated with the memory descriptor. 
- The value does not need to be a pointer, but must fit in the space used - by a pointer. - This value (along with other values) is recorded in events associated with - operations on this memory descriptor. -\begin_inset Foot -collapsed true - -\layout Standard - -Tying the memory descriptor to a user-defined value can be useful when multiple - memory descriptors share the same event queue or when the memory descriptor - needs to be associated with a data structure maintained by the application. - For example, an MPI implementation can set the -\family typewriter -user_ptr -\family default - argument to the value of an MPI Request. - This direct association allows for processing of memory descriptors by - the MPI implementation without a table lookup or a search for the appropriate - MPI Request. -\end_inset - - -\layout Description - -eventq A handle for the event queue used to log the operations performed - on the memory region. - If this argument is -\family typewriter -PTL_EQ_NONE -\family default -, operations performed on this memory descriptor are not logged. - -\layout Subsection - -PtlMDAttach -\begin_inset LatexCommand \label{sec:mdattach} - -\end_inset - - -\layout LyX-Code - -int PtlMDAttach( ptl_handle_me_t match, -\newline - ptl_md_t mem_desc, -\newline - ptl_unlink_t unlink_op, -\newline - ptl_unlink_t unlink_nofit, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_unlink_t -\family default - are used to control whether an item is unlinked from a list. - The value -\family typewriter -PTL_UNLINK -\family default - enables unlinking. - The value -\family typewriter -PTL_RETAIN -\family default - disables unlinking. -\layout Standard - -The -\emph on -PtlMDAttach -\emph default - operation is used to create a memory descriptor and attach it to a match - list entry. - An error code is returned if this match list entry already has an associated - memory descriptor.
-\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INUSE Indicates that -\family typewriter -match -\family default - already has a memory descriptor attached. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -match -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface associated with -\family typewriter -match -\family default -. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -match -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the match entry that the memory descriptor will be associated - with. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. 
- -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_op -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when it becomes - inactive, either because the operation threshold drops to zero or because - the maximum offset has been exceeded. - (Note, the check for unlinking a memory descriptor only occurs after - the completion of a successful operation. - If the threshold is set to zero during initialization or using -\emph on -PtlMDUpdate -\emph default -, the memory descriptor is -\series bold -not -\series default - unlinked.) -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_nofit -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when the space - remaining in the memory descriptor is not sufficient for a matching operation. - If an incoming message arrives at a memory descriptor that does - not have sufficient space and the -\series bold -PTL_MD_TRUNCATE -\series default - option is not specified, the memory descriptor will be unlinked. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument can be NULL, in which case the handle will not be returned.
-\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlMDBind -\begin_inset LatexCommand \label{sec:mdbind} - -\end_inset - - -\layout LyX-Code - -int PtlMDBind( ptl_handle_ni_t interface, -\newline - ptl_md_t mem_desc, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlMDBind -\emph default - operation is used to create a -\begin_inset Quotes eld -\end_inset - -free floating -\begin_inset Quotes erd -\end_inset - - memory descriptor, i.e., a memory descriptor that is not associated with - a match list entry. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface, -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INV_EQ Indicates that the event queue associated with -\family typewriter -mem_desc -\family default - is not valid. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address.
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the network interface with which the memory descriptor will - be associated. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument must be a valid address and cannot be NULL. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlMDUnlink -\begin_inset LatexCommand \label{sec:mdfree} - -\end_inset - - -\layout LyX-Code - -int PtlMDUnlink( ptl_handle_md_t mem_desc ); -\layout Standard -\noindent -The -\emph on -PtlMDUnlink -\emph default - function unlinks the memory descriptor from any match list entry it may - be linked to and releases the resources associated with a memory descriptor. - (This function does not free the memory region associated with the memory - descriptor.) This function also releases the resources associated with a - floating memory descriptor. - Only memory descriptors with no pending operations may be unlinked. 
-\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. -\layout Description - -PTL_MD_INUSE Indicates that -\family typewriter -mem_desc -\family default - has pending operations and cannot be unlinked. -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to be released. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlMDUpdate -\begin_inset LatexCommand \label{sec:mdupdate} - -\end_inset - - -\layout LyX-Code - -int PtlMDUpdate( ptl_handle_md_t mem_desc, -\newline - ptl_md_t* old_md, -\newline - ptl_md_t* new_md, -\newline - ptl_handle_eq_t testq ); -\layout Standard -\noindent -The -\emph on -PtlMDUpdate -\emph default - function provides a conditional, atomic update operation for memory descriptors. - The memory descriptor identified by -\family typewriter -mem_desc -\family default - is only updated if the event queue identified by -\family typewriter -testq -\family default - is empty. - The intent is to only enable updates to the memory descriptor when no new - messages have arrived since the last time the queue was checked. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - - for an example of how this function can be used. 
-\layout Standard - -If -\family typewriter -new_md -\family default - is not NULL, the memory descriptor identified by -\family typewriter -mem_desc -\family default - will be updated - to reflect the values in the structure pointed to by -\family typewriter -new_md -\family default - if -\family typewriter -testq -\family default - has the value -\family typewriter -PTL_EQ_NONE -\family default - or if the event queue identified by -\family typewriter -testq -\family default - is empty. - If -\family typewriter -old_md -\family default - is not NULL, the current value of the memory descriptor identified by -\family typewriter -mem_desc -\family default - is recorded in the location identified by -\family typewriter -old_md -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_NOUPDATE Indicates that the update was not performed because -\family typewriter -testq -\family default - was not empty. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. - -\layout Description - -PTL_ILL_MD Indicates that the value pointed to by -\family typewriter -new_md -\family default - is not a legal memory descriptor (e.g., the memory region specified by the - memory descriptor may be invalid). - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -testq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -new_md -\family default - or -\family typewriter -old_md -\family default - is not a legal address.
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to update. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -old_md -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -old_md -\family default - is not the value -\family typewriter -NULL -\family default -, the current value of the memory descriptor will be stored in the location - identified by -\family typewriter -old_md -\family default -. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -new_md -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -new_md -\family default - is not the value -\family typewriter -NULL -\family default -, this argument provides the new values for the memory descriptor, if the - update is performed. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -testq -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for an event queue used to predicate the update. - If -\family typewriter -testq -\family default - is equal to -\family typewriter -PTL_EQ_NONE -\family default -, the update is performed unconditionally. - Otherwise, the update is performed if and only if -\family typewriter -testq -\family default - is empty. - If the update is not performed, the function returns the value -\family typewriter -PTL_NOUPDATE -\family default -.
- (Note, the -\family typewriter -testq -\family default - argument does not need to be the same as the event queue associated with - the memory descriptor.) -\end_inset - - - - -\end_inset - - -\layout Standard - -The conditional update can be used to ensure that the memory descriptor - has not changed between the time it was examined and the time it is updated. - In particular, it is needed to support an MPI implementation where the - activity of searching an unexpected message queue and posting a receive - must be atomic. -\layout Section - -Events and Event Queues -\begin_inset LatexCommand \label{sec:eq} - -\end_inset - - -\layout Standard - -Event queues are used to log operations performed on memory descriptors. - They can also be used to hold acknowledgements for completed -\emph on -put -\emph default - operations and to note when the data specified in a -\emph on -put -\emph default - operation has been sent (i.e., when it is safe to reuse the buffer that holds - this data). - Multiple memory descriptors can share a single event queue. -\layout Standard - -In addition to the -\family typewriter -ptl_handle_eq_t -\family default - type, the Portals API defines two types associated with events: The -\family typewriter - -\newline -ptl_event_kind_t -\family default - type defines the kinds of events that can be stored in an event queue. - The -\family typewriter -ptl_event_t -\family default - type defines a structure that holds the information associated with an - event. 
-\layout Standard - -The Portals API also provides four functions for dealing with event queues: - The -\emph on -PtlEQAlloc -\emph default - function is used to allocate the API resources needed for an event queue, - the -\emph on -PtlEQFree -\emph default - function is used to release these resources, the -\emph on -PtlEQGet -\emph default - function can be used to get the next event from an event queue, and the - -\emph on -PtlEQWait -\emph default - function can be used to block a process (or thread) until an event queue - has at least one event. -\layout Subsection - -Kinds of Events -\begin_inset LatexCommand \label{sec:ek-type} - -\end_inset - - -\layout LyX-Code - -typedef enum { -\newline - PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL, -\newline - PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL, -\newline - PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL, -\newline - PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL, -\newline - PTL_EVENT_ACK, -\newline - PTL_EVENT_UNLINK -\newline -} ptl_event_kind_t; -\layout Standard -\noindent -The Portals API defines fourteen types of events that can be logged in an - event queue: -\layout Description - -PTL_EVENT_GET_START A remote -\emph on -get -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_GET_END A previously initiated -\emph on -get -\emph default - operation completed successfully. - This event is logged after the reply has been sent by the local node. - As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_GET_FAIL A previously initiated -\emph on -get -\emph default - operation completed unsuccessfully. - This event is logged after the reply has been sent by the local node. 
- As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_PUT_START A remote -\emph on -put -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should be considered - volatile until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_PUT_END A previously initiated -\emph on -put -\emph default - operation completed successfully. - The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_PUT_FAIL A previously initiated -\emph on -put -\emph default - operation completed unsuccessfully. - The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_REPLY_START A -\emph on -reply -\emph default - operation has been started on the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_END A previously initiated -\emph on -reply -\emph default - operation has completed successfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_FAIL A previously initiated -\emph on -reply -\emph default - operation has completed unsuccessfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_ACK An -\emph on -acknowledgement -\emph default - was received. - This event is logged when the acknowledgement is received. -\layout Description - -PTL_EVENT_SEND_START An outgoing -\emph on -send -\emph default - operation has been started. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged.
-\layout Description - -PTL_EVENT_SEND_END A previously initiated -\emph on -send -\emph default - operation has completed successfully. - This event is logged after the entire buffer has been sent and it is safe - for the application to reuse the buffer. - -\layout Description - -PTL_EVENT_SEND_FAIL A previously initiated -\emph on -send -\emph default - operation has completed unsuccessfully. - The process can safely manipulate the memory or free the memory descriptor - once it sees this event. -\layout Description - -PTL_EVENT_UNLINK A memory descriptor associated with this event queue has - been automatically unlinked. - This event is not generated when a memory descriptor is explicitly unlinked - by calling -\shape italic -PtlMDUnlink -\shape default -. - This event does not decrement the threshold count. -\layout Subsection - -Event Ordering -\layout Standard - -The Portals API guarantees that when a process initiates two operations - on a remote process, the operations will be initiated on the remote process - in the same order that they were initiated on the original process. - As an example, if process A initiates two -\emph on -put -\emph default - operations, -\emph on -x -\emph default - and -\emph on -y -\emph default -, on process B, the Portals API guarantees that process A will receive the - -\family typewriter -PTL_EVENT_SEND_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default - in the same order that process B receives the -\family typewriter -PTL_EVENT_PUT_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default -. - Notice that the API does not guarantee that the start events will be delivered - in the same order that process A initiated the -\emph on -x -\emph default - and -\emph on -y -\emph default - operations.
- If process A needs to ensure the ordering of these operations, it should - include code to wait for the initiation of -\emph on -x -\emph default - before it initiates -\emph on -y -\emph default -. -\layout Subsection - -Failure Notification -\layout Standard - -Operations may fail to complete successfully; however, unless the node itself - fails, every operation that is started will eventually complete. - While an operation is in progress, the memory associated with the operation - should not be viewed (in the case of a put or a reply) or altered (in the - case of a send or get). - Operation completion, whether successful or unsuccessful, is final. - That is, when an operation completes, the memory associated with the operation - will no longer be read or altered by the operation. - A network interface can use the -\family typewriter -ptl_ni_fail_t -\family default - to define more specific information regarding the failure of the operation - and record this information in the -\family typewriter -ni_fail_type -\family default - field of the event. -\layout Subsection - -The Event Type -\begin_inset LatexCommand \label{sec:event-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_event_kind_t type; -\newline - ptl_process_id_t initiator; -\newline - ptl_uid_t uid; -\layout LyX-Code - - ptl_pt_index_t portal; -\newline - ptl_match_bits_t match_bits; -\newline - ptl_size_t rlength; -\newline - ptl_size_t mlength; -\newline - ptl_size_t offset; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_hdr_data_t hdr_data; -\newline - ptl_seq_t link; -\newline - ptl_ni_fail_t ni_fail_type; -\newline - volatile ptl_seq_t sequence; -\newline -} ptl_event_t; -\layout Standard -\noindent -An event structure includes the following members: -\layout Description - -type Indicates the type of the event. - -\layout Description - -initiator The id of the initiator. 
- -\layout Description - -portal The Portal table index specified in the request. - -\layout Description - -match_bits A copy of the match bits specified in the request. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for more information on match bits. - -\layout Description - -rlength The length (in bytes) specified in the request. - -\layout Description - -mlength The length (in bytes) of the data that was manipulated by the operation. - For truncated operations, the manipulated length will be the number of - bytes specified by the memory descriptor (possibly with an offset) operation. - For all other operations, the manipulated length will be the length of - the requested operation. - -\layout Description - -offset Is the displacement (in bytes) into the memory region that the operation - used. - The offset can be determined by the operation (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - -) for a remote managed memory descriptor, or by the local memory descriptor - (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -). - -\layout Description - -md_handle Is the handle to the memory descriptor associated with the event. -\layout Description - -mem_desc Is the state of the memory descriptor immediately after the event - has been processed. - -\layout Description - -hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -). - -\layout Description - -link The -\emph on -link -\emph default - member is used to link -\family typewriter -START -\family default - events with the -\family typewriter -END -\family default - or -\family typewriter -FAIL -\family default - event that signifies completion of the operation. - The -\emph on -link -\emph default - member will be the same for the two events associated with an operation. 
- The link member is also used to link an -\family typewriter -UNLINK -\family default - event with the event that caused the memory descriptor to be unlinked. -\layout Description - -sequence The sequence number for this event. - Sequence numbers are unique to each event. -\layout Comment - -The -\emph on -sequence -\emph default - member is the last member and is volatile to support SMP implementations. - When an event structure is filled in, the -\emph on -sequence -\emph default - member should be written after all other members have been updated. - Moreover, a memory barrier should be inserted between the updating of other - members and the updating of the -\emph on -sequence -\emph default - member. -\layout Subsection - -PtlEQAlloc -\begin_inset LatexCommand \label{sec:eqalloc} - -\end_inset - - -\layout LyX-Code - -int PtlEQAlloc( ptl_handle_ni_t interface, -\newline - ptl_size_t count, -\newline - ptl_handle_eq_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlEQAlloc -\emph default - function is used to build an event queue. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - event queue. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface with which the event queue will be associated. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -count -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The number of events that can be stored in the event queue. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - event queue. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlEQFree -\begin_inset LatexCommand \label{sec:eqfree} - -\end_inset - - -\layout LyX-Code - -int PtlEQFree( ptl_handle_eq_t eventq ); -\layout Standard -\noindent -The -\emph on -PtlEQFree -\emph default - function releases the resources associated with an event queue. - It is up to the user to ensure that no memory descriptors are associated - with the event queue once it is freed. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle.
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard - -A handle for the event queue to be released. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlEQGet -\begin_inset LatexCommand \label{sec:eqget} - -\end_inset - - -\layout LyX-Code - -int PtlEQGet( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQGet -\emph default - function is a nonblocking function that can be used to get the next event - in an event queue. - The event is removed from the queue. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_EQ_EMPTY Indicates that -\family typewriter -eventq -\family default - is empty or another thread is waiting on -\emph on -PtlEQWait -\emph default -. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address.
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlEQWait -\begin_inset LatexCommand \label{sec:eqwait} - -\end_inset - - -\layout LyX-Code - -int PtlEQWait( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQWait -\emph default - function can be used to block the calling process (thread) until there - is an event in an event queue. - This function also returns the next event in the event queue and removes - this event from the queue. - This is the only blocking operation in the Portals 3.2 API. - In the event that multiple threads are waiting on the same event queue, - PtlEQWait is guaranteed to wake exactly one thread, but the order in which - they are awakened is not specified. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
- -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard -\noindent - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue to wait on. - The calling process (thread) will be blocked until -\family typewriter -eventq -\family default - is not empty. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. -\end_inset - - - - -\end_inset - - -\layout Section - -The Access Control Table -\begin_inset LatexCommand \label{sec:ac} - -\end_inset - - -\layout Standard - -Processes can use the access control table to control which processes are - allowed to perform operations on Portal table entries. - Each communication interface has a Portal table and an access control table. - The access control table for the default interface contains an entry at - index zero that allows all processes with the same user id to communicate. - Entries in the access control table can be manipulated using the -\emph on -PtlACEntry -\emph default - function.
-\layout Subsection - -PtlACEntry -\begin_inset LatexCommand \label{sec:acentry} - -\end_inset - - -\layout LyX-Code - -int PtlACEntry( ptl_handle_ni_t interface, -\newline - ptl_ac_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_uid_t user_id, -\newline - ptl_pt_index_t portal ); -\layout Standard -\noindent -The -\emph on -PtlACEntry -\emph default - function can be used to update an entry in the access control table for - an interface. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_AC_INV_INDEX Indicates that -\family typewriter -index -\family default - is not a valid access control table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_PT_INV_INDEX Indicates that -\family typewriter -portal -\family default - is not a valid Portal table index. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Identifies the interface to use. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -index -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The index of the entry in the access control table to update. 
-\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -matchid -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Identifies the process(es) that are allowed to perform operations. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -user_id -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Identifies the user that is allowed to perform operations. - The value -\family typewriter -PTL_UID_ANY -\family default - can be used to wildcard the user. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Identifies the Portal index(es) that can be used. - The value -\family typewriter -PTL_PT_INDEX_ANY -\family default - can be used to wildcard the Portal index. -\end_inset - - - - -\end_inset - - -\layout Section - -Data Movement Operations -\begin_inset LatexCommand \label{sec:datamovement} - -\end_inset - - -\layout Standard - -The Portals API provides two data movement operations: -\emph on -PtlPut -\emph default - and -\emph on -PtlGet -\emph default -. 
-\layout Subsection - -PtlPut -\begin_inset LatexCommand \label{sec:put} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t; -\newline - -\newline -int PtlPut( ptl_handle_md_t mem_desc, -\newline - ptl_ack_req_t ack_req, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset, -\newline - ptl_hdr_data_t hdr_data ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ack_req_t -\family default - are used to control whether an acknowledgement should be sent when the - operation completes (i.e., when the data has been written to a memory descriptor - of the -\family typewriter -target -\family default - process). - The value -\family typewriter -PTL_ACK_REQ -\family default - requests an acknowledgement, the value -\family typewriter -PTL_NOACK_REQ -\family default - requests that no acknowledgement should be generated. -\layout Standard - -The -\emph on -PtlPut -\emph default - function initiates an asynchronous put operation. - There are several events associated with a put operation: initiation of - the send on the local node ( -\family typewriter -PTL_EVENT_SEND_START -\family default -), completion of the send on the local node ( -\family typewriter -PTL_EVENT_SEND_END -\family default - or -\family typewriter -PTL_EVENT_SEND_FAIL -\family default -), and, when the send completes successfully, the receipt of an acknowledgement - ( -\family typewriter -PTL_EVENT_ACK -\family default -) indicating that the operation was accepted by the target. - These events will be logged in the event queue associated with the memory - descriptor ( -\family typewriter -mem_desc -\family default -) used in the put operation. - Using a memory descriptor that does not have an associated event queue - results in these events being discarded. 
- In this case, the application must have another mechanism (e.g., a higher - level protocol) for determining when it is safe to modify the memory region - associated with the memory descriptor. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory to be sent. - If the memory descriptor has an event queue associated with it, it will - be used to record events when the message has been sent (PTL_EVENT_SEND_START, - PTL_EVENT_SEND_END). - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ack_req -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -Controls whether an acknowledgement event is requested. - Acknowledgements are only sent when they are requested by the initiating - process -\series bold -and -\series default - the memory descriptor has an event queue -\series bold -and -\series default - the target memory descriptor enables them. - Allowed constants: -\family typewriter -PTL_ACK_REQ -\family default -, -\family typewriter -PTL_NOACK_REQ -\family default -. 
-\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -hdr_data -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -64 bits of user data that can be included in message header. 
- This data is written to an event queue entry at the target if an event - queue is present on the matching memory descriptor. -\end_inset - - - - -\end_inset - - -\layout Subsection - -PtlGet -\begin_inset LatexCommand \label{sec:get} - -\end_inset - - -\layout LyX-Code - -int PtlGet( ptl_handle_md_t mem_desc, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset ); -\layout Standard -\noindent -The -\emph on -PtlGet -\emph default - function initiates a remote read operation. - There are two event pairs associated with a get operation , when the data - is sent from the remote node, a -\family typewriter -PTL_EVENT_GET{START|END} -\family default - event pair is registered on the remote node; and when the data is returned - from the remote node a -\family typewriter -PTL_EVENT_REPLY{START|END} -\family default - event pair is registered on the local node. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory into which - the requested data will be received. 
- The memory descriptor can have an event queue associated with it to record - events, such as when the message receive has started ( -\family typewriter -PTL_EVENT_REPLY -\family default -_ -\family typewriter -START -\family default -). -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). 
-\end_inset - - - - -\end_inset - - -\layout Section - -Summary -\layout Standard - - -\begin_inset LatexCommand \label{sec:summary} - -\end_inset - - We conclude this section by summarizing the names introduced by the Portals - 3.2 API. - We start by summarizing the names of the types introduced by the API. - This is followed by a summary of the functions introduced by the API. - Which is followed by a summary of the function return codes. - Finally, we conclude with a summary of the other constant values introduced - by the API. -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - - presents a summary of the types defined by the Portals API. - The first column in this table gives the type name, the second column gives - a brief description of the type, the third column identifies the section - where the type is defined, and the fourth column lists the functions that - have arguments of this type. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Types Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:types} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\noindent - -\size small - -\begin_inset Tabular - - - - - - - - -\begin_inset Text - -\layout Standard - - -\series bold - Name -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold - Meaning -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold - Sect -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold - Functions -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -indexes for an access control table -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - 
-\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlACEntry, PtlPut, PtlGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -acknowledgement request types -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlPut -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -kinds of events -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -information about events -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlEQGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -plt_seq_t -\end_inset - - -\begin_inset Text - -\layout Standard - -event sequence number -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_any_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -handles for any object -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset - - 
-\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -handles for event queues -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -handles for memory descriptors -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert, - PtlPut, PtlGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_me_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -handles for match entries -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_ni_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -handles for network interfaces -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut, - PtlGet -\end_inset - - - - -\begin_inset Text - -\layout 
Standard - - -\family typewriter -ptl_nid_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -node identifiers -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlGetId,PtlACEntry -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset - - -\begin_inset Text - -\layout Standard - -process identifier -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -PtlGetId, PtlACEntry -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset - - -\begin_inset Text - -\layout Standard - -user indentifier -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -PtlGetUid, PtlACEntry -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -insertion position (before or after) -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -identifiers for network interfaces -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset - - - - -\begin_inset Text - -\layout Standard - - 
-\family typewriter -ptl_match_bits_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -match (and ignore) bits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mb-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_md_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -memory descriptors -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDUpdate -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ni_fail_t -\end_inset - - -\begin_inset Text - -\layout Standard - -network interface-specific failures -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -process identifiers -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -indexes for Portal tables -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - -\end_inset - - -\end_inset - 
- -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlACEntry -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -sizes -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:size-t} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, PtlPut, PtlGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -indexes for status registers -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_value_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -values in status registers -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -unlink options -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMDAttach -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - - presents a summary of the functions defined by the Portals API. 
- The first column in this table gives the name for the function, the second - column gives a brief description of the operation implemented by the function, - and the third column identifies the section where the function is defined. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Functions Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:func} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - -Name -\end_inset - - -\begin_inset Text - -\layout Standard - - Operation -\end_inset - - -\begin_inset Text - -\layout Standard - - Section -\end_inset - - - - -\begin_inset Text - -\layout Standard - -PtlACEntry -\end_inset - - -\begin_inset Text - -\layout Standard - - update an entry in an access control table -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlEQAlloc -\end_inset - - -\begin_inset Text - -\layout Standard - - create an event queue -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlEQGet -\end_inset - - -\begin_inset Text - -\layout Standard - - get the next event from an event queue -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlEQFree -\end_inset - - -\begin_inset Text - -\layout Standard - - release the resources for an event queue -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset - - - - 
-\begin_inset Text - -\layout Standard - - PtlEQWait -\end_inset - - -\begin_inset Text - -\layout Standard - - wait for a new event in an event queue -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlFini -\end_inset - - -\begin_inset Text - -\layout Standard - - shutdown the Portals API -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlGet -\end_inset - - -\begin_inset Text - -\layout Standard - - perform a get operation -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlGetId -\end_inset - - -\begin_inset Text - -\layout Standard - - get the id for the current process -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlInit -\end_inset - - -\begin_inset Text - -\layout Standard - - initialize the Portals API -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlMDAttach -\end_inset - - -\begin_inset Text - -\layout Standard - - create a memory descriptor and attach it to a match entry -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlMDBind -\end_inset - - -\begin_inset Text - -\layout Standard - - create a free-floating memory descriptor -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - - -\end_inset - 
- - - -\begin_inset Text - -\layout Standard - - PtlMDUnlink -\end_inset - - -\begin_inset Text - -\layout Standard - - remove a memory descriptor from a list and release its resources -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlMDUpdate -\end_inset - - -\begin_inset Text - -\layout Standard - - update a memory descriptor -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlMEAttach -\end_inset - - -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a Portal table -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset - - -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a free Portal table entry -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:attachany} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlMEInsert -\end_inset - - -\begin_inset Text - -\layout Standard - - create a match entry and insert it in a list -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlMEUnlink -\end_inset - - -\begin_inset Text - -\layout Standard - - remove a match entry from a list and release its resources -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlNIDist -\end_inset - - -\begin_inset Text - -\layout Standard - - get the distance to another process -\end_inset - - 
-\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlNIFini -\end_inset - - -\begin_inset Text - -\layout Standard - - shutdown a network interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlNIHandle -\end_inset - - -\begin_inset Text - -\layout Standard - - get the network interface handle for an object -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlNIInit -\end_inset - - -\begin_inset Text - -\layout Standard - - initialize a network interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlNIStatus -\end_inset - - -\begin_inset Text - -\layout Standard - - read a network interface status register -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - PtlPut -\end_inset - - -\begin_inset Text - -\layout Standard - - perform a put operation -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - - summarizes the return codes used by functions defined by the Portals API. - All of these constants are integer values. 
- The first column of this table gives the symbolic name for the constant, - the second column gives a brief description of the value, and the third - column identifies the functions that can return this value. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Function Return Codes for the Portals 3.2 API -\begin_inset LatexCommand \label{tab:retcodes} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Functions -\series default - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_AC_INV_INDEX -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid access control table index -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlACEntry -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_DROPPED -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -at least one event has been dropped -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet, PtlWait -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_EMPTY -\end_inset - - -\begin_inset Text - -\layout Standard - -no events available in an event queue -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_FAIL -\end_inset - - -\begin_inset Text - -\layout Standard - -error during initialization or cleanup -\end_inset - - -\begin_inset Text - -\layout Standard 
-\noindent - PtlInit, PtlFini -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ILL_MD -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -illegal memory descriptor values -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDBind, PtlMDUpdate -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_DUP -\end_inset - - -\begin_inset Text - -\layout Standard - -duplicate initialization of an interface -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_INV -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -initialization of an invalid interface -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INUSE -\end_inset - - -\begin_inset Text - -\layout Standard - -the ME already has an MD -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ASIZE -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid access control table size -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_EQ -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid event queue handle -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMDUpdate, PtlEQFree, PtlEQGet -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_HANDLE -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid handle -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle 
-\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_MD -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid memory descriptor handle -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMDUnlink, PtlMDUpdate -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ME -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid match entry handle -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_NI -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid network interface handle -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PROC -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid process identifier -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PTINDEX -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid Portal table index -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_REG -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid status register -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlNIStatus -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_SR_INDX -\end_inset - - -\begin_inset Text - -\layout Standard - -invalid status register index -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlNIStatus 
-\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ML_TOOLONG -\end_inset - - -\begin_inset Text - -\layout Standard - -match list too long -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach, PtlMEInsert -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_INUSE -\end_inset - - -\begin_inset Text - -\layout Standard - -MD has pending operations -\end_inset - - -\begin_inset Text - -\layout Standard - -PtlMDUnlink -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOINIT -\end_inset - - -\begin_inset Text - -\layout Standard - -uninitialized API -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\emph default -, except PtlInit -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOSPACE -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -insufficient memory -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOUPDATE -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - no update was performed -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - PtlMDUpdate -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_FULL -\end_inset - - -\begin_inset Text - -\layout Standard - -Portal table is full -\end_inset - - -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_OK -\end_inset - - -\begin_inset Text - -\layout Standard - - success -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter 
-PTL_SEGV -\end_inset - - -\begin_inset Text - -\layout Standard - -addressing violation -\end_inset - - -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate, - PtlEQAlloc, PtlEQGet, PtlEQWait -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - - summarizes the remaining constant values introduced by the Portals API. - The first column in this table presents the symbolic name for the constant, - the second column gives a brief description of the value, the third column - identifies the type for the value, the fourth column identifies the - section in which the value is introduced, and the fifth column identifies - the sections in which the value is referenced. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Other Constants Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:oconsts} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - - - -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Base type -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Intr. -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Ref.
-\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ACK_REQ -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -request an acknowledgement -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_NONE -\end_inset - - -\begin_inset Text - -\layout Standard - -a NULL event queue handle -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_START -\end_inset - - -\begin_inset Text - -\layout Standard - -get event start -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_END -\end_inset - - -\begin_inset Text - -\layout Standard - -get event end -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - 
-\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_FAIL -\end_inset - - -\begin_inset Text - -\layout Standard - -get event fail -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_START -\end_inset - - -\begin_inset Text - -\layout Standard - -put event start -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_END -\end_inset - - -\begin_inset Text - -\layout Standard - -put event end -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_FAIL -\end_inset - - -\begin_inset Text - -\layout Standard - -put event fail -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - 
-\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_START -\end_inset - - -\begin_inset Text - -\layout Standard - -reply event start -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_END -\end_inset - - -\begin_inset Text - -\layout Standard - -reply event end -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_FAIL -\end_inset - - -\begin_inset Text - -\layout Standard - -reply event fail -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_START -\end_inset - - -\begin_inset Text - -\layout Standard - -acknowledgement event start -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - 
-\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_END -\end_inset - - -\begin_inset Text - -\layout Standard - -acknowledgement event end -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_FAIL -\end_inset - - -\begin_inset Text - -\layout Standard - -acknowledgement event fail -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_START -\end_inset - - -\begin_inset Text - -\layout Standard - -send event start -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_END -\end_inset - - -\begin_inset Text - -\layout Standard - -send event end -\end_inset - - -\begin_inset Text 
- -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_FAIL -\end_inset - - -\begin_inset Text - -\layout Standard - -send event fail -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_UNLINK -\end_inset - - -\begin_inset Text - -\layout Standard - -unlink event -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PID_ANY -\end_inset - - -\begin_inset Text - -\layout Standard - -wildcard for process id fields -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter 
-PTL_NID_ANY -\end_inset - - -\begin_inset Text - -\layout Standard - -wildcard for node id fields -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_nid_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UID_ANY -\end_inset - - -\begin_inset Text - -\layout Standard - -wildcard for user id -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_IFACE_DEFAULT -\end_inset - - -\begin_inset Text - -\layout Standard - -default interface -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_AFTER -\end_inset - - -\begin_inset Text - -\layout Standard - -insert after -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset 
- - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_BEFORE -\end_inset - - -\begin_inset Text - -\layout Standard - -insert before -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_ACK_DISABLE -\end_inset - - -\begin_inset Text - -\layout Standard - -a flag to disable acknowledgements -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_MANAGE_REMOTE -\end_inset - - -\begin_inset Text - -\layout Standard - -a flag to enable the use of remote offsets -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_GET -\end_inset - - -\begin_inset Text - -\layout Standard - -a flag to enable get operations -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - 
-\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_PUT -\end_inset - - -\begin_inset Text - -\layout Standard - -a flag to enable put operations -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_THRESH_INF -\end_inset - - -\begin_inset Text - -\layout Standard - -infinite threshold for a memory descriptor -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_TRUNCATE -\end_inset - - -\begin_inset Text - -\layout Standard - -a flag to enable truncation of a request -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOACK_REQ -\end_inset - - -\begin_inset Text - -\layout Standard - -request no acknowledgement -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_INDEX_ANY -\end_inset - - -\begin_inset Text - -\layout Standard - -wildcard 
for Portal indexes -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_RETAIN -\end_inset - - -\begin_inset Text - -\layout Standard - -disable unlinking -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_SR_DROP_COUNT -\end_inset - - -\begin_inset Text - -\layout Standard - -index for the dropped count register -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UNLINK -\end_inset - - -\begin_inset Text - -\layout Standard - -enable unlinking -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Chapter - -The Semantics of Message Transmission -\begin_inset LatexCommand \label{sec:semantics} - -\end_inset - - -\layout Standard - -The portals API uses four types of messages: put requests, 
acknowledgements, - get requests, and replies. - In this section, we describe the information passed on the wire for each - type of message. - We also describe how this information is used to process incoming messages. -\layout Section - -Sending Messages -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:put-wire} - -\end_inset - - summarizes the information that is transmitted for a put request. - The first column provides a descriptive name for the information, the second - column provides the type for this information, the third column identifies - the source of the information, and the fourth column provides additional - notes. - Most information that is transmitted is obtained directly from the -\emph on -PtlPut -\emph default - operation. - Notice that the handle for the memory descriptor used in the -\emph on -PtlPut -\emph default - operation is transmitted even though this value cannot be interpreted by - the target. - Any value other than -\family typewriter -PTL_MD_NONE -\family default - is interpreted as a request for an acknowledgement.
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Put Request -\begin_inset LatexCommand \label{tab:put-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - - -\begin_inset Text - -\layout Standard - - -\series bold -Information -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlPut -\emph default - arg -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset - - - - -\begin_inset Text - -\layout Standard - -operation -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -indicates a put request -\end_inset - - - - -\begin_inset Text - -\layout Standard - -initiator -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -local information -\end_inset - - - - -\begin_inset Text - -\layout Standard - -user -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -local information -\end_inset - - - - -\begin_inset Text - -\layout Standard - -target -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - 
-\begin_inset Text - -\layout Standard - -portal index -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -cookie -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -match bits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -offset -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -memory desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -no ack if -\family typewriter -PTL_MD_NONE -\end_inset - - - - -\begin_inset Text - -\layout Standard - -length -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -length 
-\family default - member -\end_inset - - - - -\begin_inset Text - -\layout Standard - -data -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family roman -\emph on -bytes -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -start -\family default - and -\family typewriter -length -\family default - members -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:ack-wire} - -\end_inset - - summarizes the information transmitted in an acknowledgement. - Most of the information is simply echoed from the put request. - Notice that the initiator and target are obtained directly from the put - request, but are swapped in generating the acknowledgement. - The only new piece of information in the acknowledgement is the manipulated - length which is determined as the put request is satisfied. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in an Acknowledgement -\begin_inset LatexCommand \label{tab:ack-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - - -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Put Information -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset - - - - -\begin_inset Text - -\layout Standard - -operation -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - 
-\begin_inset Text - -\layout Standard - - indicates an acknowledgement -\end_inset - - - - -\begin_inset Text - -\layout Standard - - initiator -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - - target -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - target -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - - initiator -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - - portal index -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset - - -\begin_inset Text - -\layout Standard - - portal index -\end_inset - - -\begin_inset Text - -\layout Standard - - echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - - match bits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset - - -\begin_inset Text - -\layout Standard - - match bits -\end_inset - - -\begin_inset Text - -\layout Standard - - echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - - offset -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset - - -\begin_inset Text - -\layout Standard - - offset -\end_inset - - -\begin_inset Text - -\layout Standard - - echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - - memory desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_handle_md_t -\end_inset - - -\begin_inset Text - -\layout Standard - - memory desc -\end_inset - - -\begin_inset Text - -\layout Standard - - echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - - requested length -\end_inset - - -\begin_inset Text - -\layout 
Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - - length -\end_inset - - -\begin_inset Text - -\layout Standard - - echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - - manipulated length -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - - obtained from the operation -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:get-wire} - -\end_inset - - summarizes the information that is transmitted for a get request. - Like the information transmitted in a put request, most of the information - transmitted in a get request is obtained directly from the -\emph on -PtlGet -\emph default - operation. - Unlike put requests, get requests do not include the event queue handle. - In this case, the reply is generated whenever the operation succeeds and - the memory descriptor must not be unlinked until the reply is received. - As such, there is no advantage to explicitly sending the event queue handle. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Get Request -\begin_inset LatexCommand \label{tab:get-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - - -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlGet -\emph default - argument -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset - - - - -\begin_inset Text - -\layout Standard - -operation -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -indicates a get operation -\end_inset - - - - -\begin_inset Text - -\layout Standard - -initiator -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -local information -\end_inset - - - - -\begin_inset Text - -\layout Standard - -user -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -local information -\end_inset - - - - -\begin_inset Text - -\layout Standard - -target -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - 
- - - -\begin_inset Text - -\layout Standard - -portal index -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -cookie -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -match bits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -offset -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -memory desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -length -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -length -\family default - member -\end_inset - - - - -\end_inset - 
- -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:reply-wire} - -\end_inset - - summarizes the information transmitted in a reply. - Like an acknowledgement, most of the information is simply echoed from - the get request. - The initiator and target are obtained directly from the get request, but - are swapped in generating the reply. - The only new pieces of information in the reply are the manipulated length - and the data, which are determined as the get request is satisfied. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Reply -\begin_inset LatexCommand \label{tab:reply-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular - - - - - - - - -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Get Information -\end_inset - - -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset - - - - -\begin_inset Text - -\layout Standard - -operation -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -indicates a reply -\end_inset - - - - -\begin_inset Text - -\layout Standard - -initiator -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - -target -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -target -\end_inset - - 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset - - -\begin_inset Text - -\layout Standard - -initiator -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - - - -\begin_inset Text - -\layout Standard - -portal index -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset - - -\begin_inset Text - -\layout Standard - -portal index -\end_inset - - -\begin_inset Text - -\layout Standard - -echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - -match bits -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset - - -\begin_inset Text - -\layout Standard - -match bits -\end_inset - - -\begin_inset Text - -\layout Standard - -echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - -offset -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -offset -\end_inset - - -\begin_inset Text - -\layout Standard - -echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - -memory desc -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -memory desc -\end_inset - - -\begin_inset Text - -\layout Standard - -echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - -requested length -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset - - -\begin_inset Text - -\layout Standard - -length -\end_inset - - -\begin_inset Text - -\layout Standard - -echo -\end_inset - - - - -\begin_inset Text - -\layout Standard - -manipulated length -\end_inset - - -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset - - -\begin_inset Text - -\layout 
Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset - - - - -\begin_inset Text - -\layout Standard - -data -\end_inset - - -\begin_inset Text - -\layout Standard - - -\emph on -bytes -\end_inset - - -\begin_inset Text - -\layout Standard - -\end_inset - - -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset - - - - -\end_inset - - -\end_inset - - -\layout Section - -Receiving Messages -\begin_inset LatexCommand \label{sec:receiving} - -\end_inset - - -\layout Standard - -When an incoming message arrives on a network interface, the communication - system first checks that the target process identified in the request is - a valid process that has initialized the network interface (i.e., that the - target process has a valid Portal table). - If this test fails, the communication system discards the message and increment -s the dropped message count for the interface. - The remainder of the processing depends on the type of the incoming message. - Put and get messages are subject to access control checks and translation - (searching a match list), while acknowledgement and reply messages bypass - the access control checks and the translation step. -\layout Standard - -Acknowledgement messages include a handle for the memory descriptor used - in the original -\emph on -PtlPut -\emph default - operation. - This memory descriptor will identify the event queue where the event should - be recorded. - Upon receipt of an acknowledgement, the runtime system only needs to confirm - that the memory descriptor and event queue still exist and that there is - space for another event. - Should any of these conditions fail, the message is simply discarded - and the dropped message count for the interface is incremented. - Otherwise, the system builds an acknowledgement event from the information - in the acknowledgement message and adds it to the event queue. 
-\layout Standard - -Reception of reply messages is also relatively straightforward. - Each reply message includes a handle for a memory descriptor. - If this descriptor exists, it is used to receive the message. - A reply message will be dropped if the memory descriptor identified in - the request doesn't exist. - In this case, the dropped message count for the interface is - incremented. - This is the only reason for dropping reply messages. - Every memory descriptor accepts and truncates incoming reply messages, - eliminating the other potential reasons for rejecting a reply message. -\layout Standard - -The critical step in processing an incoming put or get request involves - mapping the request to a memory descriptor. - This step starts by using the Portal index in the incoming request to identify - a list of match entries. - This list of match entries is searched in order until a match entry is - found whose match criteria match the match bits in the incoming request - and whose memory descriptor accepts the request. -\layout Standard - -Because acknowledgement and reply messages are generated in response to requests - made by the process receiving these messages, the checks performed by the - runtime system for acknowledgements and replies are minimal. - In contrast, put and get messages are generated by remote processes and - the checks performed for these messages are more extensive. 
- Incoming put or get messages may be rejected because: -\layout Itemize - -the Portal index supplied in the request is not valid; -\layout Itemize - -the cookie supplied in the request is not a valid access control entry; - -\layout Itemize - -the access control entry identified by the cookie does not match the identifier - of the requesting process; -\layout Itemize - -the access control entry identified by the cookie does not - match the Portal index supplied in the request; or -\layout Itemize - -the match bits supplied in the request do not match any of the match entries - with a memory descriptor that accepts the request. - -\layout Standard - -In all cases, if the message is rejected, the incoming message is discarded - and the dropped message count for the interface is incremented. -\layout Standard - -A memory descriptor may reject an incoming request for any of the following - reasons: -\layout Itemize - -the -\family typewriter -PTL_MD_PUT -\family default - or -\family typewriter -PTL_MD_GET -\family default - option has not been enabled and the operation is put or get, respectively; - -\layout Itemize - -the length specified in the request is too long for the memory descriptor - and the -\family typewriter -PTL_MD_TRUNCATE -\family default - option has not been enabled. -\layout Chapter - -Examples -\begin_inset LatexCommand \label{sec:examples} - -\end_inset - - -\layout Comment - -The examples presented in this chapter have not been updated to reflect - the current API. -\layout Standard - -In this section we present several examples to illustrate expected usage - patterns for the Portals 3.2 API. - The first example describes how to implement parallel servers using the - features of the Portals 3.2 API. - This example covers the access control list and the use of remote managed - offsets. - The second example presents an approach to dealing with dropped requests. - This example covers aspects of match lists and memory descriptors. 
- The final example covers message reception in MPI. - This example illustrates more sophisticated uses of matching and a procedure - to update a memory descriptor. -\layout Section - -Parallel File Servers -\begin_inset LatexCommand \label{sec:expfs} - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:file} - -\end_inset - - illustrates the logical structure of a parallel file server. - In this case, the parallel server consists of four servers that stripe - application data across four disks. - We would like to present applications with the illusion that the file server - is a single entity. - We will assume that all of the processes that constitute the parallel server - have the same user id. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename file.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 196pt - lyxheight 147pt -\end_inset - - -\layout Caption - -Parallel File Server -\begin_inset LatexCommand \label{fig:file} - -\end_inset - - -\end_inset - - -\layout Standard - -When an application establishes a connection to the parallel file server, - it will allocate a Portal and access control list entry for communicating - with the server. - The access control list entry will include the Portal and match any process - in the parallel file server, so all of the file server processes will - have access to the portal. - The Portal information and access control entry will be sent to the file - server at this time. - If the application and server need to have multiple, concurrent I/O operations, - they can use additional portals or match entries to keep the operations - from interfering with one another. 
-\layout Standard - -When an application initiates an I/O operation, it first builds a memory - descriptor that describes the memory region involved in the operation. - This memory descriptor will enable the appropriate operation (put for read - operations and get for write operations) and enable the use of remote offsets - (this lets the servers decide where their data should be placed in the - memory region). - After creating the memory descriptor and linking it into the appropriate - Portal entry, the application sends a read or write request (using -\emph on -PtlPut -\emph default -) to one of the file server processes. - The file server processes can then use put or get operations with the appropria -te offsets to fill or retrieve the contents of the application's buffer. - To know when the operation has completed, the application can add an event - queue to the memory descriptor and add up the lengths of the remote operations - until the sum is the size of the requested I/O operation. -\layout Section - -Dealing with Dropped Requests -\begin_inset LatexCommand \label{sec:exdrop} - -\end_inset - - -\layout Standard - -If a process does not anticipate unexpected requests, they will be discarded. - Applications using the Portals API can query the dropped count for the - interface to determine the number of requests that have been dropped (see - Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - While this approach minimizes resource consumption, it does not provide - information that might be critical in debugging the implementation of a - higher level protocol. -\layout Standard - -To keep track of more information about dropped requests, we use a memory - descriptor that truncates each incoming request to zero bytes and logs - the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - operations in an event queue. 
- Note that the operations are not dropped in the Portals sense, because - the operation succeeds. -\layout Standard - -The following code fragment illustrates an implementation of this approach. - In this case, we assume that a thread is launched to execute the function - -\family typewriter -watch_drop -\family default -. - This code starts by building an event queue to log truncated operations - and a memory descriptor to truncate the incoming requests. - This example only captures -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests for a single portal. - In a more realistic situation, the memory descriptor would be appended - to the match list for every portal. - We also assume that the thread is capable of keeping up with the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests. - If this is not the case, we could use a finite threshold on the memory - descriptor to capture the first few dropped requests. 
-\layout LyX-Code - - -\size small -#include -\newline -#include -\newline -#include -\newline - -\newline -#define DROP_SIZE 32 /* number of dropped requests to track */ -\newline - -\newline -int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) { -\newline - ptl_handle_eq_t drop_events; -\newline - ptl_event_t event; -\newline - ptl_handle_md_t drop_em; -\newline - ptl_md_t drop_desc; -\newline - ptl_process_id_t any_proc; -\newline - ptl_handle_me_t match_any; -\newline - -\newline - /* create the event queue */ -\newline - if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the event queue -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* build a match entry */ -\newline - any_proc.nid = PTL_ID_ANY; -\newline - any_proc.pid = PTL_ID_ANY; -\newline - PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN, -\newline - &match_any ); -\newline - -\newline - /* create the memory descriptor */ -\newline - drop_desc.start = NULL; -\newline - drop_desc.length = 0; -\newline - drop_desc.threshold = PTL_MD_THRESH_INF; -\newline - drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE; -\newline - drop_desc.user_ptr = NULL; -\newline - drop_desc.eventq = drop_events; -\newline - if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the memory descriptor -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* watch for "dropped" requests */ -\newline - while( 1 ) { -\newline - if( PtlEQWait( drop_events, &event ) != PTL_OK ) break; -\newline - fprintf( stderr, "Dropped request from gid = event.initiator.gid, - event.initiator.rid ); -\newline - } -\newline -} -\layout Section - -Message Transmission in MPI -\begin_inset LatexCommand \label{sec:exmpi} - -\end_inset - - -\layout Standard - -We conclude this section with a fairly extensive example that describes - an approach to 
implementing message transmission for MPI. - Like many MPI implementations, we distinguish two message transmission - protocols: a short message protocol and a long message protocol. - We use the constant -\family typewriter -MPI_LONG_LENGTH -\family default - to determine the size of a long message. -\layout Standard - -For small messages, the sender simply sends the message and presumes that - the message will be received (i.e., the receiver has allocated a memory region - to receive the message body). - For large messages, the sender also sends the message, but does not presume - that the message body will be saved. - Instead, the sender builds a memory descriptor for the message and enables - get operations on this descriptor. - If the target does not save the body of the message, it will record an - event for the put operation. - When the process later issues a matching MPI receive, it will perform a - get operation to retrieve the body of the message. -\layout Standard - -To facilitate receive side matching based on the protocol, we use the most - significant bit in the match bits to indicate the protocol: 1 for long - messages and 0 for short messages. -\layout Standard - -The following code presents a function that implements the send side of - the protocol. - The global variable -\family typewriter -EndGet -\family default - is the last match entry attached to the Portal index used for posting long - messages. - This entry does not match any incoming requests (i.e., the memory descriptor - rejects all get operations) and is built during initialization of the MPI - library. - The other global variable, -\family typewriter -MPI_NI -\family default -, is a handle for the network interface used by the MPI implementation. 
-\layout LyX-Code - - -\size small -extern ptl_handle_me_t EndGet; -\newline -extern ptl_handle_ni_t MPI_NI; -\newline - -\newline -void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq, -\newline - ptl_process_id target, ptl_match_bits_t match ) -\newline -{ -\newline - ptl_handle_md_t send_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_ack_req_t want_ack; -\newline - -\newline - mem_desc.start = buf; -\newline - mem_desc.length = len; -\newline - mem_desc.threshold = 1; -\newline - mem_desc.options = PTL_MD_GET_OP; -\newline - mem_desc.user_ptr = data; -\newline - mem_desc.eventq = eventq; -\newline - -\newline - if( len >= MPI_LONG_LENGTH ) { -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - /* add a match entry to the end of the get list */ -\newline - PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet, - &me_handle ); -\newline - PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL ); -\newline - -\newline - /* we want an ack for long messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a long message - */ -\newline - match |= 1<<63; -\newline - } else { -\newline - /* we don't want an ack for short messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a short message - */ -\newline - match &= ~(1<<63); -\newline - } -\newline - -\newline - /* create a memory descriptor and send it */ -\newline - PtlMDBind( MPI_NI, mem_desc, &send_handle ); -\newline - PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match, - 0 ); -\newline -} -\layout Standard - -The -\emph on -MPISend -\emph default - function returns as soon as the message has been scheduled for transmission. - The event queue argument, -\family typewriter -eventq -\family default -, can be used to determine the disposition of the message. 
- Assuming that -\family typewriter -eventq -\family default - is not -\family typewriter -PTL_EQ_NONE -\family default -, a -\family typewriter -PTL_EVENT_SENT -\family default - event will be recorded for each message as the message is transmitted. - For small messages, this is the only event that will be recorded in -\family typewriter -eventq -\family default -. - In contrast, long messages include an explicit request for an acknowledgement. - If the -\family typewriter -target -\family default - process has posted a matching receive, the acknowledgement will be sent - as the message is received. - If a matching receive has not been posted, the message will be discarded - and no acknowledgement will be sent. - When the -\family typewriter -target -\family default - process later issues a matching receive, the receive will be translated - into a get operation and a -\family typewriter -PTL_EVENT_GET -\family default - event will be recorded in -\family typewriter -eventq -\family default -. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:mpi} - -\end_inset - - illustrates the organization of the match list used for receiving MPI messages. - The initial entries (not shown in this figure) would be used to match the - MPI receives that have been preposted by the application. - The preposted receives are followed by a match entry, -\emph on -RcvMark -\emph default -, that marks the boundary between preposted receives and the memory descriptors - used for -\begin_inset Quotes eld -\end_inset - -unexpected -\begin_inset Quotes erd -\end_inset - - messages. - The -\emph on -RcvMark -\emph default - entry is followed by a small collection of match entries that match unexpected - -\begin_inset Quotes eld -\end_inset - -short -\begin_inset Quotes erd -\end_inset - - messages, i.e., messages that have a 0 in the most significant bit of their - match bits. 
- The memory descriptors associated with these match entries will append - the incoming message to the associated memory descriptor and record an - event in an event queue for unexpected messages. - The unexpected short message matching entries are followed by a match entry - that will match messages that were not matched by the preceding match entries, - i.e., the unexpected long messages. - The memory descriptor associated with this match entry truncates the message - body and records an event in the event queue for unexpected messages. - Note that all of the memory descriptors used for unexpected messages share - a common event queue. - This makes it possible to process the unexpected messages in the order - in which they arrived, regardless of whether they are short or long. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename mpi.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 389pt - lyxheight 284pt -\end_inset - - -\layout Caption - -Message Reception in MPI -\begin_inset LatexCommand \label{fig:mpi} - -\end_inset - - -\end_inset - - -\layout Standard - -When the local MPI process posts an MPI receive, we must first search the - events in the unexpected message queue to see if a matching message has already - arrived. - If no matching message is found, a match entry for the receive is inserted - before the -\emph on -RcvMark -\emph default - entry--after the match entries for all of the previously posted receives - and before the match entries for the unexpected messages. - This ensures that preposted receives are matched in the order that they - were posted (a requirement of MPI). 
- -\layout Standard - -While this strategy respects the temporal semantics of MPI, it introduces - a race condition: a matching message might arrive after the events in the - unexpected message queue have been searched, but before the match entry - for the receive has been inserted in the match list. - -\layout Standard - -To avoid this race condition we start by setting the -\family typewriter -threshold -\family default - of the memory descriptor to 0, making the descriptor inactive. - We then insert the match entry into the match list and proceed to search - the events in the unexpected message queue. - A matching message that arrives as we are searching the unexpected message - queue will not be accepted by the memory descriptor and, if not matched - by an earlier match list element, will add an event to the unexpected message - queue. - After searching the events in the unexpected message queue, we update the - memory descriptor, setting the threshold to 1 to activate the memory descriptor. - This update is predicated on the condition that the unexpected message - queue is empty. - We repeat the process of searching the unexpected message queue until the - update succeeds. -\layout Standard - -The following code fragment illustrates this approach. - Because events must be removed from the unexpected message queue to be - examined, this code fragment assumes the existence of a user-managed event - list, -\family typewriter -Rcvd -\family default -, for the events that have already been removed from the unexpected message - queue. - In an effort to keep the example focused on the basic protocol, we have - omitted the code that would be needed to manage the memory descriptors - used for unexpected short messages. - In particular, we simply leave messages in these descriptors until they - are received by the application. 
- In a robust implementation, we would introduce code to ensure that short - unexpected messages are removed from these memory descriptors so that they - can be re-used. -\layout LyX-Code - - -\size small -extern ptl_handle_eq_t UnexpQueue; -\newline -extern ptl_handle_me_t RcvMark; -\newline -extern ptl_handle_me_t ShortMatch; -\newline - -\newline -typedef struct event_list_tag { -\newline - ptl_event_t event; -\newline - struct event_list_tag* next; -\newline -} event_list; -\newline - -\newline -extern event_list Rcvd; -\newline - -\newline -void AppendRcvd( ptl_event_t event ) -\newline -{ -\newline - /* append an event onto the Rcvd list */ -\newline -} -\newline - -\newline -int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi -ts_t match, -\newline - ptl_match_bits_t ignore, ptl_event_t *event ) -\newline -{ -\newline - /* Search the Rcvd event queue, looking for a message that matches the - requested message. -\newline - * If one is found, remove the event from the Rcvd list and return it. 
- */ -\newline -} -\newline - -\newline -typedef enum { RECEIVED, POSTED } receive_state; -\newline - -\newline -receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event, - ptl_md_t md_buf ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - if( event.rlength >= MPI_LONG_LENGTH ) { -\newline - PtlMDBind( MPI_NI, md_buf, &md_handle ); -\newline - PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX, - md_handle ); -\newline - return POSTED; -\newline - } else { -\newline - /* copy the message */ -\newline - if( event.mlength < *length ) *length = event.mlength; -\newline - memcpy( buf, (char*)event.md_desc.start+event.offset, *length ); -\newline - return RECEIVED; -\newline - } -\newline -} -\newline - -\newline -receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle -_eq_t eventq, -\newline - ptl_process_id_t sender, ptl_match_bits_t match, - ptl_match_bits_t ignore ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_handle_me_t me_handle; -\newline - ptl_event_t event; -\newline - -\newline - /* build a memory descriptor for the receive */ -\newline - md_buf.start = buf; -\newline - md_buf.length = *len; -\newline - md_buf.threshold = 0; /* temporarily disabled */ -\newline - md_buf.options = PTL_MD_PUT_OP; -\newline - md_buf.user_ptr = MPI_data; -\newline - md_buf.eventq = eventq; -\newline - -\newline - /* see if we have already received the message */ -\newline - if( SearchRcvd(buf, len, sender, match, ignore, &event) ) -\newline - return CopyMsg( buf, len, event, md_buf ); -\newline - -\newline - /* create the match entry and attach the memory descriptor */ -\newline - PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark, - &me_handle); -\newline - PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle ); -\newline - -\newline - md_buf.threshold = 1; -\newline - do -\newline - if( 
PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) { -\newline - if( MPIMatch(event, match, ignore, sender) ) { -\newline - return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset, - md_buf ); -\newline - } else { -\newline - AppendRcvd( event ); -\newline - } -\newline - } -\newline - while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE - ); -\newline - return POSTED; -\newline -} -\layout Chapter* - -Acknowledgments -\layout Standard - -Several people have contributed to the philosophy, design, and implementation - of the Portals message passing architecture as it has evolved. - We acknowledge the following people for their contributions: Al Audette, - Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike - Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke, - Dave van Dresser, Lee Ward, and Stephen Wheat. - -\layout Standard - - -\begin_inset LatexCommand \BibTeX[ieee]{portals3} - -\end_inset - - -\the_end diff --git a/lnet/doc/put.fig b/lnet/doc/put.fig deleted file mode 100644 index 5235b6d..0000000 --- a/lnet/doc/put.fig +++ /dev/null @@ -1,32 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1350 900 2175 1200 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1275 2700 1725 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 1200 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2699 1788 899 1938 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001 -4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001 -4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 Acknowledgement\001 -4 0 0 100 0 0 10 0.0000 
0 105 405 2850 1500 Portal\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 diff --git a/lnet/include/.cvsignore b/lnet/include/.cvsignore deleted file mode 100644 index d45f796..0000000 --- a/lnet/include/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -config.h -stamp-h -stamp-h1 -stamp-h.in diff --git a/lnet/include/Makefile.am b/lnet/include/Makefile.am deleted file mode 100644 index 2cf7f99..0000000 --- a/lnet/include/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -SUBDIRS = portals linux -EXTRA_DIST = config.h.in -include $(top_srcdir)/Rules diff --git a/lnet/include/config.h.in b/lnet/include/config.h.in deleted file mode 100644 index 3aa6909..0000000 --- a/lnet/include/config.h.in +++ /dev/null @@ -1,58 +0,0 @@ -/* portals/include/config.h.in. Generated from configure.in by autoheader. */ - -/* Define to 1 if you have the <inttypes.h> header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the <memory.h> header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have the <stdint.h> header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the <stdlib.h> header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the <strings.h> header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the <string.h> header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the <sys/types.h> header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the <unistd.h> header file. */ -#undef HAVE_UNISTD_H - -/* IOCTL Buffer Size */ -#undef OBD_MAX_IOCTL_BUFFER - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. 
*/ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* The size of a `unsigned long long', as computed by sizeof. */ -#undef SIZEOF_UNSIGNED_LONG_LONG - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Version number of package */ -#undef VERSION diff --git a/lnet/include/linux/Makefile.am b/lnet/include/linux/Makefile.am deleted file mode 100644 index 6a65cb5..0000000 --- a/lnet/include/linux/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include $(top_srcdir)/Rules - -linuxincludedir = $(includedir)/linux - -linuxinclude_HEADERS=kp30.h portals_lib.h diff --git a/lnet/include/linux/kp30.h b/lnet/include/linux/kp30.h deleted file mode 100644 index ee3b9fc..0000000 --- a/lnet/include/linux/kp30.h +++ /dev/null @@ -1,941 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _KP30_INCLUDED -#define _KP30_INCLUDED - - -#define PORTAL_DEBUG - -#ifndef offsetof -# define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb))) -#endif - -#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) - -#ifndef CONFIG_SMP -# define smp_processor_id() 0 -#endif - -/* - * Debugging - */ -extern unsigned int portal_subsystem_debug; -extern unsigned int portal_stack; -extern unsigned int portal_debug; -extern unsigned int portal_printk; -/* Debugging subsystems (8 bit ID) - * - * If you add debug subsystem #32, you need to send email to phil, because - * you're going to break kernel subsystem debug filtering. 
*/ -#define S_UNDEFINED (0 << 24) -#define S_MDC (1 << 24) -#define S_MDS (2 << 24) -#define S_OSC (3 << 24) -#define S_OST (4 << 24) -#define S_CLASS (5 << 24) -#define S_OBDFS (6 << 24) /* obsolete */ -#define S_LLITE (7 << 24) -#define S_RPC (8 << 24) -#define S_EXT2OBD (9 << 24) /* obsolete */ -#define S_PORTALS (10 << 24) -#define S_SOCKNAL (11 << 24) -#define S_QSWNAL (12 << 24) -#define S_PINGER (13 << 24) -#define S_FILTER (14 << 24) -#define S_TRACE (15 << 24) /* obsolete */ -#define S_ECHO (16 << 24) -#define S_LDLM (17 << 24) -#define S_LOV (18 << 24) -#define S_GMNAL (19 << 24) -#define S_PTLROUTER (20 << 24) -#define S_COBD (21 << 24) -#define S_PTLBD (22 << 24) -#define S_LOG (23 << 24) - -/* If you change these values, please keep portals/linux/utils/debug.c - * up to date! */ - -/* Debugging masks (24 bits, non-overlapping) */ -#define D_TRACE (1 << 0) /* ENTRY/EXIT markers */ -#define D_INODE (1 << 1) -#define D_SUPER (1 << 2) -#define D_EXT2 (1 << 3) /* anything from ext2_debug */ -#define D_MALLOC (1 << 4) /* print malloc, free information */ -#define D_CACHE (1 << 5) /* cache-related items */ -#define D_INFO (1 << 6) /* general information */ -#define D_IOCTL (1 << 7) /* ioctl related information */ -#define D_BLOCKS (1 << 8) /* ext2 block allocation */ -#define D_NET (1 << 9) /* network communications */ -#define D_WARNING (1 << 10) -#define D_BUFFS (1 << 11) -#define D_OTHER (1 << 12) -#define D_DENTRY (1 << 13) -#define D_PORTALS (1 << 14) /* ENTRY/EXIT markers */ -#define D_PAGE (1 << 15) /* bulk page handling */ -#define D_DLMTRACE (1 << 16) -#define D_ERROR (1 << 17) /* CERROR(...) == CDEBUG (D_ERROR, ...) */ -#define D_EMERG (1 << 18) /* CEMERG(...) == CDEBUG (D_EMERG, ...) 
*/ -#define D_HA (1 << 19) /* recovery and failover */ -#define D_RPCTRACE (1 << 20) /* for distributed debugging */ -#define D_VFSTRACE (1 << 21) - -#ifndef __KERNEL__ -#define THREAD_SIZE 8192 -#endif -#ifdef __ia64__ -#define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -#else -#define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_frame_address(0) & \ - (THREAD_SIZE - 1))) -#endif - -#ifdef __KERNEL__ -#define CHECK_STACK(stack) \ - do { \ - if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, D_ERROR, \ - __FILE__, __FUNCTION__, __LINE__, \ - (stack), \ - "maximum lustre stack %u\n", \ - portal_stack = (stack)); \ - /*panic("LBUG");*/ \ - } \ - } while (0) -#else -#define CHECK_STACK(stack) do { } while(0) -#endif - -#if 1 -#define CDEBUG(mask, format, a...) \ -do { \ - CHECK_STACK(CDEBUG_STACK()); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ - (portal_debug & (mask) && \ - portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24)))) \ - portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ - __FILE__, __FUNCTION__, __LINE__, \ - CDEBUG_STACK(), format , ## a); \ -} while (0) - -#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a) -#define CEMERG(format, a...) 
CDEBUG(D_EMERG, format, ## a) - -#define GOTO(label, rc) \ -do { \ - long GOTO__ret = (long)(rc); \ - CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ - #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ - (signed long)GOTO__ret); \ - goto label; \ -} while (0) - -#define RETURN(rc) \ -do { \ - typeof(rc) RETURN__ret = (rc); \ - CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ - (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ - return RETURN__ret; \ -} while (0) - -#define ENTRY \ -do { \ - CDEBUG(D_TRACE, "Process entered\n"); \ -} while (0) - -#define EXIT \ -do { \ - CDEBUG(D_TRACE, "Process leaving\n"); \ -} while(0) -#else -#define CDEBUG(mask, format, a...) do { } while (0) -#define CWARN(format, a...) do { } while (0) -#define CERROR(format, a...) printk("<3>" format, ## a) -#define CEMERG(format, a...) printk("<0>" format, ## a) -#define GOTO(label, rc) do { (void)(rc); goto label; } while (0) -#define RETURN(rc) return (rc) -#define ENTRY do { } while (0) -#define EXIT do { } while (0) -#endif - - -#ifdef __KERNEL__ -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define schedule_work schedule_task -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_TQUEUE((wq), 0, 0); \ - PREPARE_TQUEUE((wq), (cb), (cbdata)); \ -} while (0) - -#define ll_invalidate_inode_pages invalidate_inode_pages -#define PageUptodate Page_Uptodate -#define our_recalc_sigpending(current) recalc_sigpending(current) -#define num_online_cpus() smp_num_cpus -static inline void our_cond_resched(void) -{ - if (current->need_resched) - schedule (); -} - -#else - -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ -} while (0) -#define ll_invalidate_inode_pages(inode) invalidate_inode_pages((inode)->i_mapping) -#define wait_on_page wait_on_page_locked -#define 
our_recalc_sigpending(current) recalc_sigpending() -#define strtok(a,b) strpbrk(a, b) -static inline void our_cond_resched(void) -{ - cond_resched(); -} -#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ - -#ifdef PORTAL_DEBUG -extern void kportal_assertion_failed(char *expr,char *file,char *func,int line); -#define LASSERT(e) ((e) ? 0 : kportal_assertion_failed( #e , __FILE__, \ - __FUNCTION__, __LINE__)) -#else -#define LASSERT(e) -#endif - -#ifdef __arch_um__ -#define LBUG_WITH_LOC(file, func, line) \ -do { \ - CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \ - portals_debug_dumplog(); \ - portals_run_lbug_upcall(file, func, line); \ - panic("LBUG"); \ -} while (0) -#else -#define LBUG_WITH_LOC(file, func, line) \ -do { \ - CEMERG("LBUG\n"); \ - portals_debug_dumplog(); \ - portals_run_lbug_upcall(file, func, line); \ - set_task_state(current, TASK_UNINTERRUPTIBLE); \ - schedule(); \ -} while (0) -#endif /* __arch_um__ */ - -#define LBUG() LBUG_WITH_LOC(__FILE__, __FUNCTION__, __LINE__) - -/* - * Memory - */ -#ifdef PORTAL_DEBUG -extern atomic_t portal_kmemory; - -# define portal_kmem_inc(ptr, size) \ -do { \ - atomic_add(size, &portal_kmemory); \ -} while (0) - -# define portal_kmem_dec(ptr, size) do { \ - atomic_sub(size, &portal_kmemory); \ -} while (0) - -#else -# define portal_kmem_inc(ptr, size) do {} while (0) -# define portal_kmem_dec(ptr, size) do {} while (0) -#endif /* PORTAL_DEBUG */ - -#define PORTAL_VMALLOC_SIZE 16384 - -#define PORTAL_ALLOC(ptr, size) \ -do { \ - LASSERT (!in_interrupt()); \ - if ((size) > PORTAL_VMALLOC_SIZE) \ - (ptr) = vmalloc(size); \ - else \ - (ptr) = kmalloc((size), GFP_NOFS); \ - if ((ptr) == NULL) \ - CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ - #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - else { \ - portal_kmem_inc((ptr), (size)); \ - memset((ptr), 0, (size)); \ - } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), (ptr), atomic_read 
(&portal_kmemory)); \ -} while (0) - -#define PORTAL_FREE(ptr, size) \ -do { \ - int s = (size); \ - if ((ptr) == NULL) { \ - CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ - "%s:%d\n", s, __FILE__, __LINE__); \ - break; \ - } \ - if (s > PORTAL_VMALLOC_SIZE) \ - vfree(ptr); \ - else \ - kfree(ptr); \ - portal_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - s, (ptr), atomic_read(&portal_kmemory)); \ -} while (0) - -#define PORTAL_SLAB_ALLOC(ptr, slab, size) \ -do { \ - LASSERT(!in_interrupt()); \ - (ptr) = kmem_cache_alloc((slab), SLAB_KERNEL); \ - if ((ptr) == NULL) { \ - CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ - " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ - __LINE__); \ - } else { \ - portal_kmem_inc((ptr), (size)); \ - memset((ptr), 0, (size)); \ - } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ - (int)(size), (ptr), atomic_read(&portal_kmemory)); \ -} while (0) - -#define PORTAL_SLAB_FREE(ptr, slab, size) \ -do { \ - int s = (size); \ - if ((ptr) == NULL) { \ - CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ - "%s:%d\n", s, __FILE__, __LINE__); \ - break; \ - } \ - memset((ptr), 0x5a, s); \ - kmem_cache_free((slab), ptr); \ - portal_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ -} while (0) - -/* ------------------------------------------------------------------- */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) -#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) - -#define PORTAL_MODULE_USE MOD_INC_USE_COUNT -#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT -#else - -#define PORTAL_SYMBOL_REGISTER(x) -#define PORTAL_SYMBOL_UNREGISTER(x) - -#define 
PORTAL_SYMBOL_GET(x) symbol_get(x) -#define PORTAL_SYMBOL_PUT(x) symbol_put(x) - -#define PORTAL_MODULE_USE try_module_get(THIS_MODULE) -#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) - -#endif - -/******************************************************************************/ -/* Kernel Portals Router interface */ - -typedef void (*kpr_fwd_callback_t)(void *arg, int error); // completion callback - -/* space for routing targets to stash "stuff" in a forwarded packet */ -typedef union { - long long _alignment; - void *_space[16]; /* scale with CPU arch */ -} kprfd_scratch_t; - -/* Kernel Portals Routing Forwarded message Descriptor */ -typedef struct { - struct list_head kprfd_list; /* stash in queues (routing target can use) */ - ptl_nid_t kprfd_target_nid; /* final destination NID */ - ptl_nid_t kprfd_gateway_nid; /* gateway NID */ - int kprfd_nob; /* # message bytes (including header) */ - int kprfd_niov; /* # message frags (including header) */ - struct iovec *kprfd_iov; /* message fragments */ - void *kprfd_router_arg; // originating NAL's router arg - kpr_fwd_callback_t kprfd_callback; /* completion callback */ - void *kprfd_callback_arg; /* completion callback arg */ - kprfd_scratch_t kprfd_scratch; // scratchpad for routing targets -} kpr_fwd_desc_t; - -typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd); - -/* NAL's routing interface (Kernel Portals Routing Nal Interface) */ -typedef const struct { - int kprni_nalid; /* NAL's id */ - void *kprni_arg; /* Arg to pass when calling into NAL */ - kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */ -} kpr_nal_interface_t; - -/* Router's routing interface (Kernel Portals Routing Router Interface) */ -typedef const struct { - /* register the calling NAL with the router and get back the handle for - * subsequent calls */ - int (*kprri_register) (kpr_nal_interface_t *nal_interface, - void **router_arg); - - /* ask the router to find a gateway that forwards to 'nid' and is a peer - * of the calling NAL 
*/ - int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, - ptl_nid_t *gateway_nid); - - /* hand a packet over to the router for forwarding */ - kpr_fwd_t kprri_fwd_start; - - /* hand a packet back to the router for completion */ - void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd, - int error); - - /* the calling NAL is shutting down */ - void (*kprri_shutdown) (void *router_arg); - - /* deregister the calling NAL with the router */ - void (*kprri_deregister) (void *router_arg); - -} kpr_router_interface_t; - -/* Convenient struct for NAL to stash router interface/args */ -typedef struct { - kpr_router_interface_t *kpr_interface; - void *kpr_arg; -} kpr_router_t; - -/* Router's control interface (Kernel Portals Routing Control Interface) */ -typedef const struct { - int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_del_route)(ptl_nid_t nid); - int (*kprci_get_route)(int index, int *gateway_nal, - ptl_nid_t *gateway, ptl_nid_t *lo_nid, - ptl_nid_t *hi_nid); -} kpr_control_interface_t; - -extern kpr_control_interface_t kpr_control_interface; -extern kpr_router_interface_t kpr_router_interface; - -static inline int -kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif) -{ - int rc; - - router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface); - if (router->kpr_interface == NULL) - return (-ENOENT); - - rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg); - if (rc != 0) - router->kpr_interface = NULL; - - PORTAL_SYMBOL_PUT (kpr_router_interface); - return (rc); -} - -static inline int -kpr_routing (kpr_router_t *router) -{ - return (router->kpr_interface != NULL); -} - -static inline int -kpr_lookup (kpr_router_t *router, ptl_nid_t nid, ptl_nid_t *gateway_nid) -{ - if (!kpr_routing (router)) - return (-EHOSTUNREACH); - - return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, - gateway_nid)); -} - -static inline void -kpr_fwd_init 
(kpr_fwd_desc_t *fwd, ptl_nid_t nid, - int nob, int niov, struct iovec *iov, - kpr_fwd_callback_t callback, void *callback_arg) -{ - fwd->kprfd_target_nid = nid; - fwd->kprfd_gateway_nid = nid; - fwd->kprfd_nob = nob; - fwd->kprfd_niov = niov; - fwd->kprfd_iov = iov; - fwd->kprfd_callback = callback; - fwd->kprfd_callback_arg = callback_arg; -} - -static inline void -kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd) -{ - if (!kpr_routing (router)) - fwd->kprfd_callback (fwd->kprfd_callback_arg, -EHOSTUNREACH); - else - router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd); -} - -static inline void -kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error) -{ - LASSERT (kpr_routing (router)); - router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error); -} - -static inline void -kpr_shutdown (kpr_router_t *router) -{ - if (kpr_routing (router)) - router->kpr_interface->kprri_shutdown (router->kpr_arg); -} - -static inline void -kpr_deregister (kpr_router_t *router) -{ - if (!kpr_routing (router)) - return; - router->kpr_interface->kprri_deregister (router->kpr_arg); - router->kpr_interface = NULL; -} - -/******************************************************************************/ - -#ifdef PORTALS_PROFILING -#define prof_enum(FOO) PROF__##FOO -enum { - prof_enum(our_recvmsg), - prof_enum(our_sendmsg), - prof_enum(socknal_recv), - prof_enum(lib_parse), - prof_enum(conn_list_walk), - prof_enum(memcpy), - prof_enum(lib_finalize), - prof_enum(pingcli_time), - prof_enum(gmnal_send), - prof_enum(gmnal_recv), - MAX_PROFS -}; - -struct prof_ent { - char *str; - /* hrmph. wrap-tastic. 
*/ - u32 starts; - u32 finishes; - cycles_t total_cycles; - cycles_t start; - cycles_t end; -}; - -extern struct prof_ent prof_ents[MAX_PROFS]; - -#define PROF_START(FOO) \ - do { \ - struct prof_ent *pe = &prof_ents[PROF__##FOO]; \ - pe->starts++; \ - pe->start = get_cycles(); \ - } while (0) - -#define PROF_FINISH(FOO) \ - do { \ - struct prof_ent *pe = &prof_ents[PROF__##FOO]; \ - pe->finishes++; \ - pe->end = get_cycles(); \ - pe->total_cycles += (pe->end - pe->start); \ - } while (0) -#else /* !PORTALS_PROFILING */ -#define PROF_START(FOO) do {} while(0) -#define PROF_FINISH(FOO) do {} while(0) -#endif /* PORTALS_PROFILING */ - -/* debug.c */ -void portals_run_lbug_upcall(char * file, char *fn, int line); -void portals_debug_dumplog(void); -int portals_debug_init(unsigned long bufsize); -int portals_debug_cleanup(void); -int portals_debug_clear_buffer(void); -int portals_debug_mark_buffer(char *text); -int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *file, unsigned int size); -__s32 portals_debug_copy_to_user(char *buf, unsigned long len); -#if (__GNUC__) -/* Use the special GNU C __attribute__ hack to have the compiler check the - * printf style argument string against the actual argument count and - * types. - */ -#ifdef printf -# warning printf has been defined as a macro... -# undef printf -#endif -void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) - __attribute__ ((format (printf, 7, 8))); -#else -void portals_debug_msg (int subsys, int mask, char *file, char *fn, - int line, unsigned long stack, - const char *format, ...); -#endif /* __GNUC__ */ -void portals_debug_set_level(unsigned int debug_level); - -# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) -# define printf(format, b...) 
CDEBUG(D_OTHER, format , ## b) -# define time(a) CURRENT_TIME - -extern void kportal_daemonize (char *name); -extern void kportal_blockallsigs (void); - -#else /* !__KERNEL__ */ -# include -# include -#ifndef __CYGWIN__ -# include -#endif -# include -# include -# include -# ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -# endif -# ifdef PORTAL_DEBUG -# undef NDEBUG -# include -# define LASSERT(e) assert(e) -# else -# define LASSERT(e) -# endif -# define printk(format, args...) printf (format, ## args) -# define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0); -# define PORTAL_FREE(a, b) do { free(a); } while (0); -# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \ - printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ - (subsys) >> 24, (mask), (long)time(0), file, fn, line, \ - getpid() , stack, ## a); -#endif - -#ifndef CURRENT_TIME -# define CURRENT_TIME time(0) -#endif - -#include - -/* - * USER LEVEL STUFF BELOW - */ - -#define PORTAL_IOCTL_VERSION 0x00010007 -#define PING_SYNC 0 -#define PING_ASYNC 1 - -struct portal_ioctl_data { - __u32 ioc_len; - __u32 ioc_version; - __u64 ioc_nid; - __u64 ioc_nid2; - __u64 ioc_nid3; - __u32 ioc_count; - __u32 ioc_nal; - __u32 ioc_nal_cmd; - __u32 ioc_fd; - __u32 ioc_id; - - __u32 ioc_flags; - __u32 ioc_size; - - __u32 ioc_wait; - __u32 ioc_timeout; - __u32 ioc_misc; - - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - - __u32 ioc_plen1; /* buffers in userspace */ - char *ioc_pbuf1; - __u32 ioc_plen2; /* buffers in userspace */ - char *ioc_pbuf2; - - char ioc_bulk[0]; -}; - -struct portal_ioctl_hdr { - __u32 ioc_len; - __u32 ioc_version; -}; - -struct portals_debug_ioctl_data -{ - struct portal_ioctl_hdr hdr; - unsigned int subs; - unsigned int debug; -}; - -#define PORTAL_IOC_INIT(data) \ -do { \ - memset(&data, 0, sizeof(data)); \ - data.ioc_version = PORTAL_IOCTL_VERSION; \ - data.ioc_len = sizeof(data); \ -} while (0) - -/* 
FIXME check conflict with lustre_lib.h */ -#define PTL_IOC_DEBUG_MASK _IOWR('f', 250, long) - -static inline int portal_ioctl_packlen(struct portal_ioctl_data *data) -{ - int len = sizeof(*data); - len += size_round(data->ioc_inllen1); - len += size_round(data->ioc_inllen2); - return len; -} - -static inline int portal_ioctl_is_invalid(struct portal_ioctl_data *data) -{ - if (data->ioc_len > (1<<30)) { - CERROR ("PORTALS ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - CERROR ("PORTALS ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - CERROR ("PORTALS ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR ("PORTALS ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR ("PORTALS ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR ("PORTALS ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR ("PORTALS ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR ("PORTALS ioctl: plen1 nonzero but no pbuf1 pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR ("PORTALS ioctl: plen2 nonzero but no pbuf2 pointer\n"); - return 1; - } - if (portal_ioctl_packlen(data) != data->ioc_len ) { - CERROR ("PORTALS ioctl: packlen != ioc_len\n"); - return 1; - } - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - CERROR ("PORTALS ioctl: inlbuf1 not 0 terminated\n"); - return 1; - } - if (data->ioc_inllen2 && - data->ioc_bulk[size_round(data->ioc_inllen1) + - data->ioc_inllen2 - 1] != '\0') { - CERROR ("PORTALS ioctl: inlbuf2 not 0 terminated\n"); - return 1; - } - return 0; -} - -#ifndef __KERNEL__ -static inline int portal_ioctl_pack(struct 
portal_ioctl_data *data, char **pbuf, - int max) -{ - char *ptr; - struct portal_ioctl_data *overlay; - data->ioc_len = portal_ioctl_packlen(data); - data->ioc_version = PORTAL_IOCTL_VERSION; - - if (*pbuf && portal_ioctl_packlen(data) > max) - return 1; - if (*pbuf == NULL) { - *pbuf = malloc(data->ioc_len); - } - if (!*pbuf) - return 1; - overlay = (struct portal_ioctl_data *)*pbuf; - memcpy(*pbuf, data, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (portal_ioctl_is_invalid(overlay)) - return 1; - - return 0; -} -#else -#include - -/* buffer MUST be at least the size of portal_ioctl_hdr */ -static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct portal_ioctl_hdr *hdr; - struct portal_ioctl_data *data; - int err; - ENTRY; - - hdr = (struct portal_ioctl_hdr *)buf; - data = (struct portal_ioctl_data *)buf; - - err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); - if ( err ) { - EXIT; - return err; - } - - if (hdr->ioc_version != PORTAL_IOCTL_VERSION) { - CERROR ("PORTALS: version mismatch kernel vs application\n"); - return -EINVAL; - } - - if (hdr->ioc_len + buf >= end) { - CERROR ("PORTALS: user buffer exceeds kernel buffer\n"); - return -EINVAL; - } - - - if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) { - CERROR ("PORTALS: user buffer too small for ioctl\n"); - return -EINVAL; - } - - err = copy_from_user(buf, (void *)arg, hdr->ioc_len); - if ( err ) { - EXIT; - return err; - } - - if (portal_ioctl_is_invalid(data)) { - CERROR ("PORTALS: ioctl not correctly formatted\n"); - return -EINVAL; - } - - if (data->ioc_inllen1) { - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - } - - if (data->ioc_inllen2) { - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); - } - - EXIT; - return 0; -} -#endif - -/* ioctls for manipulating snapshots 30- */ -#define 
IOC_PORTAL_TYPE 'e' -#define IOC_PORTAL_MIN_NR 30 - -#define IOC_PORTAL_PING _IOWR('e', 30, long) -#define IOC_PORTAL_GET_DEBUG _IOWR('e', 31, long) -#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long) -#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, long) -#define IOC_PORTAL_PANIC _IOWR('e', 34, long) -#define IOC_PORTAL_ADD_ROUTE _IOWR('e', 35, long) -#define IOC_PORTAL_DEL_ROUTE _IOWR('e', 36, long) -#define IOC_PORTAL_GET_ROUTE _IOWR('e', 37, long) -#define IOC_PORTAL_NAL_CMD _IOWR('e', 38, long) -#define IOC_PORTAL_GET_NID _IOWR('e', 39, long) -#define IOC_PORTAL_FAIL_NID _IOWR('e', 40, long) -#define IOC_PORTAL_SET_DAEMON _IOWR('e', 41, long) - -#define IOC_PORTAL_MAX_NR 41 - -enum { - QSWNAL = 1, - SOCKNAL, - GMNAL, - TOENAL, - TCPNAL, - SCIMACNAL, - NAL_ENUM_END_MARKER -}; - -#ifdef __KERNEL__ -extern ptl_handle_ni_t kqswnal_ni; -extern ptl_handle_ni_t ksocknal_ni; -extern ptl_handle_ni_t ktoenal_ni; -extern ptl_handle_ni_t kgmnal_ni; -extern ptl_handle_ni_t kscimacnal_ni; -#endif - -#define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) - -#define NAL_CMD_REGISTER_PEER_FD 100 -#define NAL_CMD_CLOSE_CONNECTION 101 -#define NAL_CMD_REGISTER_MYNID 102 -#define NAL_CMD_PUSH_CONNECTION 103 - -enum { - DEBUG_DAEMON_START = 1, - DEBUG_DAEMON_STOP = 2, - DEBUG_DAEMON_PAUSE = 3, - DEBUG_DAEMON_CONTINUE = 4, -}; - -/* XXX remove to lustre ASAP */ -struct lustre_peer { - ptl_nid_t peer_nid; - ptl_handle_ni_t peer_ni; -}; - -/* module.c */ -typedef int (*nal_cmd_handler_t)(struct portal_ioctl_data *, void * private); -int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private); -int kportal_nal_unregister(int nal); - -ptl_handle_ni_t *kportal_get_ni (int nal); -void kportal_put_ni (int nal); - -#ifdef __CYGWIN__ -#ifndef BITS_PER_LONG -#if (~0UL) == 0xffffffffUL -#define BITS_PER_LONG 32 -#else -#define BITS_PER_LONG 64 -#endif -#endif -#endif - -#if (BITS_PER_LONG == 32 || __WORDSIZE == 32) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# 
define LPSZ "%u" -# define LPSSZ "%d" -#endif -#if (BITS_PER_LONG == 64 || __WORDSIZE == 64) -# define LPU64 "%lu" -# define LPD64 "%ld" -# define LPX64 "%#lx" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#endif -#ifndef LPU64 -# error "No word size defined" -#endif - -#endif diff --git a/lnet/include/linux/portals_compat25.h b/lnet/include/linux/portals_compat25.h deleted file mode 100644 index e28fbac..0000000 --- a/lnet/include/linux/portals_compat25.h +++ /dev/null @@ -1,13 +0,0 @@ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sighand->siglock, flags) -# define RECALC_SIGPENDING recalc_sigpending() -#else -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sigmask_lock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sigmask_lock, flags) -# define RECALC_SIGPENDING recalc_sigpending(current) -#endif diff --git a/lnet/include/linux/portals_lib.h b/lnet/include/linux/portals_lib.h deleted file mode 100644 index a528a80..0000000 --- a/lnet/include/linux/portals_lib.h +++ /dev/null @@ -1,188 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef _PORTALS_LIB_H -#define _PORTALS_LIB_H - -#ifndef __KERNEL__ -# include -#else -# include -#endif - -#undef MIN -#define MIN(a,b) (((a)<(b)) ? (a): (b)) -#undef MAX -#define MAX(a,b) (((a)>(b)) ? (a): (b)) -#define MKSTR(ptr) ((ptr))? (ptr) : "" - -static inline int size_round (int val) -{ - return (val + 7) & (~0x7); -} - -static inline int size_round0(int val) -{ - if (!val) - return 0; - return (val + 1 + 7) & (~0x7); -} - -static inline size_t round_strlen(char *fset) -{ - return size_round(strlen(fset) + 1); -} - -#ifdef __KERNEL__ -static inline char *strdup(const char *str) -{ - int len = strlen(str) + 1; - char *tmp = kmalloc(len, GFP_KERNEL); - if (tmp) - memcpy(tmp, str, len); - - return tmp; -} -#endif - -#ifdef __KERNEL__ -# define NTOH__u32(var) le32_to_cpu(var) -# define NTOH__u64(var) le64_to_cpu(var) -# define HTON__u32(var) cpu_to_le32(var) -# define HTON__u64(var) cpu_to_le64(var) -#else -# define expansion_u64(var) \ - ({ __u64 ret; \ - switch (sizeof(var)) { \ - case 8: (ret) = (var); break; \ - case 4: (ret) = (__u32)(var); break; \ - case 2: (ret) = (__u16)(var); break; \ - case 1: (ret) = (__u8)(var); break; \ - }; \ - (ret); \ - }) -# define NTOH__u32(var) (var) -# define NTOH__u64(var) (expansion_u64(var)) -# define HTON__u32(var) (var) -# define HTON__u64(var) (expansion_u64(var)) -#endif - -/* - * copy sizeof(type) bytes from pointer to var and move ptr forward. 
- * return EFAULT if pointer goes beyond end - */ -#define UNLOGV(var,type,ptr,end) \ -do { \ - var = *(type *)ptr; \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -/* the following two macros convert to little endian */ -/* type MUST be __u32 or __u64 */ -#define LUNLOGV(var,type,ptr,end) \ -do { \ - var = NTOH##type(*(type *)ptr); \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -/* now log values */ -#define LOGV(var,type,ptr) \ -do { \ - *((type *)ptr) = var; \ - ptr += sizeof(type); \ -} while (0) - -/* and in network order */ -#define LLOGV(var,type,ptr) \ -do { \ - *((type *)ptr) = HTON##type(var); \ - ptr += sizeof(type); \ -} while (0) - - -/* - * set var to point at (type *)ptr, move ptr forward with sizeof(type) - * return from function with EFAULT if ptr goes beyond end - */ -#define UNLOGP(var,type,ptr,end) \ -do { \ - var = (type *)ptr; \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -#define LOGP(var,type,ptr) \ -do { \ - memcpy(ptr, var, sizeof(type)); \ - ptr += sizeof(type); \ -} while (0) - -/* - * set var to point at (char *)ptr, move ptr forward by size_round(len); - * return from function with EFAULT if ptr goes beyond end - */ -#define UNLOGL(var,type,len,ptr,end) \ -do { \ - var = (type *)ptr; \ - ptr += size_round(len * sizeof(type)); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -#define UNLOGL0(var,type,len,ptr,end) \ -do { \ - UNLOGL(var,type,len,ptr,end); \ - if ( *((char *)ptr - size_round(len) + len - 1) != '\0') \ - return -EFAULT; \ -} while (0) - -#define LOGL(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)ptr, (const char *)var, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGU(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)var, (const char *)ptr, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGL0(var,len,ptr) \ -do { \ - if (!len) \ - break; \ - memcpy((char *)ptr, (const char 
*)var, len); \ - *((char *)(ptr) + len) = 0; \ - ptr += size_round(len + 1); \ -} while (0) - -#endif /* _PORTALS_LIB_H */ diff --git a/lnet/include/lnet/Makefile.am b/lnet/include/lnet/Makefile.am deleted file mode 100644 index c61b084..0000000 --- a/lnet/include/lnet/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -SUBDIRS = base -include $(top_srcdir)/Rules - -pkginclude_HEADERS=api-support.h api.h arg-blocks.h defines.h errno.h internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h myrnal.h nal.h p30.h ppid.h ptlctl.h stringtab.h types.h nalids.h list.h bridge.h ipmap.h procbridge.h lltrace.h - diff --git a/lnet/include/lnet/api-support.h b/lnet/include/lnet/api-support.h deleted file mode 100644 index af4a2dc..0000000 --- a/lnet/include/lnet/api-support.h +++ /dev/null @@ -1,27 +0,0 @@ -# define DEBUG_SUBSYSTEM S_PORTALS -# define PORTAL_DEBUG - -#ifndef __KERNEL__ -# include -# include -# include -# include - -/* Lots of POSIX dependencies to support PtlEQWait_timeout */ -# include -# include -# include -#endif - -#include -#include -#include - -#include -#include -#include - -/* Hack for 2.4.18 macro name collision */ -#ifdef yield -#undef yield -#endif diff --git a/lnet/include/lnet/api.h b/lnet/include/lnet/api.h deleted file mode 100644 index a83749b..0000000 --- a/lnet/include/lnet/api.h +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef P30_API_H -#define P30_API_H - -#include - -#ifndef PTL_NO_WRAP -int PtlInit(void); -int PtlInitialized(void); -void PtlFini(void); - -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size_in, - ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid, - ptl_handle_ni_t * interface_out); - -int PtlNIInitialized(ptl_interface_t); - -int PtlNIFini(ptl_handle_ni_t interface_in); - -#endif - -int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id); - - -/* - * Network 
interfaces - */ - -#ifndef PTL_NO_WRAP -int PtlNIBarrier(ptl_handle_ni_t interface_in); -#endif - -int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t * status_out); - -int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out); - -#ifndef PTL_NO_WRAP -int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); -#endif - - -/* - * PtlNIDebug: - * - * This is not an official Portals 3 API call. It is provided - * by the reference implementation to allow the maintainers an - * easy way to turn on and off debugging information in the - * library. Do not use it in code that is not intended for use - * with any version other than the portable reference library. - */ -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in); - -/* - * PtlNIFailNid - * - * Not an official Portals 3 API call. It provides a way of simulating - * communications failures to all (nid == PTL_NID_ANY), or specific peers - * (via multiple calls), either until further notice (threshold == -1), or - * for a specific number of messages. Passing a threshold of zero, "heals" - * the given peer. 
- */ -int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold); - - -/* - * Match entries - */ - -int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, - ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, - ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, - ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out); - -int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, - ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, - ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, - ptl_handle_me_t * handle_out); - -int PtlMEUnlink(ptl_handle_me_t current_in); - -int PtlMEUnlinkList(ptl_handle_me_t current_in); - -int PtlTblDump(ptl_handle_ni_t ni, int index_in); -int PtlMEDump(ptl_handle_me_t current_in); - - - -/* - * Memory descriptors - */ - -#ifndef PTL_NO_WRAP -int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out); - -int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_handle_md_t * handle_out); - -int PtlMDUnlink(ptl_handle_md_t md_in); - -int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout, - ptl_md_t * new_inout, ptl_handle_eq_t testq_in); - -#endif - -/* These should not be called by users */ -int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout, - ptl_md_t * new_inout, ptl_handle_eq_t testq_in, - ptl_seq_t sequence_in); - - - - -/* - * Event queues - */ -#ifndef PTL_NO_WRAP - -/* These should be called by users */ -int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out); -int PtlEQFree(ptl_handle_eq_t eventq_in); - -int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out); - -int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out); - - -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out); - -int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout); 
-#endif - -/* - * Access Control Table - */ -int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, - ptl_process_id_t match_id_in, ptl_pt_index_t portal_in); - - -/* - * Data movement - */ - -int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, - ptl_process_id_t target_in, ptl_pt_index_t portal_in, - ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in, - ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in); - -int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, - ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in, - ptl_match_bits_t match_bits_in, ptl_size_t offset_in); - - - -#endif diff --git a/lnet/include/lnet/arg-blocks.h b/lnet/include/lnet/arg-blocks.h deleted file mode 100644 index 3c3b154..0000000 --- a/lnet/include/lnet/arg-blocks.h +++ /dev/null @@ -1,265 +0,0 @@ -#ifndef PTL_BLOCKS_H -#define PTL_BLOCKS_H - -/* - * blocks.h - * - * Argument block types for the Portals 3.0 library - * Generated by idl - * - */ - -#include - -/* put LIB_MAX_DISPATCH last here -- these must match the - assignements to the dispatch table in lib-p30/dispatch.c */ -#define PTL_GETID 1 -#define PTL_NISTATUS 2 -#define PTL_NIDIST 3 -#define PTL_NIDEBUG 4 -#define PTL_MEATTACH 5 -#define PTL_MEINSERT 6 -// #define PTL_MEPREPEND 7 -#define PTL_MEUNLINK 8 -#define PTL_TBLDUMP 9 -#define PTL_MEDUMP 10 -#define PTL_MDATTACH 11 -// #define PTL_MDINSERT 12 -#define PTL_MDBIND 13 -#define PTL_MDUPDATE 14 -#define PTL_MDUNLINK 15 -#define PTL_EQALLOC 16 -#define PTL_EQFREE 17 -#define PTL_ACENTRY 18 -#define PTL_PUT 19 -#define PTL_GET 20 -#define PTL_FAILNID 21 -#define LIB_MAX_DISPATCH 21 - -typedef struct PtlFailNid_in { - ptl_handle_ni_t interface; - ptl_nid_t nid; - unsigned int threshold; -} PtlFailNid_in; - -typedef struct PtlFailNid_out { - int rc; -} PtlFailNid_out; - -typedef struct PtlGetId_in { - ptl_handle_ni_t handle_in; -} PtlGetId_in; - -typedef struct PtlGetId_out { - int rc; - ptl_process_id_t id_out; -} PtlGetId_out; - -typedef struct 
PtlNIStatus_in { - ptl_handle_ni_t interface_in; - ptl_sr_index_t register_in; -} PtlNIStatus_in; - -typedef struct PtlNIStatus_out { - int rc; - ptl_sr_value_t status_out; -} PtlNIStatus_out; - - -typedef struct PtlNIDist_in { - ptl_handle_ni_t interface_in; - ptl_process_id_t process_in; -} PtlNIDist_in; - -typedef struct PtlNIDist_out { - int rc; - unsigned long distance_out; -} PtlNIDist_out; - - -typedef struct PtlNIDebug_in { - unsigned int mask_in; -} PtlNIDebug_in; - -typedef struct PtlNIDebug_out { - unsigned int rc; -} PtlNIDebug_out; - - -typedef struct PtlMEAttach_in { - ptl_handle_ni_t interface_in; - ptl_pt_index_t index_in; - ptl_ins_pos_t position_in; - ptl_process_id_t match_id_in; - ptl_match_bits_t match_bits_in; - ptl_match_bits_t ignore_bits_in; - ptl_unlink_t unlink_in; -} PtlMEAttach_in; - -typedef struct PtlMEAttach_out { - int rc; - ptl_handle_me_t handle_out; -} PtlMEAttach_out; - - -typedef struct PtlMEInsert_in { - ptl_handle_me_t current_in; - ptl_process_id_t match_id_in; - ptl_match_bits_t match_bits_in; - ptl_match_bits_t ignore_bits_in; - ptl_unlink_t unlink_in; - ptl_ins_pos_t position_in; -} PtlMEInsert_in; - -typedef struct PtlMEInsert_out { - int rc; - ptl_handle_me_t handle_out; -} PtlMEInsert_out; - -typedef struct PtlMEUnlink_in { - ptl_handle_me_t current_in; - ptl_unlink_t unlink_in; -} PtlMEUnlink_in; - -typedef struct PtlMEUnlink_out { - int rc; -} PtlMEUnlink_out; - - -typedef struct PtlTblDump_in { - int index_in; -} PtlTblDump_in; - -typedef struct PtlTblDump_out { - int rc; -} PtlTblDump_out; - - -typedef struct PtlMEDump_in { - ptl_handle_me_t current_in; -} PtlMEDump_in; - -typedef struct PtlMEDump_out { - int rc; -} PtlMEDump_out; - - -typedef struct PtlMDAttach_in { - ptl_handle_me_t me_in; - ptl_handle_eq_t eq_in; - ptl_md_t md_in; - ptl_unlink_t unlink_in; -} PtlMDAttach_in; - -typedef struct PtlMDAttach_out { - int rc; - ptl_handle_md_t handle_out; -} PtlMDAttach_out; - - -typedef struct PtlMDBind_in { - 
ptl_handle_ni_t ni_in; - ptl_handle_eq_t eq_in; - ptl_md_t md_in; -} PtlMDBind_in; - -typedef struct PtlMDBind_out { - int rc; - ptl_handle_md_t handle_out; -} PtlMDBind_out; - - -typedef struct PtlMDUpdate_internal_in { - ptl_handle_md_t md_in; - ptl_handle_eq_t testq_in; - ptl_seq_t sequence_in; - - ptl_md_t old_inout; - int old_inout_valid; - ptl_md_t new_inout; - int new_inout_valid; -} PtlMDUpdate_internal_in; - -typedef struct PtlMDUpdate_internal_out { - int rc; - ptl_md_t old_inout; - ptl_md_t new_inout; -} PtlMDUpdate_internal_out; - - -typedef struct PtlMDUnlink_in { - ptl_handle_md_t md_in; -} PtlMDUnlink_in; - -typedef struct PtlMDUnlink_out { - int rc; - ptl_md_t status_out; -} PtlMDUnlink_out; - - -typedef struct PtlEQAlloc_in { - ptl_handle_ni_t ni_in; - ptl_size_t count_in; - void *base_in; - int len_in; - int (*callback_in) (ptl_event_t * event); -} PtlEQAlloc_in; - -typedef struct PtlEQAlloc_out { - int rc; - ptl_handle_eq_t handle_out; -} PtlEQAlloc_out; - - -typedef struct PtlEQFree_in { - ptl_handle_eq_t eventq_in; -} PtlEQFree_in; - -typedef struct PtlEQFree_out { - int rc; -} PtlEQFree_out; - - -typedef struct PtlACEntry_in { - ptl_handle_ni_t ni_in; - ptl_ac_index_t index_in; - ptl_process_id_t match_id_in; - ptl_pt_index_t portal_in; -} PtlACEntry_in; - -typedef struct PtlACEntry_out { - int rc; -} PtlACEntry_out; - - -typedef struct PtlPut_in { - ptl_handle_md_t md_in; - ptl_ack_req_t ack_req_in; - ptl_process_id_t target_in; - ptl_pt_index_t portal_in; - ptl_ac_index_t cookie_in; - ptl_match_bits_t match_bits_in; - ptl_size_t offset_in; - ptl_hdr_data_t hdr_data_in; -} PtlPut_in; - -typedef struct PtlPut_out { - int rc; -} PtlPut_out; - - -typedef struct PtlGet_in { - ptl_handle_md_t md_in; - ptl_process_id_t target_in; - ptl_pt_index_t portal_in; - ptl_ac_index_t cookie_in; - ptl_match_bits_t match_bits_in; - ptl_size_t offset_in; -} PtlGet_in; - -typedef struct PtlGet_out { - int rc; -} PtlGet_out; - - -#endif diff --git 
a/lnet/include/lnet/defines.h b/lnet/include/lnet/defines.h deleted file mode 100644 index 785ce73..0000000 --- a/lnet/include/lnet/defines.h +++ /dev/null @@ -1,116 +0,0 @@ -/* -** -** This files contains definitions that are used throughout the cplant code. -*/ - -#ifndef CPLANT_H -#define CPLANT_H - -#define TITLE(fname,zmig) - - -/* -** TRUE and FALSE -*/ -#undef TRUE -#define TRUE (1) -#undef FALSE -#define FALSE (0) - - -/* -** Return codes from functions -*/ -#undef OK -#define OK (0) -#undef ERROR -#define ERROR (-1) - - - -/* -** The GCC macro for a safe max() that works on all types arithmetic types. -*/ -#ifndef MAX -#define MAX(a, b) (a) > (b) ? (a) : (b) -#endif /* MAX */ - -#ifndef MIN -#define MIN(a, b) (a) < (b) ? (a) : (b) -#endif /* MIN */ - -/* -** The rest is from the old qkdefs.h -*/ - -#ifndef __linux__ -#define __inline__ -#endif - -#ifndef NULL -#define NULL ((void *)0) -#endif - -#ifndef __osf__ -#define PRIVATE static -#define PUBLIC -#endif - -#ifndef __osf__ -typedef unsigned char uchar; -#endif - -typedef char CHAR; -typedef unsigned char UCHAR; -typedef char INT8; -typedef unsigned char UINT8; -typedef short int INT16; -typedef unsigned short int UINT16; -typedef int INT32; -typedef unsigned int UINT32; -typedef long LONG32; -typedef unsigned long ULONG32; - -/* long may be 32 or 64, so we can't really append the size to the definition */ -typedef long LONG; -typedef unsigned long ULONG; - -#ifdef __alpha__ -typedef long int_t; -#ifndef __osf__ -typedef unsigned long uint_t; -#endif -#endif - -#ifdef __i386__ -typedef int int_t; -typedef unsigned int uint_t; -#endif - -typedef float FLOAT32; -typedef double FLOAT64; -typedef void VOID; -typedef INT32 BOOLEAN; -typedef void (*FCN_PTR)(void); - -#ifndef off64_t - -#if defined (__alpha__) || defined (__ia64__) -typedef long off64_t; -#else -typedef long long off64_t; -#endif - -#endif - -/* -** Process related typedefs -*/ -typedef UINT16 PID_TYPE; /* Type of Local process ID */ -typedef 
UINT16 NID_TYPE; /* Type of Physical node ID */ -typedef UINT16 GID_TYPE; /* Type of Group ID */ -typedef UINT16 RANK_TYPE; /* Type of Logical rank/process within a group */ - - - -#endif /* CPLANT_H */ diff --git a/lnet/include/lnet/errno.h b/lnet/include/lnet/errno.h deleted file mode 100644 index 817936a..0000000 --- a/lnet/include/lnet/errno.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _P30_ERRNO_H_ -#define _P30_ERRNO_H_ - -/* - * include/portals/errno.h - * - * Shared error number lists - */ - -/* If you change these, you must update the string table in api-errno.c */ -typedef enum { - PTL_OK = 0, - PTL_SEGV = 1, - - PTL_NOSPACE = 2, - PTL_INUSE = 3, - PTL_VAL_FAILED = 4, - - PTL_NAL_FAILED = 5, - PTL_NOINIT = 6, - PTL_INIT_DUP = 7, - PTL_INIT_INV = 8, - PTL_AC_INV_INDEX = 9, - - PTL_INV_ASIZE = 10, - PTL_INV_HANDLE = 11, - PTL_INV_MD = 12, - PTL_INV_ME = 13, - PTL_INV_NI = 14, -/* If you change these, you must update the string table in api-errno.c */ - PTL_ILL_MD = 15, - PTL_INV_PROC = 16, - PTL_INV_PSIZE = 17, - PTL_INV_PTINDEX = 18, - PTL_INV_REG = 19, - - PTL_INV_SR_INDX = 20, - PTL_ML_TOOLONG = 21, - PTL_ADDR_UNKNOWN = 22, - PTL_INV_EQ = 23, - PTL_EQ_DROPPED = 24, - - PTL_EQ_EMPTY = 25, - PTL_NOUPDATE = 26, - PTL_FAIL = 27, - PTL_NOT_IMPLEMENTED = 28, - PTL_NO_ACK = 29, - - PTL_IOV_TOO_MANY = 30, - PTL_IOV_TOO_SMALL = 31, - - PTL_EQ_INUSE = 32, - PTL_MD_INUSE = 33, - - PTL_MAX_ERRNO = 33 -} ptl_err_t; -/* If you change these, you must update the string table in api-errno.c */ - -extern const char *ptl_err_str[]; - -#endif diff --git a/lnet/include/lnet/internal.h b/lnet/include/lnet/internal.h deleted file mode 100644 index d78cad4..0000000 --- a/lnet/include/lnet/internal.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -*/ -#ifndef _P30_INTERNAL_H_ -#define _P30_INTERNAL_H_ - -/* - * p30/internal.h - * - * Internals for the API level library that are not needed - * by the user application - */ - -#include - -extern int ptl_init; /* Has the library be initialized */ 
- -extern int ptl_ni_init(void); -extern int ptl_me_init(void); -extern int ptl_md_init(void); -extern int ptl_eq_init(void); - -extern int ptl_me_ni_init(nal_t * nal); -extern int ptl_md_ni_init(nal_t * nal); -extern int ptl_eq_ni_init(nal_t * nal); - -extern void ptl_ni_fini(void); -extern void ptl_me_fini(void); -extern void ptl_md_fini(void); -extern void ptl_eq_fini(void); - -extern void ptl_me_ni_fini(nal_t * nal); -extern void ptl_md_ni_fini(nal_t * nal); -extern void ptl_eq_ni_fini(nal_t * nal); - -static inline ptl_eq_t * -ptl_handle2usereq (ptl_handle_eq_t *handle) -{ - /* EQ handles are a little wierd. On the "user" side, the cookie - * is just a pointer to a queue of events in shared memory. It's - * cb_eq_handle is the "real" handle which we pass when we - * call do_forward(). */ - return (ptl_eq_t *)((unsigned long)handle->cookie); -} - -#endif diff --git a/lnet/include/lnet/lib-dispatch.h b/lnet/include/lnet/lib-dispatch.h deleted file mode 100644 index f87ff83..0000000 --- a/lnet/include/lnet/lib-dispatch.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef PTL_DISPATCH_H -#define PTL_DISPATCH_H - -/* - * include/dispatch.h - * - * Dispatch table header and externs for remote side - * operations - * - * Generated by idl - * - */ - -#include -#include - -extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIDebug(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, 
void *ret); -extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret); - -extern char *dispatch_name(int index); -#endif diff --git a/lnet/include/lnet/lib-lnet.h b/lnet/include/lnet/lib-lnet.h deleted file mode 100644 index b623b93..0000000 --- a/lnet/include/lnet/lib-lnet.h +++ /dev/null @@ -1,385 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib-p30.h - * - * Top level include for library side routines - */ - -#ifndef _LIB_P30_H_ -#define _LIB_P30_H_ - -#ifdef __KERNEL__ -# include -# include -#else -# include -# include -#endif -#include -#include -#include -#include -#include -#include -#include - -static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) -{ - return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie && - wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); -} - -#ifdef __KERNEL__ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - nal->cb_cli(nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - nal->cb_sti(nal, flagsp); \ -} -#else -/* not needed in user space 
until we thread there */ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} -#endif /* __KERNEL__ */ - -#ifndef PTL_USE_SLAB_CACHE - -#define MAX_MES 2048 -#define MAX_MDS 2048 -#define MAX_MSGS 2048 /* Outstanding messages */ -#define MAX_EQS 512 - -extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize); -extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl); - -static inline void * -lib_freelist_alloc (lib_freelist_t *fl) -{ - /* ALWAYS called with statelock held */ - lib_freeobj_t *o; - - if (list_empty (&fl->fl_list)) - return (NULL); - - o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list); - list_del (&o->fo_list); - return ((void *)&o->fo_contents); -} - -static inline void -lib_freelist_free (lib_freelist_t *fl, void *obj) -{ - /* ALWAYS called with statelock held */ - lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents); - - list_add (&o->fo_list, &fl->fl_list); -} - - -static inline lib_eq_t * -lib_eq_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_eq_t *eq; - - state_lock (nal, &flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs); - state_unlock (nal, &flags); - - return (eq); -} - -static inline void -lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_eqs, eq); -} - -static inline lib_md_t * -lib_md_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_md_t *md; - - state_lock (nal, &flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds); - state_unlock (nal, &flags); - - return (md); -} - -static inline void -lib_md_free (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS 
called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mds, md); -} - -static inline lib_me_t * -lib_me_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_me_t *me; - - state_lock (nal, &flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes); - state_unlock (nal, &flags); - - return (me); -} - -static inline void -lib_me_free (nal_cb_t *nal, lib_me_t *me) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mes, me); -} - -static inline lib_msg_t * -lib_msg_alloc (nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - return ((lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs)); -} - -static inline void -lib_msg_free (nal_cb_t *nal, lib_msg_t *msg) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_msgs, msg); -} - -#else - -extern kmem_cache_t *ptl_md_slab; -extern kmem_cache_t *ptl_msg_slab; -extern kmem_cache_t *ptl_me_slab; -extern kmem_cache_t *ptl_eq_slab; -extern atomic_t md_in_use_count; -extern atomic_t msg_in_use_count; -extern atomic_t me_in_use_count; -extern atomic_t eq_in_use_count; - -static inline lib_eq_t * -lib_eq_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); - - if (eq == NULL) - return (NULL); - - atomic_inc (&eq_in_use_count); - return (eq); -} - -static inline void -lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); -} - -static inline lib_md_t * -lib_md_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); - - if (md == NULL) - return (NULL); - - atomic_inc (&md_in_use_count); - return (md); -} - -static inline void -lib_md_free (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&md_in_use_count); - 
kmem_cache_free(ptl_md_slab, md); -} - -static inline lib_me_t * -lib_me_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); - - if (me == NULL) - return (NULL); - - atomic_inc (&me_in_use_count); - return (me); -} - -static inline void -lib_me_free(nal_cb_t *nal, lib_me_t *me) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); -} - -static inline lib_msg_t * -lib_msg_alloc(nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); - - if (msg == NULL) - return (NULL); - - atomic_inc (&msg_in_use_count); - return (msg); -} - -static inline void -lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); -} -#endif - -extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type); -extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type); -extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh); - -static inline void -ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq) -{ - handle->cookie = eq->eq_lh.lh_cookie; -} - -static inline lib_eq_t * -ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_EQ); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_eq_t, eq_lh)); -} - -static inline void -ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md) -{ - handle->cookie = md->md_lh.lh_cookie; -} - -static inline lib_md_t * -ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_md_t, md_lh)); -} - 
-static inline lib_md_t * -ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh; - - if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie) - return (NULL); - - lh = lib_lookup_cookie (nal, wh->wh_object_cookie, - PTL_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_md_t, md_lh)); -} - -static inline void -ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me) -{ - handle->cookie = me->me_lh.lh_cookie; -} - -static inline lib_me_t * -ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_ME); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_me_t, me_lh)); -} - -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); -extern int lib_fini(nal_cb_t * cb); -extern void lib_dispatch(nal_cb_t * cb, void *private, int index, - void *arg_block, void *ret_block); -extern char *dispatch_name(int index); - -/* - * When the NAL detects an incoming message, it should call - * lib_parse() decode it. The NAL callbacks will be handed - * the private cookie as a way for the NAL to maintain state - * about which transaction is being processed. An extra parameter, - * lib_cookie will contain the necessary information for - * finalizing the message. - * - * After it has finished the handling the message, it should - * call lib_finalize() with the lib_cookie parameter. - * Call backs will be made to write events, send acks or - * replies and so on. 
- */ -extern int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private); -extern int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t * msg); -extern void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr); - -extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov); -extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len); -extern void lib_copy_buf2iov (int niov, struct iovec *iov, char *dest, ptl_size_t len); - -extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov); -extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *iov, ptl_size_t len); -extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *iov, char *src, ptl_size_t len); -extern void lib_assert_wire_constants (void); - -extern void lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, - ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); -extern int lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len); - -extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in, - ptl_md_t * md_out); -extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in); -extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in); -#endif diff --git a/lnet/include/lnet/lib-nal.h b/lnet/include/lnet/lib-nal.h deleted file mode 100644 index 4052c0c..0000000 --- a/lnet/include/lnet/lib-nal.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef _LIB_NAL_H_ -#define _LIB_NAL_H_ - -/* - * nal.h - * - * Library side headers that define the abstraction layer's - * responsibilities and interfaces - */ - -#include - -struct nal_cb_t { - /* - * Per interface portal table, access control table - * and NAL private data field; - */ - lib_ni_t ni; - void *nal_data; - /* - * send: Sends a preformatted header and user data to a - * specified remote process. - * Can overwrite iov. 
- */ - int (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, size_t mlen); - - /* as send, but with a set of page fragments (NULL if not supported) */ - int (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, ptl_kiov_t *iov, size_t mlen); - /* - * recv: Receives an incoming message from a remote process - * Type of iov depends on options. Can overwrite iov. - */ - int (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - unsigned int niov, struct iovec *iov, size_t mlen, - size_t rlen); - - /* as recv, but with a set of page fragments (NULL if not supported) */ - int (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - unsigned int niov, ptl_kiov_t *iov, size_t mlen, - size_t rlen); - /* - * read: Reads a block of data from a specified user address - */ - int (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr, - user_ptr src_addr, size_t len); - - /* - * write: Writes a block of data into a specified user address - */ - int (*cb_write) (nal_cb_t * nal, void *private, user_ptr dsr_addr, - void *src_addr, size_t len); - - /* - * callback: Calls an event callback - */ - int (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq, - ptl_event_t *ev); - - /* - * malloc: Acquire a block of memory in a system independent - * fashion. - */ - void *(*cb_malloc) (nal_cb_t * nal, size_t len); - - void (*cb_free) (nal_cb_t * nal, void *buf, size_t len); - - /* - * (un)map: Tell the NAL about some memory it will access. - * *addrkey passed to cb_unmap() is what cb_map() set it to. - * type of *iov depends on options. - * Set to NULL if not required. 
- */ - int (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, - void **addrkey); - void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, - void **addrkey); - - /* as (un)map, but with a set of page fragments */ - int (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - - void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...); - - /* Turn interrupts off (begin of protected area) */ - void (*cb_cli) (nal_cb_t * nal, unsigned long *flags); - - /* Turn interrupts on (end of protected area) */ - void (*cb_sti) (nal_cb_t * nal, unsigned long *flags); - - /* - * Calculate a network "distance" to given node - */ - int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist); -}; - -#endif diff --git a/lnet/include/lnet/lib-p30.h b/lnet/include/lnet/lib-p30.h deleted file mode 100644 index b623b93..0000000 --- a/lnet/include/lnet/lib-p30.h +++ /dev/null @@ -1,385 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib-p30.h - * - * Top level include for library side routines - */ - -#ifndef _LIB_P30_H_ -#define _LIB_P30_H_ - -#ifdef __KERNEL__ -# include -# include -#else -# include -# include -#endif -#include -#include -#include -#include -#include -#include -#include - -static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) -{ - return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie && - wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); -} - -#ifdef __KERNEL__ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - nal->cb_cli(nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - nal->cb_sti(nal, flagsp); \ -} -#else -/* not needed in user space until we thread there */ -#define 
state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} -#endif /* __KERNEL__ */ - -#ifndef PTL_USE_SLAB_CACHE - -#define MAX_MES 2048 -#define MAX_MDS 2048 -#define MAX_MSGS 2048 /* Outstanding messages */ -#define MAX_EQS 512 - -extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize); -extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl); - -static inline void * -lib_freelist_alloc (lib_freelist_t *fl) -{ - /* ALWAYS called with statelock held */ - lib_freeobj_t *o; - - if (list_empty (&fl->fl_list)) - return (NULL); - - o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list); - list_del (&o->fo_list); - return ((void *)&o->fo_contents); -} - -static inline void -lib_freelist_free (lib_freelist_t *fl, void *obj) -{ - /* ALWAYS called with statelock held */ - lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents); - - list_add (&o->fo_list, &fl->fl_list); -} - - -static inline lib_eq_t * -lib_eq_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_eq_t *eq; - - state_lock (nal, &flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs); - state_unlock (nal, &flags); - - return (eq); -} - -static inline void -lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_eqs, eq); -} - -static inline lib_md_t * -lib_md_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_md_t *md; - - state_lock (nal, &flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds); - state_unlock (nal, &flags); - - return (md); -} - -static inline void -lib_md_free (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS called with statelock held */ - 
lib_freelist_free (&nal->ni.ni_free_mds, md); -} - -static inline lib_me_t * -lib_me_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_me_t *me; - - state_lock (nal, &flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes); - state_unlock (nal, &flags); - - return (me); -} - -static inline void -lib_me_free (nal_cb_t *nal, lib_me_t *me) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mes, me); -} - -static inline lib_msg_t * -lib_msg_alloc (nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - return ((lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs)); -} - -static inline void -lib_msg_free (nal_cb_t *nal, lib_msg_t *msg) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_msgs, msg); -} - -#else - -extern kmem_cache_t *ptl_md_slab; -extern kmem_cache_t *ptl_msg_slab; -extern kmem_cache_t *ptl_me_slab; -extern kmem_cache_t *ptl_eq_slab; -extern atomic_t md_in_use_count; -extern atomic_t msg_in_use_count; -extern atomic_t me_in_use_count; -extern atomic_t eq_in_use_count; - -static inline lib_eq_t * -lib_eq_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); - - if (eq == NULL) - return (NULL); - - atomic_inc (&eq_in_use_count); - return (eq); -} - -static inline void -lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); -} - -static inline lib_md_t * -lib_md_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); - - if (md == NULL) - return (NULL); - - atomic_inc (&md_in_use_count); - return (md); -} - -static inline void -lib_md_free (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); -} - -static 
inline lib_me_t * -lib_me_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); - - if (me == NULL) - return (NULL); - - atomic_inc (&me_in_use_count); - return (me); -} - -static inline void -lib_me_free(nal_cb_t *nal, lib_me_t *me) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); -} - -static inline lib_msg_t * -lib_msg_alloc(nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); - - if (msg == NULL) - return (NULL); - - atomic_inc (&msg_in_use_count); - return (msg); -} - -static inline void -lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); -} -#endif - -extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type); -extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type); -extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh); - -static inline void -ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq) -{ - handle->cookie = eq->eq_lh.lh_cookie; -} - -static inline lib_eq_t * -ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_EQ); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_eq_t, eq_lh)); -} - -static inline void -ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md) -{ - handle->cookie = md->md_lh.lh_cookie; -} - -static inline lib_md_t * -ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_md_t, md_lh)); -} - -static inline lib_md_t * -ptl_wire_handle2md 
(ptl_handle_wire_t *wh, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh; - - if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie) - return (NULL); - - lh = lib_lookup_cookie (nal, wh->wh_object_cookie, - PTL_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_md_t, md_lh)); -} - -static inline void -ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me) -{ - handle->cookie = me->me_lh.lh_cookie; -} - -static inline lib_me_t * -ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_ME); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_me_t, me_lh)); -} - -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); -extern int lib_fini(nal_cb_t * cb); -extern void lib_dispatch(nal_cb_t * cb, void *private, int index, - void *arg_block, void *ret_block); -extern char *dispatch_name(int index); - -/* - * When the NAL detects an incoming message, it should call - * lib_parse() decode it. The NAL callbacks will be handed - * the private cookie as a way for the NAL to maintain state - * about which transaction is being processed. An extra parameter, - * lib_cookie will contain the necessary information for - * finalizing the message. - * - * After it has finished the handling the message, it should - * call lib_finalize() with the lib_cookie parameter. - * Call backs will be made to write events, send acks or - * replies and so on. 
- */ -extern int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private); -extern int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t * msg); -extern void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr); - -extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov); -extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len); -extern void lib_copy_buf2iov (int niov, struct iovec *iov, char *dest, ptl_size_t len); - -extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov); -extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *iov, ptl_size_t len); -extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *iov, char *src, ptl_size_t len); -extern void lib_assert_wire_constants (void); - -extern void lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, - ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); -extern int lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len); - -extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in, - ptl_md_t * md_out); -extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in); -extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in); -#endif diff --git a/lnet/include/lnet/lib-types.h b/lnet/include/lnet/lib-types.h deleted file mode 100644 index 47c0dd2..0000000 --- a/lnet/include/lnet/lib-types.h +++ /dev/null @@ -1,282 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * p30/lib-types.h - * - * Types used by the library side routines that do not need to be - * exposed to the user application - */ - -#ifndef _LIB_TYPES_H_ -#define _LIB_TYPES_H_ - -#include -#ifdef __KERNEL__ -# define PTL_USE_SLAB_CACHE -# include -# include -# include -#else -# include -#endif - -/* struct nal_cb_t is defined in lib-nal.h */ -typedef struct nal_cb_t nal_cb_t; - -typedef char *user_ptr; -typedef 
struct lib_msg_t lib_msg_t; -typedef struct lib_ptl_t lib_ptl_t; -typedef struct lib_ac_t lib_ac_t; -typedef struct lib_me_t lib_me_t; -typedef struct lib_md_t lib_md_t; -typedef struct lib_eq_t lib_eq_t; - -#define WIRE_ATTR __attribute__((packed)) - -/* The wire handle's interface cookie only matches one network interface in - * one epoch (i.e. new cookie when the interface restarts or the node - * reboots). The object cookie only matches one object on that interface - * during that object's lifetime (i.e. no cookie re-use). */ -typedef struct { - __u64 wh_interface_cookie; - __u64 wh_object_cookie; -} WIRE_ATTR ptl_handle_wire_t; - -/* byte-flip insensitive! */ -#define PTL_WIRE_HANDLE_NONE \ -((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1}) - -typedef enum { - PTL_MSG_ACK = 0, - PTL_MSG_PUT, - PTL_MSG_GET, - PTL_MSG_REPLY, - PTL_MSG_HELLO, -} ptl_msg_type_t; - -/* Each of these structs should start with an odd number of - * __u32, or the compiler could add its own padding and confuse - * everyone. - * - * Also, "length" needs to be at offset 28 of each struct. 
- */ -typedef struct ptl_ack { - ptl_size_t mlength; - ptl_handle_wire_t dst_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for acks) moving out RSN */ -} WIRE_ATTR ptl_ack_t; - -typedef struct ptl_put { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t ack_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length moving out RSN */ - ptl_size_t offset; - ptl_hdr_data_t hdr_data; -} WIRE_ATTR ptl_put_t; - -typedef struct ptl_get { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t return_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for gets) moving out RSN */ - ptl_size_t src_offset; - ptl_size_t return_offset; /* unused: going RSN */ - ptl_size_t sink_length; -} WIRE_ATTR ptl_get_t; - -typedef struct ptl_reply { - __u32 unused1; /* unused fields going RSN */ - ptl_handle_wire_t dst_wmd; - ptl_size_t dst_offset; /* unused: going RSN */ - __u32 unused2; - ptl_size_t length; /* common length moving out RSN */ -} WIRE_ATTR ptl_reply_t; - -typedef struct { - ptl_nid_t dest_nid; - ptl_nid_t src_nid; - ptl_pid_t dest_pid; - ptl_pid_t src_pid; - __u32 type; /* ptl_msg_type_t */ - union { - ptl_ack_t ack; - ptl_put_t put; - ptl_get_t get; - ptl_reply_t reply; - } msg; -} WIRE_ATTR ptl_hdr_t; - -/* All length fields in individual unions at same offset */ -/* LASSERT for same in lib-move.c */ -#define PTL_HDR_LENGTH(h) ((h)->msg.ack.length) - -/* A HELLO message contains the portals magic number and protocol version - * code in the header's dest_nid, the peer's NID in the src_nid, and - * PTL_MSG_HELLO in the type field. All other fields are zero (including - * PTL_HDR_LENGTH; i.e. no payload). - * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is - * running the same protocol and to find out its NID, so that hosts with - * multiple IP interfaces can have a single NID. These NALs should exchange - * HELLO messages when a connection is first established. 
*/ -typedef struct { - __u32 magic; /* PORTALS_PROTO_MAGIC */ - __u16 version_major; /* increment on incompatible change */ - __u16 version_minor; /* increment on compatible change */ -} WIRE_ATTR ptl_magicversion_t; - -#define PORTALS_PROTO_MAGIC 0xeebc0ded - -#define PORTALS_PROTO_VERSION_MAJOR 0 -#define PORTALS_PROTO_VERSION_MINOR 1 - -typedef struct { - long recv_count, recv_length, send_count, send_length, drop_count, - drop_length, msgs_alloc, msgs_max; -} lib_counters_t; - -/* temporary expedient: limit number of entries in discontiguous MDs */ -#if PTL_LARGE_MTU -# define PTL_MD_MAX_IOV 64 -#else -# define PTL_MD_MAX_IOV 16 -#endif - -struct lib_msg_t { - struct list_head msg_list; - int send_ack; - lib_md_t *md; - ptl_nid_t nid; - ptl_pid_t pid; - ptl_event_t ev; - ptl_handle_wire_t ack_wmd; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } msg_iov; -}; - -struct lib_ptl_t { - ptl_pt_index_t size; - struct list_head *tbl; -}; - -struct lib_ac_t { - int next_free; -}; - -typedef struct { - struct list_head lh_hash_chain; - __u64 lh_cookie; -} lib_handle_t; - -#define lh_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -struct lib_eq_t { - struct list_head eq_list; - lib_handle_t eq_lh; - ptl_seq_t sequence; - ptl_size_t size; - ptl_event_t *base; - int eq_refcount; - int (*event_callback) (ptl_event_t * event); - void *eq_addrkey; -}; - -struct lib_me_t { - struct list_head me_list; - lib_handle_t me_lh; - ptl_process_id_t match_id; - ptl_match_bits_t match_bits, ignore_bits; - ptl_unlink_t unlink; - lib_md_t *md; -}; - -struct lib_md_t { - struct list_head md_list; - lib_handle_t md_lh; - lib_me_t *me; - user_ptr start; - ptl_size_t offset; - ptl_size_t length; - ptl_size_t max_size; - int threshold; - int pending; - ptl_unlink_t unlink; - unsigned int options; - unsigned int md_flags; - void *user_ptr; - lib_eq_t *eq; - void *md_addrkey; - unsigned int md_niov; /* # frags */ 
- union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } md_iov; -}; - -#define PTL_MD_FLAG_UNLINK (1 << 0) -#define PTL_MD_FLAG_AUTO_UNLINKED (1 << 1) - -#ifndef PTL_USE_SLAB_CACHE -typedef struct -{ - void *fl_objs; /* single contiguous array of objects */ - int fl_nobjs; /* the number of them */ - int fl_objsize; /* the size (including overhead) of each of them */ - struct list_head fl_list; /* where they are enqueued */ -} lib_freelist_t; - -typedef struct -{ - struct list_head fo_list; /* enqueue on fl_list */ - void *fo_contents; /* aligned contents */ -} lib_freeobj_t; -#endif - -typedef struct { - /* info about peers we are trying to fail */ - struct list_head tp_list; /* stash in ni.ni_test_peers */ - ptl_nid_t tp_nid; /* matching nid */ - unsigned int tp_threshold; /* # failures to simulate */ -} lib_test_peer_t; - -#define PTL_COOKIE_TYPE_MD 1 -#define PTL_COOKIE_TYPE_ME 2 -#define PTL_COOKIE_TYPE_EQ 3 -#define PTL_COOKIE_TYPES 4 -/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be - * extracted by masking with (PTL_COOKIE_TYPES - 1) */ - -typedef struct { - int up; - int refcnt; - ptl_nid_t nid; - ptl_pid_t pid; - int num_nodes; - unsigned int debug; - lib_ptl_t tbl; - lib_ac_t ac; - lib_counters_t counters; - - int ni_lh_hash_size; /* size of lib handle hash table */ - struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ - __u64 ni_next_object_cookie; /* cookie generator */ - __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */ - - struct list_head ni_test_peers; - -#ifndef PTL_USE_SLAB_CACHE - lib_freelist_t ni_free_mes; - lib_freelist_t ni_free_msgs; - lib_freelist_t ni_free_mds; - lib_freelist_t ni_free_eqs; -#endif - struct list_head ni_active_msgs; - struct list_head ni_active_mds; - struct list_head ni_active_eqs; -} lib_ni_t; - -#endif diff --git a/lnet/include/lnet/list.h b/lnet/include/lnet/list.h deleted file mode 100644 index 2b63312..0000000 --- 
a/lnet/include/lnet/list.h +++ /dev/null @@ -1,245 +0,0 @@ -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H - - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -#define prefetch(a) ((void)a) - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -#define INIT_LIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ -} while (0) - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head * new, - struct list_head * prev, - struct list_head * next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! 
- */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(struct list_head *head) -{ - return head->next == head; -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. 
- */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next, prefetch(pos->next); pos != (head); \ - pos = pos->next, prefetch(pos->next)) - -/** - * list_for_each_prev - iterate over a list in reverse order - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ - pos = pos->prev, prefetch(pos->prev)) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop counter. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -#endif - -#ifndef list_for_each_entry -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop counter. 
- * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - prefetch(pos->member.next); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member), \ - prefetch(pos->member.next)) -#endif - -#ifndef list_for_each_entry_safe -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop counter. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) -#endif diff --git a/lnet/include/lnet/lltrace.h b/lnet/include/lnet/lltrace.h deleted file mode 100644 index 7d1b304..0000000 --- a/lnet/include/lnet/lltrace.h +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Compile with: - * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl - */ -#ifndef __LTRACE_H_ -#define __LTRACE_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static inline int ltrace_write_file(char* fname) -{ - char* argv[3]; - - argv[0] = "debug_kernel"; - argv[1] = fname; - argv[2] = "1"; - - fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); - - return jt_dbg_debug_kernel(3, argv); -} - -static inline int ltrace_clear() -{ - char* argv[1]; - - argv[0] = "clear"; - - fprintf(stderr, "[ptlctl] %s\n", argv[0]); - - return 
jt_dbg_clear_debug_buf(1, argv); -} - -static inline int ltrace_mark(int indent_level, char* text) -{ - char* argv[2]; - char mark_buf[PATH_MAX]; - - snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text); - - argv[0] = "mark"; - argv[1] = mark_buf; - return jt_dbg_mark_debug_buf(2, argv); -} - -static inline int ltrace_applymasks() -{ - char* argv[2]; - argv[0] = "list"; - argv[1] = "applymasks"; - - fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); - - return jt_dbg_list(2, argv); -} - - -static inline int ltrace_filter(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "filter"; - argv[1] = subsys_or_mask; - return jt_dbg_filter(2, argv); -} - -static inline int ltrace_show(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "show"; - argv[1] = subsys_or_mask; - return jt_dbg_show(2, argv); -} - -static inline int ltrace_start() -{ - int rc = 0; - dbg_initialize(0, NULL); -#ifdef PORTALS_DEV_ID - rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); -#endif - ltrace_filter("class"); - ltrace_filter("socknal"); - ltrace_filter("qswnal"); - ltrace_filter("gmnal"); - ltrace_filter("portals"); - - ltrace_show("all_types"); - ltrace_filter("trace"); - ltrace_filter("malloc"); - ltrace_filter("net"); - ltrace_filter("page"); - ltrace_filter("other"); - ltrace_filter("info"); - ltrace_applymasks(); - - return rc; -} - - -static inline void ltrace_stop() -{ -#ifdef PORTALS_DEV_ID - unregister_ioc_dev(PORTALS_DEV_ID); -#endif -} - -static inline int not_uml() -{ - /* Return Values: - * 0 when run under UML - * 1 when run on host - * <0 when lookup failed - */ - struct stat buf; - int rc = stat("/dev/ubd", &buf); - rc = ((rc<0) && (errno == ENOENT)) ? 
1 : rc; - if (rc<0) { - fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno)); - rc = 1; /* Assume host */ - } - return rc; -} - -#define LTRACE_MAX_NOB 256 -static inline void ltrace_add_processnames(char* fname) -{ - char cmdbuf[LTRACE_MAX_NOB]; - struct timeval tv; - struct timezone tz; - int nob; - int underuml = !not_uml(); - - gettimeofday(&tv, &tz); - - nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); - - /* Careful - these format strings need to match the CDEBUG - * formats in portals/linux/debug.c EXACTLY - */ - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ", - S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec); - - if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d | %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L); - } - else { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); - } - - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); - system(cmdbuf); -} - -#endif diff --git a/lnet/include/lnet/lnet.h b/lnet/include/lnet/lnet.h deleted file mode 100644 index a4ea39b..0000000 --- a/lnet/include/lnet/lnet.h +++ /dev/null @@ -1,72 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _P30_H_ -#define _P30_H_ - -/* - * p30.h - * - * User application interface file - */ - -#if defined (__KERNEL__) -#include -#include -#else -#include -#include -#endif - -#include -#include -#include -#include -#include - -extern int __p30_initialized; /* for libraries & test codes */ -extern int __p30_myr_initialized; /* that don't know if p30 */ -extern int __p30_ip_initialized; /* had been initialized yet */ -extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle; - -extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */ -extern int __p30_ip_timeout; /* 
PtlReduce_all, & PtlBroadcast_all */ - -/* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. - */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... - */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) - -#endif diff --git a/lnet/include/lnet/lnetctl.h b/lnet/include/lnet/lnetctl.h deleted file mode 100644 index dc02780..0000000 --- a/lnet/include/lnet/lnetctl.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * header for libptlctl.a - */ -#ifndef _PTLCTL_H_ -#define _PTLCTL_H_ - -#define PORTALS_DEV_ID 0 -#define PORTALS_DEV_PATH "/dev/portals" -#define OBD_DEV_ID 1 -#define OBD_DEV_PATH "/dev/obd" - -int ptl_name2nal(char *str); -int ptl_parse_nid (ptl_nid_t *nidp, char *str); -char * ptl_nid2str (char *buffer, ptl_nid_t nid); - -int ptl_initialize(int argc, char **argv); -int jt_ptl_network(int argc, char **argv); -int jt_ptl_connect(int argc, char **argv); -int jt_ptl_disconnect(int argc, char **argv); -int jt_ptl_push_connection(int argc, char **argv); -int jt_ptl_ping(int argc, char **argv); -int jt_ptl_shownid(int argc, char **argv); -int jt_ptl_mynid(int argc, char **argv); -int jt_ptl_add_uuid(int argc, char **argv); -int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ -int jt_ptl_close_uuid(int argc, char **argv); -int jt_ptl_del_uuid(int argc, char **argv); -int jt_ptl_rxmem (int argc, char **argv); -int jt_ptl_txmem (int argc, char **argv); -int jt_ptl_nagle (int argc, char **argv); -int jt_ptl_add_route (int argc, char **argv); -int jt_ptl_del_route (int argc, char **argv); -int jt_ptl_print_routes (int argc, char **argv); -int jt_ptl_fail_nid (int argc, char **argv); - -int dbg_initialize(int argc, char **argv); -int jt_dbg_filter(int argc, char **argv); -int jt_dbg_show(int argc, char **argv); -int jt_dbg_list(int argc, char **argv); -int jt_dbg_debug_kernel(int argc, char **argv); -int jt_dbg_debug_daemon(int argc, char **argv); -int jt_dbg_debug_file(int argc, char **argv); -int 
jt_dbg_clear_debug_buf(int argc, char **argv); -int jt_dbg_mark_debug_buf(int argc, char **argv); -int jt_dbg_modules(int argc, char **argv); -int jt_dbg_panic(int argc, char **argv); - -/* l_ioctl.c */ -int register_ioc_dev(int dev_id, const char * dev_name); -void unregister_ioc_dev(int dev_id); -int set_ioctl_dump(char * file); -int l_ioctl(int dev_id, int opc, void *buf); -int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)); -int jt_ioc_dump(int argc, char **argv); - -#endif diff --git a/lnet/include/lnet/myrnal.h b/lnet/include/lnet/myrnal.h deleted file mode 100644 index 12b1925..0000000 --- a/lnet/include/lnet/myrnal.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -*/ - -#ifndef MYRNAL_H -#define MYRNAL_H - -#define MAX_ARGS_LEN (256) -#define MAX_RET_LEN (128) -#define MYRNAL_MAX_ACL_SIZE (64) -#define MYRNAL_MAX_PTL_SIZE (64) - -#define P3CMD (100) -#define P3SYSCALL (200) -#define P3REGISTER (300) - -enum { PTL_MLOCKALL }; - -typedef struct { - void *args; - size_t args_len; - void *ret; - size_t ret_len; - int p3cmd; -} myrnal_forward_t; - -#endif /* MYRNAL_H */ diff --git a/lnet/include/lnet/nal.h b/lnet/include/lnet/nal.h deleted file mode 100644 index 88be63c..0000000 --- a/lnet/include/lnet/nal.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*/ -#ifndef _NAL_H_ -#define _NAL_H_ - -/* - * p30/nal.h - * - * The API side NAL declarations - */ - -#include - -#ifdef yield -#undef yield -#endif - -typedef struct nal_t nal_t; - -struct nal_t { - ptl_ni_t ni; - int refct; - void *nal_data; - int *timeout; /* for libp30api users */ - int (*forward) (nal_t * nal, int index, /* Function ID */ - void *args, size_t arg_len, void *ret, size_t ret_len); - - int (*shutdown) (nal_t * nal, int interface); - - int (*validate) (nal_t * nal, void *base, size_t extent); - - void (*yield) (nal_t * nal); - - void (*lock) (nal_t * nal, unsigned long *flags); - - void (*unlock) (nal_t * nal, unsigned long *flags); -}; - -typedef nal_t *(ptl_interface_t) (int, 
ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - -extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any); - -#ifndef PTL_IFACE_DEFAULT -#define PTL_IFACE_DEFAULT (PTL_IFACE_IP) -#endif - -#endif diff --git a/lnet/include/lnet/nalids.h b/lnet/include/lnet/nalids.h deleted file mode 100644 index 1b837b4..0000000 --- a/lnet/include/lnet/nalids.h +++ /dev/null @@ -1,4 +0,0 @@ -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 diff --git a/lnet/include/lnet/p30.h b/lnet/include/lnet/p30.h deleted file mode 100644 index a4ea39b..0000000 --- a/lnet/include/lnet/p30.h +++ /dev/null @@ -1,72 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _P30_H_ -#define _P30_H_ - -/* - * p30.h - * - * User application interface file - */ - -#if defined (__KERNEL__) -#include -#include -#else -#include -#include -#endif - -#include -#include -#include -#include -#include - -extern int __p30_initialized; /* for libraries & test codes */ -extern int __p30_myr_initialized; /* that don't know if p30 */ -extern int __p30_ip_initialized; /* had been initialized yet */ -extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle; - -extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */ -extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */ - -/* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. 
- */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... - */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) - -#endif diff --git a/lnet/include/lnet/ppid.h b/lnet/include/lnet/ppid.h deleted file mode 100644 index 4727599..0000000 --- a/lnet/include/lnet/ppid.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - */ - -#ifndef _INCppidh_ -#define _INCppidh_ - -#include "defines.h" -// #include "idtypes.h" - - -#define MAX_PPID 1000 /* this needs to fit into 16 bits so the - maximum value is 65535. having it "large" - can help w/ debugging process accounting - but there are reasons for making it - somewhat smaller than the maximum -- - requiring storage for arrays that index - on the ppid, eg... */ - -#define MAX_GID 1000 /* this needs to fit into 16 bits... */ - -#define MAX_FIXED_PPID 100 -#define MAX_FIXED_GID 100 -#define PPID_FLOATING MAX_FIXED_PPID+1 /* Floating area starts here */ -#define GID_FLOATING MAX_FIXED_GID+1 /* Floating area starts here */ -#define NUM_PTL_TASKS MAX_FIXED_PPID+80 /* Maximum no. 
portals tasks */ - -#define PPID_AUTO 0 - -/* Minimum PPID is 1 */ -#define PPID_BEBOPD 1 /* bebopd */ -#define GID_BEBOPD 1 /* bebopd */ - -#define PPID_PCT 2 /* pct */ -#define GID_PCT 2 /* pct */ - -#define PPID_FYOD 3 /* fyod */ -#define GID_FYOD 3 /* fyod */ - -#define PPID_GDBWRAP 11 /* portals proxy for gdb */ -#define GID_GDBWRAP 11 /* portals proxy for gdb */ - -#define PPID_TEST 15 /* for portals tests */ -#define GID_TEST 15 - -#define GID_YOD 5 /* yod */ -#define GID_PINGD 6 /* pingd */ -#define GID_BT 7 /* bt */ -#define GID_PTLTEST 8 /* ptltest */ -#define GID_CGDB 9 /* cgdb */ -#define GID_TVDSVR 10 /* start-tvdsvr */ - -#endif /* _INCppidh_ */ diff --git a/lnet/include/lnet/ptlctl.h b/lnet/include/lnet/ptlctl.h deleted file mode 100644 index dc02780..0000000 --- a/lnet/include/lnet/ptlctl.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * header for libptlctl.a - */ -#ifndef _PTLCTL_H_ -#define _PTLCTL_H_ - -#define PORTALS_DEV_ID 0 -#define PORTALS_DEV_PATH "/dev/portals" -#define OBD_DEV_ID 1 -#define OBD_DEV_PATH "/dev/obd" - -int ptl_name2nal(char *str); -int ptl_parse_nid (ptl_nid_t *nidp, char *str); -char * ptl_nid2str (char *buffer, ptl_nid_t nid); - -int ptl_initialize(int argc, char **argv); -int jt_ptl_network(int argc, char **argv); -int jt_ptl_connect(int argc, char **argv); -int jt_ptl_disconnect(int argc, char **argv); -int jt_ptl_push_connection(int argc, char **argv); -int jt_ptl_ping(int argc, char **argv); -int jt_ptl_shownid(int argc, char **argv); -int jt_ptl_mynid(int argc, char **argv); -int jt_ptl_add_uuid(int argc, char **argv); -int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ -int jt_ptl_close_uuid(int argc, char **argv); -int jt_ptl_del_uuid(int argc, char **argv); -int jt_ptl_rxmem (int argc, char **argv); -int jt_ptl_txmem (int argc, char **argv); -int jt_ptl_nagle (int argc, char **argv); -int jt_ptl_add_route (int argc, char **argv); -int jt_ptl_del_route (int argc, char **argv); -int jt_ptl_print_routes (int argc, char **argv); -int jt_ptl_fail_nid (int argc, char **argv); - -int dbg_initialize(int argc, char **argv); -int jt_dbg_filter(int argc, char **argv); -int jt_dbg_show(int argc, char **argv); -int jt_dbg_list(int argc, char **argv); -int jt_dbg_debug_kernel(int argc, char **argv); -int jt_dbg_debug_daemon(int argc, char **argv); -int jt_dbg_debug_file(int argc, char **argv); -int jt_dbg_clear_debug_buf(int argc, char **argv); -int jt_dbg_mark_debug_buf(int argc, char **argv); -int jt_dbg_modules(int argc, char **argv); -int jt_dbg_panic(int argc, char **argv); - -/* l_ioctl.c */ -int register_ioc_dev(int dev_id, const char * dev_name); -void unregister_ioc_dev(int dev_id); -int set_ioctl_dump(char * file); -int l_ioctl(int dev_id, int opc, void *buf); -int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, 
void *)); -int jt_ioc_dump(int argc, char **argv); - -#endif diff --git a/lnet/include/lnet/stringtab.h b/lnet/include/lnet/stringtab.h deleted file mode 100644 index c9683f7..0000000 --- a/lnet/include/lnet/stringtab.h +++ /dev/null @@ -1,5 +0,0 @@ -/* -*/ -/* - * stringtab.h - */ diff --git a/lnet/include/lnet/types.h b/lnet/include/lnet/types.h deleted file mode 100644 index d4038b6..0000000 --- a/lnet/include/lnet/types.h +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef _P30_TYPES_H_ -#define _P30_TYPES_H_ - -#ifdef __linux__ -#include -#include -#else -#include -typedef u_int32_t __u32; -typedef u_int64_t __u64; -typedef unsigned long long cycles_t; -static inline cycles_t get_cycles(void) { return 0; } -#endif - -typedef __u64 ptl_nid_t; -typedef __u32 ptl_pid_t; -typedef __u32 ptl_pt_index_t; -typedef __u32 ptl_ac_index_t; -typedef __u64 ptl_match_bits_t; -typedef __u64 ptl_hdr_data_t; -typedef __u32 ptl_size_t; - -typedef struct { - unsigned long nal_idx; /* which network interface */ - __u64 cookie; /* which thing on that interface */ -} ptl_handle_any_t; - -typedef ptl_handle_any_t ptl_handle_ni_t; -typedef ptl_handle_any_t ptl_handle_eq_t; -typedef ptl_handle_any_t ptl_handle_md_t; -typedef ptl_handle_any_t ptl_handle_me_t; - -#define PTL_HANDLE_NONE \ -((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) -#define PTL_EQ_NONE PTL_HANDLE_NONE - -static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) -{ - return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie); -} - -#define PTL_NID_ANY ((ptl_nid_t) -1) -#define PTL_PID_ANY ((ptl_pid_t) -1) - -typedef struct { - ptl_nid_t nid; - ptl_pid_t pid; /* node id / process id */ -} ptl_process_id_t; - -typedef enum { - PTL_RETAIN = 0, - PTL_UNLINK -} ptl_unlink_t; - -typedef enum { - PTL_INS_BEFORE, - PTL_INS_AFTER -} ptl_ins_pos_t; - -typedef struct { - struct page *kiov_page; - unsigned int kiov_len; - unsigned int kiov_offset; -} ptl_kiov_t; - -typedef struct { - void *start; - ptl_size_t 
length; - int threshold; - int max_size; - unsigned int options; - void *user_ptr; - ptl_handle_eq_t eventq; - unsigned int niov; -} ptl_md_t; - -/* Options for the MD structure */ -#define PTL_MD_OP_PUT (1 << 0) -#define PTL_MD_OP_GET (1 << 1) -#define PTL_MD_MANAGE_REMOTE (1 << 2) -#define PTL_MD_AUTO_UNLINK (1 << 3) -#define PTL_MD_TRUNCATE (1 << 4) -#define PTL_MD_ACK_DISABLE (1 << 5) -#define PTL_MD_IOV (1 << 6) -#define PTL_MD_MAX_SIZE (1 << 7) -#define PTL_MD_KIOV (1 << 8) - -#define PTL_MD_THRESH_INF (-1) - -typedef enum { - PTL_EVENT_GET, - PTL_EVENT_PUT, - PTL_EVENT_REPLY, - PTL_EVENT_ACK, - PTL_EVENT_SENT -} ptl_event_kind_t; - -#define PTL_SEQ_BASETYPE long -typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t; -#define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0) - -typedef struct { - ptl_event_kind_t type; - ptl_process_id_t initiator; - ptl_pt_index_t portal; - ptl_match_bits_t match_bits; - ptl_size_t rlength, mlength, offset; - ptl_handle_me_t unlinked_me; - ptl_md_t mem_desc; - ptl_hdr_data_t hdr_data; - cycles_t arrival_time; - volatile ptl_seq_t sequence; -} ptl_event_t; - - -typedef enum { - PTL_ACK_REQ, - PTL_NOACK_REQ -} ptl_ack_req_t; - - -typedef struct { - volatile ptl_seq_t sequence; - ptl_size_t size; - ptl_event_t *base; - ptl_handle_any_t cb_eq_handle; -} ptl_eq_t; - -typedef struct { - ptl_eq_t *eq; -} ptl_ni_t; - - -typedef struct { - int max_match_entries; /* max number of match entries */ - int max_mem_descriptors; /* max number of memory descriptors */ - int max_event_queues; /* max number of event queues */ - int max_atable_index; /* maximum access control list table index */ - int max_ptable_index; /* maximum portals table index */ -} ptl_ni_limits_t; - -/* - * Status registers - */ -typedef enum { - PTL_SR_DROP_COUNT, - PTL_SR_DROP_LENGTH, - PTL_SR_RECV_COUNT, - PTL_SR_RECV_LENGTH, - PTL_SR_SEND_COUNT, - PTL_SR_SEND_LENGTH, - PTL_SR_MSGS_MAX, -} ptl_sr_index_t; - -typedef int ptl_sr_value_t; - -#endif diff --git 
a/lnet/klnds/.cvsignore b/lnet/klnds/.cvsignore deleted file mode 100644 index 282522d..0000000 --- a/lnet/klnds/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lnet/klnds/Makefile.am b/lnet/klnds/Makefile.am deleted file mode 100644 index fed2785..0000000 --- a/lnet/klnds/Makefile.am +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DIST_SUBDIRS= socknal toenal qswnal gmnal scimacnal -SUBDIRS= socknal toenal @QSWNAL@ @GMNAL@ @SCIMACNAL@ diff --git a/lnet/klnds/Makefile.mk b/lnet/klnds/Makefile.mk deleted file mode 100644 index ce40a60..0000000 --- a/lnet/klnds/Makefile.mk +++ /dev/null @@ -1,4 +0,0 @@ -include ../Kernelenv - -obj-y = socknal/ -# more coming... \ No newline at end of file diff --git a/lnet/klnds/gmlnd/.cvsignore b/lnet/klnds/gmlnd/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/klnds/gmlnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/klnds/gmlnd/Makefile.am b/lnet/klnds/gmlnd/Makefile.am deleted file mode 100644 index 1dc6f4e..0000000 --- a/lnet/klnds/gmlnd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kgmnal -modulenet_DATA = kgmnal.o -EXTRA_PROGRAMS = kgmnal - -DEFS = -kgmnal_SOURCES = gmnal.c gmnal_cb.c gmnal.h diff --git a/lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch b/lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch deleted file mode 100644 index 23c80d9..0000000 --- a/lnet/klnds/gmlnd/gm-1.5.2.1-exports.patch +++ /dev/null @@ -1,43 +0,0 @@ -diff -ru gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c ---- gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c Mon Jul 1 10:35:09 2002 -+++ gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c Thu Sep 19 14:19:38 2002 -@@ -30,6 +30,8 @@ - * - ************************************************************************/ - -+#define EXPORT_SYMTAB -+ - #include - #include - -@@ -4075,6 +4077,28 @@ - return 0; - } - -+EXPORT_SYMBOL(gm_blocking_receive_no_spin); -+EXPORT_SYMBOL(gm_close); -+EXPORT_SYMBOL(gm_dma_free); -+EXPORT_SYMBOL(gm_dma_malloc); -+EXPORT_SYMBOL(gm_drop_sends); -+EXPORT_SYMBOL(gm_finalize); -+EXPORT_SYMBOL(gm_get_node_id); -+EXPORT_SYMBOL(gm_init); -+EXPORT_SYMBOL(gm_initialize_alarm); -+EXPORT_SYMBOL(gm_max_node_id_in_use); -+EXPORT_SYMBOL(gm_min_size_for_length); -+EXPORT_SYMBOL(gm_num_receive_tokens); -+EXPORT_SYMBOL(gm_num_send_tokens); -+EXPORT_SYMBOL(gm_open); -+EXPORT_SYMBOL(gm_provide_receive_buffer); -+EXPORT_SYMBOL(gm_resume_sending); -+EXPORT_SYMBOL(gm_send_with_callback); -+EXPORT_SYMBOL(gm_set_acceptable_sizes); -+EXPORT_SYMBOL(gm_set_alarm); -+EXPORT_SYMBOL(gm_unknown); -+ -+ - /* - This file uses GM standard indentation. 
- -Only in gm-1.5.2.1_Linux-cfs/drivers/linux/gm: gm_arch.c~ -Only in gm-1.5.2.1_Linux-cfs/: trace diff --git a/lnet/klnds/gmlnd/gmlnd.h b/lnet/klnds/gmlnd/gmlnd.h deleted file mode 100644 index 47e8c3c..0000000 --- a/lnet/klnds/gmlnd/gmlnd.h +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _GMNAL_H -#define _GMNAL_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_GMNAL - -#include -#include -#include - -#include - - -/* - * Myrinet GM NAL - */ -#define NPAGES_LARGE 16 -#define NPAGES_SMALL 1 -#define MSG_LEN_LARGE NPAGES_LARGE*PAGE_SIZE -#define MSG_LEN_SMALL NPAGES_SMALL*PAGE_SIZE -#define MSG_SIZE_LARGE (gm_min_size_for_length(MSG_LEN_LARGE)) -#define MSG_SIZE_SMALL (gm_min_size_for_length(MSG_LEN_SMALL)) - -#define TXMSGS 64 /* Number of Transmit Messages */ -#define ENVELOPES 8 /* Number of outstanding receive msgs */ - -#define KGM_PORT_NUM 3 -#define KGM_HOSTNAME "kgmnal" - - -typedef struct { - char *krx_buffer; - unsigned long krx_len; - unsigned int krx_size; - unsigned int krx_priority; - struct list_head krx_item; -} kgmnal_rx_t; - - -typedef struct { - nal_cb_t *ktx_nal; - void *ktx_private; - lib_msg_t *ktx_cookie; - char *ktx_buffer; - size_t ktx_len; - unsigned long ktx_size; - int ktx_ndx; - unsigned int ktx_priority; - unsigned int ktx_tgt_node; - unsigned int ktx_tgt_port_id; -} kgmnal_tx_t; - - -typedef struct { - char kgm_init; - char kgm_shuttingdown; - struct gm_port *kgm_port; - struct list_head kgm_list; - ptl_nid_t kgm_nid; - nal_cb_t *kgm_cb; - struct kgm_trans *kgm_trans; - struct tq_struct kgm_ready_tq; - spinlock_t kgm_dispatch_lock; - spinlock_t kgm_update_lock; - spinlock_t kgm_send_lock; -} kgmnal_data_t; - -int kgm_init(kgmnal_data_t *kgm_data); -int 
kgmnal_recv_thread(void *); -int gm_return_mynid(void); -void kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); - -extern kgmnal_data_t kgmnal_data; -extern nal_t kgmnal_api; -extern nal_cb_t kgmnal_lib; - -#endif /* _GMNAL_H */ - diff --git a/lnet/klnds/gmlnd/gmlnd_cb.c b/lnet/klnds/gmlnd/gmlnd_cb.c deleted file mode 100644 index 3d4c86d..0000000 --- a/lnet/klnds/gmlnd/gmlnd_cb.c +++ /dev/null @@ -1,517 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Based on ksocknal and qswnal - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* TODO - * preallocate send buffers, store on list - * put receive buffers on queue, handle with receive threads - * use routing - */ - -#include "gmnal.h" - -extern kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *,int); - -static kgmnal_tx_t * -get_trans(void) -{ - kgmnal_tx_t *t; - PORTAL_ALLOC(t, (sizeof(kgmnal_tx_t))); - return t; -} - -static void -put_trans(kgmnal_tx_t *t) -{ - PORTAL_FREE(t, sizeof(kgmnal_tx_t)); -} - -int -kgmnal_ispeer (ptl_nid_t nid) -{ - unsigned int gmnid = (unsigned int)nid; - unsigned int nnids; - - gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - return ((ptl_nid_t)gmnid == nid &&/* didn't lose high bits on conversion ? */ - gmnid < nnids); /* it's in this machine */ -} - -/* - * LIB functions follow - * - */ -static int -kgmnal_read (nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - -static int -kgmnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - -static void * -kgmnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - return buf; -} - -static void -kgmnal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - -static void -kgmnal_printf(nal_cb_t *nal, const char *fmt, ...) 
-{ - va_list ap; - char msg[256]; - - if (portal_debug & D_NET) { - va_start( ap, fmt ); - vsnprintf( msg, sizeof(msg), fmt, ap ); - va_end( ap ); - - printk("CPUId: %d %s",smp_processor_id(), msg); - } -} - - -static void -kgmnal_cli(nal_cb_t *nal, unsigned long *flags) -{ - kgmnal_data_t *data= nal->nal_data; - - spin_lock_irqsave(&data->kgm_dispatch_lock,*flags); -} - - -static void -kgmnal_sti(nal_cb_t *nal, unsigned long *flags) -{ - kgmnal_data_t *data= nal->nal_data; - - spin_unlock_irqrestore(&data->kgm_dispatch_lock,*flags); -} - - -static int -kgmnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* network distance doesn't mean much for this nal */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - -/* FIXME rmr: add rounting code here */ -static void -kgmnal_tx_done(kgmnal_tx_t *trans, int error) -{ - lib_finalize(trans->ktx_nal, trans->ktx_private, trans->ktx_cookie); - - gm_dma_free(kgmnal_data.kgm_port, trans->ktx_buffer); - - trans->ktx_buffer = NULL; - trans->ktx_len = 0; - - put_trans(trans); -} -static char * gm_error_strings[GM_NUM_STATUS_CODES] = { - [GM_SUCCESS] = "GM_SUCCESS", - [GM_SEND_TIMED_OUT] = "GM_SEND_TIMED_OUT", - [GM_SEND_REJECTED] = "GM_SEND_REJECTED", - [GM_SEND_TARGET_PORT_CLOSED] = "GM_SEND_TARGET_PORT_CLOSED", - [GM_SEND_TARGET_NODE_UNREACHABLE] = "GM_SEND_TARGET_NODE_UNREACHABLE", - [GM_SEND_DROPPED] = "GM_SEND_DROPPED", - [GM_SEND_PORT_CLOSED] = "GM_SEND_PORT_CLOSED", -}; - -inline char * get_error(int status) -{ - if (gm_error_strings[status] != NULL) - return gm_error_strings[status]; - else - return "Unknown error"; -} - -static void -kgmnal_errhandler(struct gm_port *p, void *context, gm_status_t status) -{ - CDEBUG(D_NET,"error callback: ktx %p status %d\n", context, status); -} - -static void -kgmnal_txhandler(struct gm_port *p, void *context, gm_status_t status) -{ - kgmnal_tx_t *ktx = (kgmnal_tx_t *)context; - int err = 0; - - LASSERT (p != NULL); - LASSERT (ktx != 
NULL); - - CDEBUG(D_NET,"ktx %p status %d nid 0x%x pid %d\n", ktx, status, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id); - - switch((int)status) { - case GM_SUCCESS: /* normal */ - break; - case GM_SEND_TIMED_OUT: /* application error */ - case GM_SEND_REJECTED: /* size of msg unacceptable */ - case GM_SEND_TARGET_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_resume_sending(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_TARGET_NODE_UNREACHABLE: - case GM_SEND_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_drop_sends(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_DROPPED: - CERROR("%s (%d):\n", get_error(status), status); - err = -EIO; - break; - default: - CERROR("Unknown status: %d\n", status); - err = -EIO; - break; - } - - kgmnal_tx_done(ktx, err); -} - -/* - */ - -static int -kgmnal_send(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t len) -{ - /* - * ipnal assumes that this is the private as passed to lib_dispatch.. - * so do we :/ - */ - kgmnal_tx_t *ktx=NULL; - int rc=0; - void * buf; - int buf_len = sizeof(ptl_hdr_t) + len; - int buf_size = 0; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - PROF_START(gmnal_send); - - - CDEBUG(D_NET, "sending %d bytes from %p to nid: 0x%Lx pid %d\n", - len, iov, nid, KGM_PORT_NUM); - - /* ensure there is an available tx handle */ - - /* save transaction info to trans for later finalize and cleanup */ - ktx = get_trans(); - if (ktx == NULL) { - rc = -ENOMEM; - goto send_exit; - } - - /* hmmm... GM doesn't support vectored write, so need to allocate buffer to coalesce - header and data. - Also, memory must be dma'able or registered with GM. 
*/ - - if (buf_len <= MSG_LEN_SMALL) { - buf_size = MSG_SIZE_SMALL; - } else if (buf_len <= MSG_LEN_LARGE) { - buf_size = MSG_SIZE_LARGE; - } else { - printk("kgmnal:request exceeds TX MTU size (%d).\n", - MSG_SIZE_LARGE); - rc = -1; - goto send_exit; - } - - buf = gm_dma_malloc(kgmnal_data.kgm_port, buf_len); - if (buf == NULL) { - rc = -ENOMEM; - goto send_exit; - } - memcpy(buf, hdr, sizeof(ptl_hdr_t)); - - if (len != 0) - lib_copy_iov2buf(((char *)buf) + sizeof (ptl_hdr_t), - options, niov, iov, len); - - ktx->ktx_nal = nal; - ktx->ktx_private = private; - ktx->ktx_cookie = cookie; - ktx->ktx_len = buf_len; - ktx->ktx_size = buf_size; - ktx->ktx_buffer = buf; - ktx->ktx_priority = GM_LOW_PRIORITY; - ktx->ktx_tgt_node = nid; - ktx->ktx_tgt_port_id = KGM_PORT_NUM; - - CDEBUG(D_NET, "gm_send %d bytes (size %d) from %p to nid: 0x%Lx " - "pid %d pri %d\n", buf_len, buf_size, iov, nid, KGM_PORT_NUM, - GM_LOW_PRIORITY); - - gm_send_with_callback(kgmnal_data.kgm_port, buf, buf_size, - buf_len, GM_LOW_PRIORITY, - nid, KGM_PORT_NUM, - kgmnal_txhandler, ktx); - - PROF_FINISH(gmnal_send); - send_exit: - return rc; -} -void -kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - CERROR ("forwarding not implemented\n"); -} - -void -kqswnal_fwd_callback (void *arg, int error) -{ - CERROR ("forwarding not implemented\n"); -} - - -static inline void -kgmnal_requeue_rx(kgmnal_rx_t *krx) -{ - gm_provide_receive_buffer(kgmnal_data.kgm_port, krx->krx_buffer, - krx->krx_size, krx->krx_priority); -} - -/* Process a received portals packet */ - -/* Receive Interrupt Handler */ -static void kgmnal_rx(kgmnal_data_t *kgm, unsigned long len, unsigned int size, - void * buf, unsigned int pri) -{ - ptl_hdr_t *hdr = buf; - kgmnal_rx_t krx; - - CDEBUG(D_NET,"buf %p, len %ld\n", buf, len); - - if ( len < sizeof( ptl_hdr_t ) ) { - /* XXX what's this for? 
*/ - if (kgm->kgm_shuttingdown) - return; - CERROR("kgmnal: did not receive complete portal header, " - "len= %ld", len); - gm_provide_receive_buffer(kgm->kgm_port, buf, size, pri); - return; - } - - /* might want to use seperate threads to handle receive */ - krx.krx_buffer = buf; - krx.krx_len = len; - krx.krx_size = size; - krx.krx_priority = pri; - - if ( hdr->dest_nid == kgmnal_lib.ni.nid ) { - PROF_START(lib_parse); - lib_parse(&kgmnal_lib, (ptl_hdr_t *)krx.krx_buffer, &krx); - PROF_FINISH(lib_parse); - } else if (kgmnal_ispeer(hdr->dest_nid)) { - /* should have gone direct to peer */ - CERROR("dropping packet from 0x%llx to 0x%llx: target is " - "a peer", hdr->src_nid, hdr->dest_nid); - kgmnal_requeue_rx(&krx); - } else { - /* forward to gateway */ - CERROR("forwarding not implemented yet"); - kgmnal_requeue_rx(&krx); - } - - return; -} - - -static int kgmnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t mlen, - size_t rlen) -{ - kgmnal_rx_t *krx = private; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - CDEBUG(D_NET,"mlen=%d, rlen=%d\n", mlen, rlen); - - /* What was actually received must be >= what sender claims to - * have sent. This is an LASSERT, since lib-move doesn't - * check cb return code yet. */ - LASSERT (krx->krx_len >= sizeof (ptl_hdr_t) + rlen); - LASSERT (mlen <= rlen); - - PROF_START(gmnal_recv); - - if(mlen != 0) { - PROF_START(memcpy); - lib_copy_buf2iov (options, niov, iov, - krx->krx_buffer + sizeof (ptl_hdr_t), mlen); - PROF_FINISH(memcpy); - } - - PROF_START(lib_finalize); - lib_finalize(nal, private, cookie); - PROF_FINISH(lib_finalize); - - kgmnal_requeue_rx(krx); - - PROF_FINISH(gmnal_recv); - - return rlen; -} - - -static void kgmnal_shutdown(void * none) -{ - CERROR("called\n"); - return; -} - -/* - * Set terminate and use alarm to wake up the recv thread. 
- */ -static void recv_shutdown(kgmnal_data_t *kgm) -{ - gm_alarm_t alarm; - - kgm->kgm_shuttingdown = 1; - gm_initialize_alarm(&alarm); - gm_set_alarm(kgm->kgm_port, &alarm, 1, kgmnal_shutdown, NULL); -} - -int kgmnal_end(kgmnal_data_t *kgm) -{ - - /* wait for sends to finish ? */ - /* remove receive buffers */ - /* shutdown receive thread */ - - recv_shutdown(kgm); - - return 0; -} - -/* Used only for the spinner */ -int kgmnal_recv_thread(void *arg) -{ - kgmnal_data_t *kgm = arg; - - LASSERT(kgm != NULL); - - kportal_daemonize("kgmnal_rx"); - - while(1) { - gm_recv_event_t *e; - int priority = GM_LOW_PRIORITY; - if (kgm->kgm_shuttingdown) - break; - - e = gm_blocking_receive_no_spin(kgm->kgm_port); - if (e == NULL) { - CERROR("gm_blocking_receive returned NULL\n"); - break; - } - - switch(gm_ntohc(e->recv.type)) { - case GM_HIGH_RECV_EVENT: - priority = GM_HIGH_PRIORITY; - /* fall through */ - case GM_RECV_EVENT: - kgmnal_rx(kgm, gm_ntohl(e->recv.length), - gm_ntohc(e->recv.size), - gm_ntohp(e->recv.buffer), priority); - break; - case GM_ALARM_EVENT: - CERROR("received alarm"); - gm_unknown(kgm->kgm_port, e); - break; - case GM_BAD_SEND_DETECTED_EVENT: /* ?? 
*/ - CERROR("received bad send!\n"); - break; - default: - gm_unknown(kgm->kgm_port, e); - } - } - - CERROR("shuttting down.\n"); - return 0; -} - -nal_cb_t kgmnal_lib = { - nal_data: &kgmnal_data, /* NAL private data */ - cb_send: kgmnal_send, - cb_recv: kgmnal_recv, - cb_read: kgmnal_read, - cb_write: kgmnal_write, - cb_malloc: kgmnal_malloc, - cb_free: kgmnal_free, - cb_printf: kgmnal_printf, - cb_cli: kgmnal_cli, - cb_sti: kgmnal_sti, - cb_dist: kgmnal_dist -}; diff --git a/lnet/klnds/gmlnd/gmnal.c b/lnet/klnds/gmlnd/gmnal.c deleted file mode 100644 index ceeea2a..0000000 --- a/lnet/klnds/gmlnd/gmnal.c +++ /dev/null @@ -1,284 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Based on ksocknal and qswnal - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "gmnal.h" - -ptl_handle_ni_t kgmnal_ni; -nal_t kgmnal_api; - -kgmnal_data_t kgmnal_data; -int gmnal_debug = 0; - -kpr_nal_interface_t kqswnal_router_interface = { - kprni_nalid: GMNAL, - kprni_arg: NULL, - kprni_fwd: kgmnal_fwd_packet, -}; - -static int kgmnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */ - return PTL_OK; -} - -static void kgmnal_lock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - -static void kgmnal_unlock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - -static int kgmnal_shutdown(nal_t *nal, int ni) -{ - LASSERT (nal == &kgmnal_api); - return 0; -} - -static void kgmnal_yield( nal_t *nal ) -{ - LASSERT (nal == &kgmnal_api); - - if (current->need_resched) - schedule(); - return; -} - -kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *data,int ndx) -{ - kgmnal_rx_t *conn; - - PORTAL_ALLOC(conn, sizeof(kgmnal_rx_t)); - /* Check for out of mem here */ - if (conn==NULL) { - printk("kgm_add_recv: memory alloc failed\n"); - return NULL; - } - - list_add(&conn->krx_item,(struct list_head *)&data->kgm_list); - // conn->ndx=ndx; - // conn->len=conn->ptlhdr_copied=0; - // conn->loopback=0; - return conn; -} - -static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - unsigned int nnids; - - 
gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - CDEBUG(D_NET, "calling lib_init with nid 0x%Lx of %d\n", - kgmnal_data.kgm_nid, nnids); - lib_init(&kgmnal_lib, kgmnal_data.kgm_nid, 0, nnids,ptl_size, ac_size); - return &kgmnal_api; -} - -static void __exit -kgmnal_finalize(void) -{ - struct list_head *tmp; - - PORTAL_SYMBOL_UNREGISTER (kgmnal_ni); - PtlNIFini(kgmnal_ni); - lib_fini(&kgmnal_api); - - if (kgmnal_data.kgm_port) { - gm_close(kgmnal_data.kgm_port); - } - - /* FIXME: free dma buffers */ - /* FIXME: kill receiver thread */ - - PORTAL_FREE (kgmnal_data.kgm_trans, bsizeof(kgmnal_tx_t)*TXMSGS); - - list_for_each(tmp, &kgmnal_data.kgm_list) { - kgmnal_rx_t *conn; - conn = list_entry(tmp, kgmnal_rx_t, krx_item); - CDEBUG(D_IOCTL, "freeing conn %p\n",conn); - tmp = tmp->next; - list_del(&conn->krx_item); - PORTAL_FREE(conn, sizeof(*conn)); - } - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - -static int __init -kgmnal_initialize(void) -{ - int rc; - int ntok; - unsigned long sizemask; - unsigned int nid; - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kgmnal_api.forward = kgmnal_forward; - kgmnal_api.shutdown = kgmnal_shutdown; - kgmnal_api.yield = kgmnal_yield; - kgmnal_api.validate = NULL; /* our api validate is a NOOP */ - kgmnal_api.lock= kgmnal_lock; - kgmnal_api.unlock= kgmnal_unlock; - kgmnal_api.nal_data = &kgmnal_data; - - kgmnal_lib.nal_data = &kgmnal_data; - - memset(&kgmnal_data, 0, sizeof(kgmnal_data)); - - INIT_LIST_HEAD(&kgmnal_data.kgm_list); - kgmnal_data.kgm_cb = &kgmnal_lib; - - /* Allocate transmit descriptors */ - PORTAL_ALLOC (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS); - if (kgmnal_data.kgm_trans==NULL) { - printk("kgmnal: init: failed to allocate transmit " - "descriptors\n"); - return -1; - } - memset(kgmnal_data.kgm_trans,-1,sizeof(kgmnal_tx_t)*(TXMSGS)); - - spin_lock_init(&kgmnal_data.kgm_dispatch_lock); - 
spin_lock_init(&kgmnal_data.kgm_update_lock); - spin_lock_init(&kgmnal_data.kgm_send_lock); - - /* Do the receiver and xmtr allocation */ - - rc = gm_init(); - if (rc != GM_SUCCESS) { - CERROR("gm_init failed: %d\n", rc); - return -1; - } - - rc = gm_open(&kgmnal_data.kgm_port, 0 , KGM_PORT_NUM, KGM_HOSTNAME, - GM_API_VERSION_1_1); - if (rc != GM_SUCCESS) { - gm_finalize(); - kgmnal_data.kgm_port = NULL; - CERROR("gm_open failed: %d\n", rc); - return -1; - } - gm_get_node_id(kgmnal_data.kgm_port, &nid); - kgmnal_data.kgm_nid = nid; - /* Allocate 2 different sizes of buffers. For new, use half - the tokens for each. */ - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d large %d byte recv buffers\n", - ntok, MSG_LEN_LARGE); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_LARGE); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_LARGE, MSG_SIZE_LARGE, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_LARGE, GM_LOW_PRIORITY); - } - - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d small %d byte recv buffers\n", - ntok, MSG_LEN_SMALL); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_SMALL); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_SMALL, MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - } - sizemask = (1 << MSG_SIZE_LARGE) | (1 << MSG_SIZE_SMALL); - CDEBUG(D_NET, "gm_set_acceptable_sizes port %p pri %d mask 0x%x\n", - kgmnal_data.kgm_port, GM_LOW_PRIORITY, sizemask); - 
gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_LOW_PRIORITY, - sizemask); - gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_HIGH_PRIORITY, 0); - - /* Initialize Network Interface */ - rc = PtlNIInit(kgmnal_init, 32, 4, 0, &kgmnal_ni); - if (rc) { - CERROR("PtlNIInit failed %d\n", rc); - return (-ENOMEM); - } - - /* Start receiver thread */ - kernel_thread(kgmnal_recv_thread, &kgmnal_data, 0); - - PORTAL_SYMBOL_REGISTER(kgmnal_ni); - - kgmnal_data.kgm_init = 1; - - return 0; -} - -MODULE_AUTHOR("Robert Read "); -MODULE_DESCRIPTION("Kernel Myrinet GM NAL v0.1"); -MODULE_LICENSE("GPL"); - -module_init (kgmnal_initialize); -module_exit (kgmnal_finalize); - -EXPORT_SYMBOL (kgmnal_ni); diff --git a/lnet/klnds/qswlnd/.cvsignore b/lnet/klnds/qswlnd/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/klnds/qswlnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/klnds/qswlnd/Makefile.am b/lnet/klnds/qswlnd/Makefile.am deleted file mode 100644 index 3eb4dd5..0000000 --- a/lnet/klnds/qswlnd/Makefile.am +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kqswnal -modulenet_DATA = kqswnal.o -EXTRA_PROGRAMS = kqswnal - - -#CFLAGS:= @KCFLAGS@ -#CPPFLAGS:=@KCPPFLAGS@ -DEFS = -CPPFLAGS=@CPPFLAGS@ @with_quadrics@ -kqswnal_SOURCES = qswnal.c qswnal_cb.c qswnal.h diff --git a/lnet/klnds/qswlnd/qswlnd.c b/lnet/klnds/qswlnd/qswlnd.c deleted file mode 100644 index 1a8fb74..0000000 --- a/lnet/klnds/qswlnd/qswlnd.c +++ /dev/null @@ -1,608 +0,0 @@ -/* - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * W. 
Marcus Miller - Based on ksocknal - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswnal.h" - -ptl_handle_ni_t kqswnal_ni; -nal_t kqswnal_api; -kqswnal_data_t kqswnal_data; - -kpr_nal_interface_t kqswnal_router_interface = { - kprni_nalid: QSWNAL, - kprni_arg: NULL, - kprni_fwd: kqswnal_fwd_packet, -}; - - -static int -kqswnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */ - return (PTL_OK); -} - -static void -kqswnal_lock (nal_t *nal, unsigned long *flags) -{ - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - -static void -kqswnal_unlock(nal_t *nal, unsigned long *flags) -{ - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - -static int -kqswnal_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "shutdown\n"); 
- - LASSERT (nal == &kqswnal_api); - return (0); -} - -static void -kqswnal_yield( nal_t *nal ) -{ - CDEBUG (D_NET, "yield\n"); - - if (current->need_resched) - schedule(); - return; -} - -static nal_t * -kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t requested_pid) -{ - ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); - int nnids = kqswnal_data.kqn_nnodes; - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids); - - lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); - - return (&kqswnal_api); -} - -int -kqswnal_cmd (struct portal_ioctl_data *data, void *private) -{ - LASSERT (data != NULL); - - switch (data->ioc_nal_cmd) { - case NAL_CMD_REGISTER_MYNID: - CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n", - data->ioc_nid - kqswnal_data.kqn_elanid, - kqswnal_data.kqn_nid_offset); - kqswnal_data.kqn_nid_offset = - data->ioc_nid - kqswnal_data.kqn_elanid; - kqswnal_lib.ni.nid = data->ioc_nid; - return (0); - - default: - return (-EINVAL); - } -} - -void __exit -kqswnal_finalise (void) -{ - switch (kqswnal_data.kqn_init) - { - default: - LASSERT (0); - - case KQN_INIT_ALL: - PORTAL_SYMBOL_UNREGISTER (kqswnal_ni); - /* fall through */ - - case KQN_INIT_PTL: - PtlNIFini (kqswnal_ni); - lib_fini (&kqswnal_lib); - /* fall through */ - - case KQN_INIT_DATA: - break; - - case KQN_INIT_NOTHING: - return; - } - - /**********************************************************************/ - /* Make router stop her calling me and fail any more call-ins */ - kpr_shutdown (&kqswnal_data.kqn_router); - - /**********************************************************************/ - /* flag threads to terminate, wake them and wait for them to die */ - - kqswnal_data.kqn_shuttingdown = 1; - wake_up_all (&kqswnal_data.kqn_sched_waitq); - - while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) { - CDEBUG(D_NET, "waiting for %d threads to terminate\n", - atomic_read 
(&kqswnal_data.kqn_nthreads)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - - /**********************************************************************/ - /* close elan comms */ - - if (kqswnal_data.kqn_eprx_small != NULL) - ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small); - - if (kqswnal_data.kqn_eprx_large != NULL) - ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large); - - if (kqswnal_data.kqn_eptx != NULL) - ep_free_large_xmtr (kqswnal_data.kqn_eptx); - - /**********************************************************************/ - /* No more threads. No more portals, router or comms callbacks! - * I control the horizontals and the verticals... - */ - - /**********************************************************************/ - /* Complete any blocked forwarding packets with error - */ - - while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq)) - { - kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); - } - - while (!list_empty (&kqswnal_data.kqn_delayedfwds)) - { - kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); - } - - /**********************************************************************/ - /* Wait for router to complete any packets I sent her - */ - - kpr_deregister (&kqswnal_data.kqn_router); - - - /**********************************************************************/ - /* Unmap message buffers and free all descriptors and buffers - */ - - if (kqswnal_data.kqn_eprxdmahandle != NULL) - { - elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eprxdmahandle, 0, - KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL + - KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE); - - elan3_dma_release(kqswnal_data.kqn_epdev->DmaState, - 
kqswnal_data.kqn_eprxdmahandle); - } - - if (kqswnal_data.kqn_eptxdmahandle != NULL) - { - elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, 0, - KQSW_NTXMSGPAGES * (KQSW_NTXMSGS + - KQSW_NNBLK_TXMSGS)); - - elan3_dma_release(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle); - } - - if (kqswnal_data.kqn_txds != NULL) - { - int i; - - for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) - { - kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i]; - - if (ktx->ktx_buffer != NULL) - PORTAL_FREE(ktx->ktx_buffer, - KQSW_TX_BUFFER_SIZE); - } - - PORTAL_FREE(kqswnal_data.kqn_txds, - sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS + - KQSW_NNBLK_TXMSGS)); - } - - if (kqswnal_data.kqn_rxds != NULL) - { - int i; - int j; - - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - - for (j = 0; j < krx->krx_npages; j++) - if (krx->krx_pages[j] != NULL) - __free_page (krx->krx_pages[j]); - } - - PORTAL_FREE(kqswnal_data.kqn_rxds, - sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + - KQSW_NRXMSGS_LARGE)); - } - - /* resets flags, pointers to NULL etc */ - memset(&kqswnal_data, 0, sizeof (kqswnal_data)); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory)); - - printk (KERN_INFO "Routing QSW NAL unloaded (final mem %d)\n", - atomic_read(&portal_kmemory)); -} - -static int __init -kqswnal_initialise (void) -{ - ELAN3_DMA_REQUEST dmareq; - int rc; - int i; - int elan_page_idx; - int pkmem = atomic_read(&portal_kmemory); - - LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING); - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); - - kqswnal_api.forward = kqswnal_forward; - kqswnal_api.shutdown = kqswnal_shutdown; - kqswnal_api.yield = kqswnal_yield; - kqswnal_api.validate = NULL; /* our api validate is a NOOP */ - kqswnal_api.lock = kqswnal_lock; - kqswnal_api.unlock = kqswnal_unlock; - kqswnal_api.nal_data = &kqswnal_data; - - kqswnal_lib.nal_data = 
&kqswnal_data; - - /* ensure all pointers NULL etc */ - memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - - kqswnal_data.kqn_cb = &kqswnal_lib; - - INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds); - spin_lock_init (&kqswnal_data.kqn_idletxd_lock); - init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq); - INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq); - - INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds); - INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds); - - spin_lock_init (&kqswnal_data.kqn_sched_lock); - init_waitqueue_head (&kqswnal_data.kqn_sched_waitq); - - spin_lock_init (&kqswnal_data.kqn_statelock); - - /* pointers/lists/locks initialised */ - kqswnal_data.kqn_init = KQN_INIT_DATA; - - /**********************************************************************/ - /* Find the first Elan device */ - - kqswnal_data.kqn_epdev = ep_device (0); - if (kqswnal_data.kqn_epdev == NULL) - { - CERROR ("Can't get elan device 0\n"); - return (-ENOMEM); - } - - kqswnal_data.kqn_nid_offset = 0; - kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_epdev); - kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_epdev); - - /**********************************************************************/ - /* Get the transmitter */ - - kqswnal_data.kqn_eptx = ep_alloc_large_xmtr (kqswnal_data.kqn_epdev); - if (kqswnal_data.kqn_eptx == NULL) - { - CERROR ("Can't allocate transmitter\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Get the receivers */ - - kqswnal_data.kqn_eprx_small = ep_install_large_rcvr (kqswnal_data.kqn_epdev, - EP_SVC_LARGE_PORTALS_SMALL, - KQSW_EP_ENVELOPES_SMALL); - if (kqswnal_data.kqn_eprx_small == NULL) - { - CERROR ("Can't install small msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - kqswnal_data.kqn_eprx_large = ep_install_large_rcvr 
(kqswnal_data.kqn_epdev, - EP_SVC_LARGE_PORTALS_LARGE, - KQSW_EP_ENVELOPES_LARGE); - if (kqswnal_data.kqn_eprx_large == NULL) - { - CERROR ("Can't install large msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for transmit buffers */ - - dmareq.Waitfn = DDI_DMA_SLEEP; - dmareq.ElanAddr = (E3_Addr) 0; - dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN; - dmareq.Perm = ELAN_PERM_REMOTEREAD; - - rc = elan3_dma_reserve(kqswnal_data.kqn_epdev->DmaState, - KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS), - &dmareq, &kqswnal_data.kqn_eptxdmahandle); - if (rc != DDI_SUCCESS) - { - CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for receive buffers */ - - dmareq.Waitfn = DDI_DMA_SLEEP; - dmareq.ElanAddr = (E3_Addr) 0; - dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN; - dmareq.Perm = ELAN_PERM_REMOTEWRITE; - - rc = elan3_dma_reserve (kqswnal_data.kqn_epdev->DmaState, - KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL + - KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE, - &dmareq, &kqswnal_data.kqn_eprxdmahandle); - if (rc != DDI_SUCCESS) - { - CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Allocate/Initialise transmit descriptors */ - - PORTAL_ALLOC(kqswnal_data.kqn_txds, - sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); - if (kqswnal_data.kqn_txds == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - /* clear flags, null pointers etc */ - memset(kqswnal_data.kqn_txds, 0, - sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); - for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++) - { - int premapped_pages; - kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i]; - int basepage = i * 
KQSW_NTXMSGPAGES; - - PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); - if (ktx->ktx_buffer == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - /* Map pre-allocated buffer NOW, to save latency on transmit */ - premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer, - KQSW_TX_BUFFER_SIZE); - - elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, - basepage, &ktx->ktx_ebuffer); - - ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */ - ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */ - - if (i < KQSW_NTXMSGS) - ktx->ktx_idle = &kqswnal_data.kqn_idletxds; - else - ktx->ktx_idle = &kqswnal_data.kqn_nblk_idletxds; - - list_add_tail (&ktx->ktx_list, ktx->ktx_idle); - } - - /**********************************************************************/ - /* Allocate/Initialise receive descriptors */ - - PORTAL_ALLOC (kqswnal_data.kqn_rxds, - sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE)); - if (kqswnal_data.kqn_rxds == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */ - sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE)); - - elan_page_idx = 0; - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - E3_Addr elanaddr; - int j; - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - - if (i < KQSW_NRXMSGS_SMALL) - { - krx->krx_npages = KQSW_NRXMSGPAGES_SMALL; - krx->krx_eprx = kqswnal_data.kqn_eprx_small; - } - else - { - krx->krx_npages = KQSW_NRXMSGPAGES_LARGE; - krx->krx_eprx = kqswnal_data.kqn_eprx_large; - } - - LASSERT (krx->krx_npages > 0); - for (j = 0; j < krx->krx_npages; j++) - { - krx->krx_pages[j] = alloc_page(GFP_KERNEL); - if (krx->krx_pages[j] == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - LASSERT(page_address(krx->krx_pages[j]) != NULL); - - 
elan3_dvma_kaddr_load(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eprxdmahandle, - page_address(krx->krx_pages[j]), - PAGE_SIZE, elan_page_idx, - &elanaddr); - elan_page_idx++; - - if (j == 0) - krx->krx_elanaddr = elanaddr; - - /* NB we assume a contiguous */ - LASSERT (elanaddr == krx->krx_elanaddr + j * PAGE_SIZE); - } - } - LASSERT (elan_page_idx == - (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) + - (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE)); - - /**********************************************************************/ - /* Network interface ready to initialise */ - - rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni); - if (rc != 0) - { - CERROR ("PtlNIInit failed %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); - } - - kqswnal_data.kqn_init = KQN_INIT_PTL; - - /**********************************************************************/ - /* Queue receives, now that it's OK to run their completion callbacks */ - - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - - /* NB this enqueue can allocate/sleep (attr == 0) */ - rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx, - krx->krx_elanaddr, - krx->krx_npages * PAGE_SIZE, 0); - if (rc != 0) - { - CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); - } - } - - /**********************************************************************/ - /* Spawn scheduling threads */ - for (i = 0; i < smp_num_cpus; i++) - { - rc = kqswnal_thread_start (kqswnal_scheduler, NULL); - if (rc != 0) - { - CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_finalise (); - return (rc); - } - } - - /**********************************************************************/ - /* Connect to the router */ - rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); - CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc); - - rc = kportal_nal_register (QSWNAL, 
&kqswnal_cmd, NULL); - if (rc != 0) { - CERROR ("Can't initialise command interface (rc = %d)\n", rc); - kqswnal_finalise (); - return (rc); - } - - PORTAL_SYMBOL_REGISTER(kqswnal_ni); - kqswnal_data.kqn_init = KQN_INIT_ALL; - - printk(KERN_INFO "Routing QSW NAL loaded on node %d of %d " - "(Routing %s, initial mem %d)\n", - kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes, - kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled", - pkmem); - - return (0); -} - - -MODULE_AUTHOR("W. Marcus Miller "); -MODULE_DESCRIPTION("Kernel Quadrics Switch NAL v1.00"); -MODULE_LICENSE("GPL"); - -module_init (kqswnal_initialise); -module_exit (kqswnal_finalise); - -EXPORT_SYMBOL (kqswnal_ni); diff --git a/lnet/klnds/qswlnd/qswlnd.h b/lnet/klnds/qswlnd/qswlnd.h deleted file mode 100644 index 85e585b..0000000 --- a/lnet/klnds/qswlnd/qswlnd.h +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. 
- * - */ - -#ifndef _QSWNAL_H -#define _QSWNAL_H -#define EXPORT_SYMTAB - -#ifdef PROPRIETARY_ELAN -# include -#else -# include -#endif - -#undef printf /* nasty QSW #define */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_QSWNAL - -#include -#include -#include - -#define KQSW_CHECKSUM 0 -#if KQSW_CHECKSUM -typedef unsigned long kqsw_csum_t; -#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t)) -#else -#define KQSW_CSUM_SIZE 0 -#endif -#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) - -/* - * Elan NAL - */ -#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ -#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ -/* NB small/large message sizes are GLOBAL constants */ - -/* - * Performance Tuning defines - * NB no mention of PAGE_SIZE for interoperability - */ -#if PTL_LARGE_MTU -# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */ -#else -# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */ -#endif - -#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ - -#define KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ - -#define KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ - -#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ -#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ - -#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */ -#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */ - -#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield 
*/ - -/* - * derived constants - */ - -#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG) -/* The pre-allocated tx buffer (hdr + small payload) */ - -#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1) -/* Reserve elan address space for pre-allocated and pre-mapped transmit - * buffer and a full payload too. Extra pages allow for page alignment */ - -#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) - -#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) -/* biggest complete packet we can receive (or transmit) */ - - -typedef struct -{ - struct list_head krx_list; /* enqueue -> thread */ - EP_RCVR *krx_eprx; /* port to post receives to */ - EP_RXD *krx_rxd; /* receive descriptor (for repost) */ - E3_Addr krx_elanaddr; /* Elan address of buffer (contiguous in elan vm) */ - int krx_npages; /* # pages in receive buffer */ - int krx_nob; /* Number Of Bytes received into buffer */ - kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */ - struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */ - struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for forwarding */ -} kqswnal_rx_t; - -typedef struct -{ - struct list_head ktx_list; /* enqueue idle/delayed */ - struct list_head *ktx_idle; /* where to put when idle */ - char ktx_state; /* What I'm doing */ - uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ - int ktx_npages; /* pages reserved for mapping messages */ - int ktx_nmappedpages; /* # pages mapped for current message */ - EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */ - int ktx_niov; /* # message frags */ - int ktx_port; /* destination ep port */ 
- ptl_nid_t ktx_nid; /* destination node */ - void *ktx_args[2]; /* completion passthru */ - E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ - char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ -} kqswnal_tx_t; - -#define KTX_IDLE 0 /* MUST BE ZERO (so zeroed ktx is idle) */ -#define KTX_SENDING 1 /* local send */ -#define KTX_FORWARDING 2 /* routing a packet */ - -typedef struct -{ - char kqn_init; /* what's been initialised */ - char kqn_shuttingdown; /* I'm trying to shut down */ - atomic_t kqn_nthreads; /* # threads still running */ - - kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */ - kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ - - struct list_head kqn_idletxds; /* transmit descriptors free to use */ - struct list_head kqn_nblk_idletxds; /* reserve of */ - spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ - wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ - struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ - - spinlock_t kqn_sched_lock; /* serialise packet schedulers */ - wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ - - struct list_head kqn_readyrxds; /* rxds full of data */ - struct list_head kqn_delayedfwds; /* delayed forwards */ - struct list_head kqn_delayedtxds; /* delayed transmits */ - - spinlock_t kqn_statelock; /* cb_cli/cb_sti */ - nal_cb_t *kqn_cb; /* -> kqswnal_lib */ - EP_DEV *kqn_epdev; /* elan device */ - EP_XMTR *kqn_eptx; /* elan transmitter */ - EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ - EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */ - ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ - ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ - kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ - - ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ - int kqn_nnodes; /* this cluster's size 
*/ - int kqn_elanid; /* this nodes's elan ID */ -} kqswnal_data_t; - -/* kqn_init state */ -#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ -#define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 -#define KQN_INIT_ALL 3 - -extern nal_cb_t kqswnal_lib; -extern nal_t kqswnal_api; -extern kqswnal_data_t kqswnal_data; - -extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg); -extern void kqswnal_rxhandler(EP_RXD *rxd); -extern int kqswnal_scheduler (void *); -extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); - -static inline ptl_nid_t -kqswnal_elanid2nid (int elanid) -{ - return (kqswnal_data.kqn_nid_offset + elanid); -} - -static inline int -kqswnal_nid2elanid (ptl_nid_t nid) -{ - /* not in this cluster? */ - if (nid < kqswnal_data.kqn_nid_offset || - nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes) - return (-1); - - return (nid - kqswnal_data.kqn_nid_offset); -} - -static inline void -kqswnal_requeue_rx (kqswnal_rx_t *krx) -{ - ep_requeue_receive (krx->krx_rxd, kqswnal_rxhandler, krx, - krx->krx_elanaddr, krx->krx_npages * PAGE_SIZE); -} - -static inline int -kqswnal_pages_spanned (void *base, int nob) -{ - unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT; - unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT; - - LASSERT (last_page >= first_page); /* can't wrap address space */ - return (last_page - first_page + 1); -} - -#if KQSW_CHECKSUM -static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob) -{ - unsigned char *ptr = (unsigned char *)base; - - while (nob-- > 0) - sum += *ptr++; - - return (sum); -} -#endif - -#endif /* _QSWNAL_H */ diff --git a/lnet/klnds/qswlnd/qswlnd_cb.c b/lnet/klnds/qswlnd/qswlnd_cb.c deleted file mode 100644 index c03d592..0000000 --- a/lnet/klnds/qswlnd/qswlnd_cb.c +++ /dev/null @@ -1,1240 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright 
(C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * W. Marcus Miller - Based on ksocknal - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswnal.h" - -atomic_t kqswnal_packets_launched; -atomic_t kqswnal_packets_transmitted; -atomic_t kqswnal_packets_received; - - -/* - * LIB functions follow - * - */ -static int -kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr, - size_t len) -{ - CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n", - nal->ni.nid, len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - - return (0); -} - -static int -kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr, - size_t len) -{ - CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n", - nal->ni.nid, len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - - return (0); -} - -static void * -kqswnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - return (buf); -} - -static void -kqswnal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - -static void -kqswnal_printf (nal_cb_t * nal, const char *fmt, ...) 
-{ - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); -} - - -static void -kqswnal_cli(nal_cb_t *nal, unsigned long *flags) -{ - kqswnal_data_t *data= nal->nal_data; - - spin_lock_irqsave(&data->kqn_statelock, *flags); -} - - -static void -kqswnal_sti(nal_cb_t *nal, unsigned long *flags) -{ - kqswnal_data_t *data= nal->nal_data; - - spin_unlock_irqrestore(&data->kqn_statelock, *flags); -} - - -static int -kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - if (nid == nal->ni.nid) - *dist = 0; /* it's me */ - else if (kqswnal_nid2elanid (nid) >= 0) - *dist = 1; /* it's my peer */ - else - *dist = 2; /* via router */ - return (0); -} - -void -kqswnal_unmap_tx (kqswnal_tx_t *ktx) -{ - if (ktx->ktx_nmappedpages == 0) - return; - - CDEBUG (D_NET, "%p[%d] unloading pages %d for %d\n", - ktx, ktx->ktx_niov, ktx->ktx_basepage, ktx->ktx_nmappedpages); - - LASSERT (ktx->ktx_nmappedpages <= ktx->ktx_npages); - LASSERT (ktx->ktx_basepage + ktx->ktx_nmappedpages <= - kqswnal_data.kqn_eptxdmahandle->NumDvmaPages); - - elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - ktx->ktx_basepage, ktx->ktx_nmappedpages); - ktx->ktx_nmappedpages = 0; -} - -int -kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int nob, int niov, ptl_kiov_t *kiov) -{ - int nfrags = ktx->ktx_niov; - const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]); - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - uint32_t basepage = ktx->ktx_basepage + nmapped; - char *ptr; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags <= maxfrags); - LASSERT (niov > 0); - LASSERT (nob > 0); - - do { - int fraglen = kiov->kiov_len; - - /* nob exactly spans the iovs */ - LASSERT (fraglen <= nob); - /* each frag fits in a page */ - LASSERT (kiov->kiov_offset + kiov->kiov_len <= 
PAGE_SIZE); - - nmapped++; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == maxfrags) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - maxfrags); - return (-EMSGSIZE); - } - - /* XXX this is really crap, but we'll have to kmap until - * EKC has a page (rather than vaddr) mapping interface */ - - ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, page %d, %d total\n", - ktx, nfrags, ptr, fraglen, basepage, nmapped); - - elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - ptr, fraglen, - basepage, &ktx->ktx_iov[nfrags].Base); - - kunmap (kiov->kiov_page); - - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - if (nfrags > 0 && /* previous frag mapped */ - ktx->ktx_iov[nfrags].Base == /* contiguous with this one */ - (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len)) - /* just extend previous */ - ktx->ktx_iov[nfrags - 1].Len += fraglen; - else { - ktx->ktx_iov[nfrags].Len = fraglen; - nfrags++; /* new frag */ - } - - basepage++; - kiov++; - niov--; - nob -= fraglen; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - ktx->ktx_niov = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_niov, ktx->ktx_nmappedpages); - - return (0); -} - -int -kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int nob, int niov, struct iovec *iov) -{ - int nfrags = ktx->ktx_niov; - const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]); - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - uint32_t basepage = ktx->ktx_basepage + nmapped; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags <= maxfrags); - LASSERT (niov > 0); - LASSERT (nob > 0); - - do { - int fraglen = iov->iov_len; - long npages = kqswnal_pages_spanned (iov->iov_base, 
fraglen); - - /* nob exactly spans the iovs */ - LASSERT (fraglen <= nob); - - nmapped += npages; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == maxfrags) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - maxfrags); - return (-EMSGSIZE); - } - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, pages %d for %ld, %d total\n", - ktx, nfrags, iov->iov_base, fraglen, basepage, npages, - nmapped); - - elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - iov->iov_base, fraglen, - basepage, &ktx->ktx_iov[nfrags].Base); - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - if (nfrags > 0 && /* previous frag mapped */ - ktx->ktx_iov[nfrags].Base == /* contiguous with this one */ - (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len)) - /* just extend previous */ - ktx->ktx_iov[nfrags - 1].Len += fraglen; - else { - ktx->ktx_iov[nfrags].Len = fraglen; - nfrags++; /* new frag */ - } - - basepage += npages; - iov++; - niov--; - nob -= fraglen; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - ktx->ktx_niov = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_niov, ktx->ktx_nmappedpages); - - return (0); -} - -void -kqswnal_put_idle_tx (kqswnal_tx_t *ktx) -{ - kpr_fwd_desc_t *fwd = NULL; - struct list_head *idle = ktx->ktx_idle; - unsigned long flags; - - kqswnal_unmap_tx (ktx); /* release temporary mappings */ - ktx->ktx_state = KTX_IDLE; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - list_add (&ktx->ktx_list, idle); - - /* reserved for non-blocking tx */ - if (idle == &kqswnal_data.kqn_nblk_idletxds) { - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - return; - } - - /* anything blocking for a tx descriptor? 
*/ - if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */ - { - CDEBUG(D_NET,"wakeup fwd\n"); - - fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - } - - if (waitqueue_active (&kqswnal_data.kqn_idletxd_waitq)) /* process? */ - { - /* local sender waiting for tx desc */ - CDEBUG(D_NET,"wakeup process\n"); - wake_up (&kqswnal_data.kqn_idletxd_waitq); - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - if (fwd == NULL) - return; - - /* schedule packet for forwarding again */ - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&fwd->kprfd_list, &kqswnal_data.kqn_delayedfwds); - if (waitqueue_active (&kqswnal_data.kqn_sched_waitq)) - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -} - -kqswnal_tx_t * -kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) -{ - unsigned long flags; - kqswnal_tx_t *ktx = NULL; - - for (;;) { - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - /* "normal" descriptor is free */ - if (!list_empty (&kqswnal_data.kqn_idletxds)) { - ktx = list_entry (kqswnal_data.kqn_idletxds.next, - kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - break; - } - - /* "normal" descriptor pool is empty */ - - if (fwd != NULL) { /* forwarded packet => queue for idle txd */ - CDEBUG (D_NET, "blocked fwd [%p]\n", fwd); - list_add_tail (&fwd->kprfd_list, - &kqswnal_data.kqn_idletxd_fwdq); - break; - } - - /* doing a local transmit */ - if (!may_block) { - if (list_empty (&kqswnal_data.kqn_nblk_idletxds)) { - CERROR ("intr tx desc pool exhausted\n"); - break; - } - - ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next, - kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - break; - } - - /* block for idle tx */ - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - CDEBUG (D_NET, "blocking for tx desc\n"); - wait_event 
(kqswnal_data.kqn_idletxd_waitq, - !list_empty (&kqswnal_data.kqn_idletxds)); - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */ - LASSERT (ktx == NULL || ktx->ktx_nmappedpages == 0); - return (ktx); -} - -void -kqswnal_tx_done (kqswnal_tx_t *ktx, int error) -{ - switch (ktx->ktx_state) { - case KTX_FORWARDING: /* router asked me to forward this packet */ - kpr_fwd_done (&kqswnal_data.kqn_router, - (kpr_fwd_desc_t *)ktx->ktx_args[0], error); - break; - - case KTX_SENDING: /* packet sourced locally */ - lib_finalize (&kqswnal_lib, ktx->ktx_args[0], - (lib_msg_t *)ktx->ktx_args[1]); - break; - - default: - LASSERT (0); - } - - kqswnal_put_idle_tx (ktx); -} - -static void -kqswnal_txhandler(EP_TXD *txd, void *arg, int status) -{ - kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg; - - LASSERT (txd != NULL); - LASSERT (ktx != NULL); - - CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status); - - if (status == EP_SUCCESS) - atomic_inc (&kqswnal_packets_transmitted); - - if (status != EP_SUCCESS) - { - CERROR ("kqswnal: Transmit failed with %d\n", status); - status = -EIO; - } - - kqswnal_tx_done (ktx, status); -} - -int -kqswnal_launch (kqswnal_tx_t *ktx) -{ - /* Don't block for transmit descriptor if we're in interrupt context */ - int attr = in_interrupt() ? 
(EP_NO_SLEEP | EP_NO_ALLOC) : 0; - int dest = kqswnal_nid2elanid (ktx->ktx_nid); - long flags; - int rc; - - LASSERT (dest >= 0); /* must be a peer */ - rc = ep_transmit_large(kqswnal_data.kqn_eptx, dest, - ktx->ktx_port, attr, kqswnal_txhandler, - ktx, ktx->ktx_iov, ktx->ktx_niov); - if (rc == 0) - atomic_inc (&kqswnal_packets_launched); - - if (rc != ENOMEM) - return (rc); - - /* can't allocate ep txd => queue for later */ - - LASSERT (in_interrupt()); /* not called by thread (not looping) */ - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_delayedtxds); - if (waitqueue_active (&kqswnal_data.kqn_sched_waitq)) - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - - return (0); -} - - -static char * -hdr_type_string (ptl_hdr_t *hdr) -{ - switch (hdr->type) { - case PTL_MSG_ACK: - return ("ACK"); - case PTL_MSG_PUT: - return ("PUT"); - case PTL_MSG_GET: - return ("GET"); - case PTL_MSG_REPLY: - return ("REPLY"); - default: - return (""); - } -} - -static void -kqswnal_cerror_hdr(ptl_hdr_t * hdr) -{ - char *type_str = hdr_type_string (hdr); - - CERROR("P3 Header at %p of type %s\n", hdr, type_str); - CERROR(" From nid/pid "LPU64"/%u", NTOH__u64(hdr->src_nid), - NTOH__u32(hdr->src_pid)); - CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid), - NTOH__u32(hdr->dest_pid)); - - switch (NTOH__u32(hdr->type)) { - case PTL_MSG_PUT: - CERROR(" Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPX64"\n", - NTOH__u32 (hdr->msg.put.ptl_index), - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - NTOH__u64 (hdr->msg.put.match_bits)); - CERROR(" Length %d, offset %d, hdr data "LPX64"\n", - NTOH__u32(PTL_HDR_LENGTH(hdr)), - NTOH__u32(hdr->msg.put.offset), - hdr->msg.put.hdr_data); - break; - - case PTL_MSG_GET: - CERROR(" Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPX64"\n", - NTOH__u32 
(hdr->msg.get.ptl_index), - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CERROR(" Length %d, src offset %d\n", - NTOH__u32 (hdr->msg.get.sink_length), - NTOH__u32 (hdr->msg.get.src_offset)); - break; - - case PTL_MSG_ACK: - CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - NTOH__u32 (hdr->msg.ack.mlength)); - break; - - case PTL_MSG_REPLY: - CERROR(" dst md "LPX64"."LPX64", length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - NTOH__u32 (PTL_HDR_LENGTH(hdr))); - } - -} /* end of print_hdr() */ - -static int -kqswnal_sendmsg (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - ptl_kiov_t *payload_kiov, - size_t payload_nob) -{ - kqswnal_tx_t *ktx; - int rc; - ptl_nid_t gatewaynid; -#if KQSW_CHECKSUM - int i; - kqsw_csum_t csum; - int sumnob; -#endif - - /* NB, the return code from this procedure is ignored. - * If we can't send, we must still complete with lib_finalize(). - * We'll have to wait for 3.2 to return an error event. - */ - - CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64 - " pid %u\n", payload_nob, payload_niov, nid, pid); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - /* It must be OK to kmap() if required */ - LASSERT (payload_kiov == NULL || !in_interrupt ()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - if (payload_nob > KQSW_MAXPAYLOAD) { - CERROR ("request exceeds MTU size "LPSZ" (max %u).\n", - payload_nob, KQSW_MAXPAYLOAD); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - - if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? 
*/ - rc = kpr_lookup (&kqswnal_data.kqn_router, nid, &gatewaynid); - if (rc != 0) { - CERROR("Can't route to "LPX64": router error %d\n", - nid, rc); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - if (kqswnal_nid2elanid (gatewaynid) < 0) { - CERROR("Bad gateway "LPX64" for "LPX64"\n", - gatewaynid, nid); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - nid = gatewaynid; - } - - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. */ - ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt())); - if (ktx == NULL) { - kqswnal_cerror_hdr (hdr); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - - memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ - -#if KQSW_CHECKSUM - csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr)); - memcpy (ktx->ktx_buffer + sizeof (*hdr), &csum, sizeof (csum)); - for (csum = 0, i = 0, sumnob = payload_nob; sumnob > 0; i++) { - if (payload_kiov != NULL) { - ptl_kiov_t *kiov = &payload_kiov[i]; - char *addr = ((char *)kmap (kiov->kiov_page)) + - kiov->kiov_offset; - - csum = kqsw_csum (csum, addr, MIN (sumnob, kiov->kiov_len)); - sumnob -= kiov->kiov_len; - } else { - struct iovec *iov = &payload_iov[i]; - - csum = kqsw_csum (csum, iov->iov_base, MIN (sumnob, kiov->iov_len)); - sumnob -= iov->iov_len; - } - } - memcpy(ktx->ktx_buffer +sizeof(*hdr) +sizeof(csum), &csum,sizeof(csum)); -#endif - - /* Set up first frag from pre-mapped buffer (it's at least the - * portals header) */ - ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; - ktx->ktx_iov[0].Len = KQSW_HDR_SIZE; - ktx->ktx_niov = 1; - - if (payload_nob > 0) { /* got some payload (something more to do) */ - /* make a single contiguous message? 
*/ - if (payload_nob <= KQSW_TX_MAXCONTIG) { - /* copy payload to ktx_buffer, immediately after hdr */ - if (payload_kiov != NULL) - lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, - payload_niov, payload_kiov, payload_nob); - else - lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, - payload_niov, payload_iov, payload_nob); - /* first frag includes payload */ - ktx->ktx_iov[0].Len += payload_nob; - } else { - if (payload_kiov != NULL) - rc = kqswnal_map_tx_kiov (ktx, payload_nob, - payload_niov, payload_kiov); - else - rc = kqswnal_map_tx_iov (ktx, payload_nob, - payload_niov, payload_iov); - if (rc != 0) { - kqswnal_put_idle_tx (ktx); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - } - } - - ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_SENDING; /* => lib_finalize() on completion */ - ktx->ktx_args[0] = private; - ktx->ktx_args[1] = cookie; - - rc = kqswnal_launch (ktx); - if (rc != 0) { /* failed? 
*/ - CERROR ("Failed to send packet to "LPX64": %d\n", nid, rc); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - - CDEBUG(D_NET, "send to "LPSZ" bytes to "LPX64"\n", payload_nob, nid); - return (0); -} - -static int -kqswnal_send (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - size_t payload_nob) -{ - return (kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid, - payload_niov, payload_iov, NULL, payload_nob)); -} - -static int -kqswnal_send_pages (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - ptl_kiov_t *payload_kiov, - size_t payload_nob) -{ - return (kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid, - payload_niov, NULL, payload_kiov, payload_nob)); -} - -int kqswnal_fwd_copy_contig = 0; - -void -kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - int rc; - kqswnal_tx_t *ktx; - struct iovec *iov = fwd->kprfd_iov; - int niov = fwd->kprfd_niov; - int nob = fwd->kprfd_nob; - ptl_nid_t nid = fwd->kprfd_gateway_nid; - -#if KQSW_CHECKSUM - CERROR ("checksums for forwarded packets not implemented\n"); - LBUG (); -#endif - /* The router wants this NAL to forward a packet */ - CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n", - fwd, nid, niov, nob); - - LASSERT (niov > 0); - - ktx = kqswnal_get_idle_tx (fwd, FALSE); - if (ktx == NULL) /* can't get txd right now */ - return; /* fwd will be scheduled when tx desc freed */ - - if (nid == kqswnal_lib.ni.nid) /* gateway is me */ - nid = fwd->kprfd_target_nid; /* target is final dest */ - - if (kqswnal_nid2elanid (nid) < 0) { - CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid); - rc = -EHOSTUNREACH; - goto failed; - } - - if (nob > KQSW_NRXMSGBYTES_LARGE) { - CERROR ("Can't forward [%p] to "LPX64 - ": size %d bigger than max 
packet size %ld\n", - fwd, nid, nob, (long)KQSW_NRXMSGBYTES_LARGE); - rc = -EMSGSIZE; - goto failed; - } - - if ((kqswnal_fwd_copy_contig || niov > 1) && - nob <= KQSW_TX_BUFFER_SIZE) - { - /* send from ktx's pre-allocated/mapped contiguous buffer? */ - lib_copy_iov2buf (ktx->ktx_buffer, niov, iov, nob); - ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; /* already mapped */ - ktx->ktx_iov[0].Len = nob; - ktx->ktx_niov = 1; - } - else - { - /* zero copy */ - ktx->ktx_niov = 0; /* no frags mapped yet */ - rc = kqswnal_map_tx_iov (ktx, nob, niov, iov); - if (rc != 0) - goto failed; - } - - ktx->ktx_port = (nob <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD)) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_FORWARDING; /* kpr_put_packet() on completion */ - ktx->ktx_args[0] = fwd; - - rc = kqswnal_launch (ktx); - if (rc == 0) - return; - - failed: - LASSERT (rc != 0); - CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc); - - kqswnal_put_idle_tx (ktx); - /* complete now (with failure) */ - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc); -} - -void -kqswnal_fwd_callback (void *arg, int error) -{ - kqswnal_rx_t *krx = (kqswnal_rx_t *)arg; - - /* The router has finished forwarding this packet */ - - if (error != 0) - { - ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]); - - CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n", - NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error); - } - - kqswnal_requeue_rx (krx); -} - -void -kqswnal_rx (kqswnal_rx_t *krx) -{ - ptl_hdr_t *hdr = (ptl_hdr_t *) page_address (krx->krx_pages[0]); - ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid); - int nob; - int niov; - - if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */ - /* NB krx requeued when lib_parse() calls back kqswnal_recv */ - lib_parse (&kqswnal_lib, hdr, krx); - return; - } - -#if KQSW_CHECKSUM - CERROR ("checksums for forwarded packets not implemented\n"); - LBUG (); -#endif - if 
(kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */ - { - CERROR("dropping packet from "LPX64" for "LPX64 - ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid); - kqswnal_requeue_rx (krx); - return; - } - - /* NB forwarding may destroy iov; rebuild every time */ - for (nob = krx->krx_nob, niov = 0; nob > 0; nob -= PAGE_SIZE, niov++) - { - LASSERT (niov < krx->krx_npages); - krx->krx_iov[niov].iov_base= page_address(krx->krx_pages[niov]); - krx->krx_iov[niov].iov_len = MIN(PAGE_SIZE, nob); - } - - kpr_fwd_init (&krx->krx_fwd, dest_nid, - krx->krx_nob, niov, krx->krx_iov, - kqswnal_fwd_callback, krx); - - kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd); -} - -/* Receive Interrupt Handler: posts to schedulers */ -void -kqswnal_rxhandler(EP_RXD *rxd) -{ - long flags; - int nob = ep_rxd_len (rxd); - int status = ep_rxd_status (rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd); - - CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n", - rxd, krx, nob, status); - - LASSERT (krx != NULL); - - krx->krx_rxd = rxd; - krx->krx_nob = nob; - - /* must receive a whole header to be able to parse */ - if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t)) - { - /* receives complete with failure when receiver is removed */ - if (kqswnal_data.kqn_shuttingdown) - return; - - CERROR("receive status failed with status %d nob %d\n", - ep_rxd_status(rxd), nob); - kqswnal_requeue_rx (krx); - return; - } - - atomic_inc (&kqswnal_packets_received); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds); - if (waitqueue_active (&kqswnal_data.kqn_sched_waitq)) - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -} - -#if KQSW_CHECKSUM -void -kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr) -{ - ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]); - - CERROR ("%s checksum mismatch %p: dnid 
"LPX64", snid "LPX64 - ", dpid %d, spid %d, type %d\n", - ishdr ? "Header" : "Payload", krx, - NTOH__u64(hdr->dest_nid), NTOH__u64(hdr->src_nid) - NTOH__u32(hdr->dest_pid), NTOH__u32(hdr->src_pid), - NTOH__u32(hdr->type)); - - switch (NTOH__u32 (hdr->type)) - { - case PTL_MSG_ACK: - CERROR("ACK: mlen %d dmd "LPX64"."LPX64" match "LPX64 - " len %u\n", - NTOH__u32(hdr->msg.ack.mlength), - hdr->msg.ack.dst_wmd.handle_cookie, - hdr->msg.ack.dst_wmd.handle_idx, - NTOH__u64(hdr->msg.ack.match_bits), - NTOH__u32(hdr->msg.ack.length)); - break; - case PTL_MSG_PUT: - CERROR("PUT: ptl %d amd "LPX64"."LPX64" match "LPX64 - " len %u off %u data "LPX64"\n", - NTOH__u32(hdr->msg.put.ptl_index), - hdr->msg.put.ack_wmd.handle_cookie, - hdr->msg.put.ack_wmd.handle_idx, - NTOH__u64(hdr->msg.put.match_bits), - NTOH__u32(hdr->msg.put.length), - NTOH__u32(hdr->msg.put.offset), - hdr->msg.put.hdr_data); - break; - case PTL_MSG_GET: - CERROR ("GET: <>\n"); - break; - case PTL_MSG_REPLY: - CERROR ("REPLY: <>\n"); - break; - default: - CERROR ("TYPE?: <>\n"); - } -} -#endif - -static int -kqswnal_recvmsg (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - ptl_kiov_t *kiov, - size_t mlen, - size_t rlen) -{ - kqswnal_rx_t *krx = (kqswnal_rx_t *)private; - int page; - char *page_ptr; - int page_nob; - char *iov_ptr; - int iov_nob; - int frag; -#if KQSW_CHECKSUM - kqsw_csum_t senders_csum; - kqsw_csum_t payload_csum = 0; - kqsw_csum_t hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]), - sizeof(ptl_hdr_t)); - size_t csum_len = mlen; - int csum_frags = 0; - int csum_nob = 0; - static atomic_t csum_counter; - int csum_verbose = (atomic_read(&csum_counter)%1000001) == 0; - - atomic_inc (&csum_counter); - - memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) + - sizeof (ptl_hdr_t), sizeof (kqsw_csum_t)); - if (senders_csum != hdr_csum) - kqswnal_csum_error (krx, 1); -#endif - CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", 
mlen, rlen); - - /* What was actually received must be >= payload. - * This is an LASSERT, as lib_finalize() doesn't have a completion status. */ - LASSERT (krx->krx_nob >= KQSW_HDR_SIZE + mlen); - LASSERT (mlen <= rlen); - - /* It must be OK to kmap() if required */ - LASSERT (kiov == NULL || !in_interrupt ()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - if (mlen != 0) - { - page = 0; - page_ptr = ((char *) page_address(krx->krx_pages[0])) + - KQSW_HDR_SIZE; - page_nob = PAGE_SIZE - KQSW_HDR_SIZE; - - LASSERT (niov > 0); - if (kiov != NULL) { - iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - iov_nob = kiov->kiov_len; - } else { - iov_ptr = iov->iov_base; - iov_nob = iov->iov_len; - } - - for (;;) - { - /* We expect the iov to exactly match mlen */ - LASSERT (iov_nob <= mlen); - - frag = MIN (page_nob, iov_nob); - memcpy (iov_ptr, page_ptr, frag); -#if KQSW_CHECKSUM - payload_csum = kqsw_csum (payload_csum, iov_ptr, frag); - csum_nob += frag; - csum_frags++; -#endif - mlen -= frag; - if (mlen == 0) - break; - - page_nob -= frag; - if (page_nob != 0) - page_ptr += frag; - else - { - page++; - LASSERT (page < krx->krx_npages); - page_ptr = page_address(krx->krx_pages[page]); - page_nob = PAGE_SIZE; - } - - iov_nob -= frag; - if (iov_nob != 0) - iov_ptr += frag; - else if (kiov != NULL) { - kunmap (kiov->kiov_page); - kiov++; - niov--; - LASSERT (niov > 0); - iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - iov_nob = kiov->kiov_len; - } else { - iov++; - niov--; - LASSERT (niov > 0); - iov_ptr = iov->iov_base; - iov_nob = iov->iov_len; - } - } - - if (kiov != NULL) - kunmap (kiov->kiov_page); - } - -#if KQSW_CHECKSUM - memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) + - sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t)); - - if (csum_len != rlen) - CERROR("Unable to checksum data in user's buffer\n"); - else if (senders_csum != payload_csum) - kqswnal_csum_error 
(krx, 0); - - if (csum_verbose) - CERROR("hdr csum %lx, payload_csum %lx, csum_frags %d, " - "csum_nob %d\n", - hdr_csum, payload_csum, csum_frags, csum_nob); -#endif - lib_finalize(nal, private, cookie); - - kqswnal_requeue_rx (krx); - - return (rlen); -} - -static int -kqswnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - size_t mlen, - size_t rlen) -{ - return (kqswnal_recvmsg (nal, private, cookie, niov, iov, NULL, mlen, rlen)); -} - -static int -kqswnal_recv_pages (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - ptl_kiov_t *kiov, - size_t mlen, - size_t rlen) -{ - return (kqswnal_recvmsg (nal, private, cookie, niov, NULL, kiov, mlen, rlen)); -} - -int -kqswnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kqswnal_data.kqn_nthreads); - return (0); -} - -void -kqswnal_thread_fini (void) -{ - atomic_dec (&kqswnal_data.kqn_nthreads); -} - -int -kqswnal_scheduler (void *arg) -{ - kqswnal_rx_t *krx; - kqswnal_tx_t *ktx; - kpr_fwd_desc_t *fwd; - long flags; - int rc; - int counter = 0; - int did_something; - - kportal_daemonize ("kqswnal_sched"); - kportal_blockallsigs (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - while (!kqswnal_data.kqn_shuttingdown) - { - did_something = FALSE; - - if (!list_empty (&kqswnal_data.kqn_readyrxds)) - { - krx = list_entry(kqswnal_data.kqn_readyrxds.next, - kqswnal_rx_t, krx_list); - list_del (&krx->krx_list); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - kqswnal_rx (krx); - - did_something = TRUE; - spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_delayedtxds)) - { - ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, - kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - rc = kqswnal_launch 
(ktx); - if (rc != 0) /* failed: ktx_nid down? */ - { - CERROR("Failed delayed transmit to "LPX64 - ": %d\n", ktx->ktx_nid, rc); - kqswnal_tx_done (ktx, rc); - } - - did_something = TRUE; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_delayedfwds)) - { - fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - - kqswnal_fwd_packet (NULL, fwd); - - did_something = TRUE; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - /* nothing to do or hogging CPU */ - if (!did_something || counter++ == KQSW_RESCHED) { - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - counter = 0; - - if (!did_something) { - rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq, - kqswnal_data.kqn_shuttingdown || - !list_empty(&kqswnal_data.kqn_readyrxds) || - !list_empty(&kqswnal_data.kqn_delayedtxds) || - !list_empty(&kqswnal_data.kqn_delayedfwds)); - LASSERT (rc == 0); - } else if (current->need_resched) - schedule (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - - kqswnal_thread_fini (); - return (0); -} - -nal_cb_t kqswnal_lib = -{ - nal_data: &kqswnal_data, /* NAL private data */ - cb_send: kqswnal_send, - cb_send_pages: kqswnal_send_pages, - cb_recv: kqswnal_recv, - cb_recv_pages: kqswnal_recv_pages, - cb_read: kqswnal_read, - cb_write: kqswnal_write, - cb_malloc: kqswnal_malloc, - cb_free: kqswnal_free, - cb_printf: kqswnal_printf, - cb_cli: kqswnal_cli, - cb_sti: kqswnal_sti, - cb_dist: kqswnal_dist -}; diff --git a/lnet/klnds/scimaclnd/.cvsignore b/lnet/klnds/scimaclnd/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/klnds/scimaclnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/klnds/scimaclnd/Makefile.am 
b/lnet/klnds/scimaclnd/Makefile.am deleted file mode 100644 index 6da31f0..0000000 --- a/lnet/klnds/scimaclnd/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kscimacnal -modulenet_DATA = kscimacnal.o -EXTRA_PROGRAMS = kscimacnal - -DEFS = -kscimacnal_SOURCES = scimacnal.c scimacnal_cb.c scimacnal.h diff --git a/lnet/klnds/scimaclnd/README.scimacnal b/lnet/klnds/scimaclnd/README.scimacnal deleted file mode 100644 index d4c6a49..0000000 --- a/lnet/klnds/scimaclnd/README.scimacnal +++ /dev/null @@ -1,14 +0,0 @@ - -scimacnal - A NAL for the Scali ScaMAC midlayer. - -The ScaMAC midlayer is a simplified API to the SCI high performance -interconnect. - -In order to use this NAL you'll need to tune scimac to use larger buffers. -See scimac.conf in this directory for an example. - -Overall performance and stability isn't great but this can be attributed -to the scimac driver which apparently is in need of some development. - -TODO: -Routing isn't yet implemented. diff --git a/lnet/klnds/scimaclnd/scimac.conf b/lnet/klnds/scimaclnd/scimac.conf deleted file mode 100644 index bfb6d02..0000000 --- a/lnet/klnds/scimaclnd/scimac.conf +++ /dev/null @@ -1,35 +0,0 @@ -# Configuration file for the scimac driver - lustre friendly settings -# - -# The maximal number of message headers to use in the system. -scimac_max_no_hdrs = 32 - -# The maximal number of eager buffers to use in the system. -scimac_max_no_ebufs = 8 - -# The maximal size in bytes of each eager buffer. -scimac_max_ebuf_size = 65536 - -# Enable use of a kernel thread to defer reception of packets. -# Default is to use a tasklet (sw interrupt). -scimac_use_ulevel_recv = 1 - -# The maximal number of packets queued for transfer per path at any one time. -scimac_max_send_queuelen = 2000 - -# The packet retransmit time in milliseconds. 
-# The time elapsed since a packet was attempted sent until the packet is resent. -scimac_pkt_rexmit_time = 200 - -# The packet's maximal retransmit time in milliseconds. -# The total time that a packet will be attempted sent before it is dropped. -scimac_max_rexmit_time = 5000 - -# The lowest valid node identifier in the system. -scimac_min_nodeid_number = 0x100 - -# The largest valid node identifier in the system. -scimac_max_nodeid_number = 0xff00 - -# The incremental nodeid step in the system. -scimac_nodeid_increment = 0x100 diff --git a/lnet/klnds/scimaclnd/scimacnal.c b/lnet/klnds/scimaclnd/scimacnal.c deleted file mode 100644 index 1066d69..0000000 --- a/lnet/klnds/scimaclnd/scimacnal.c +++ /dev/null @@ -1,219 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8:cindent: - * - * Copyright (C) 2003 High Performance Computing Center North (HPC2N) - * Author: Niklas Edmundsson - - * Based on gmnal, which is based on ksocknal and qswnal - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - - -#include "scimacnal.h" - -ptl_handle_ni_t kscimacnal_ni; -nal_t kscimacnal_api; - -kscimacnal_data_t kscimacnal_data; - -kpr_nal_interface_t kscimacnal_router_interface = { - kprni_nalid: SCIMACNAL, - kprni_arg: NULL, - kprni_fwd: kscimacnal_fwd_packet, -}; - - -static int kscimacnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kscimacnal_data_t *ksci = nal->nal_data; - nal_cb_t *nal_cb = ksci->ksci_cb; - - LASSERT (nal == &kscimacnal_api); - LASSERT (ksci == &kscimacnal_data); - LASSERT (nal_cb == &kscimacnal_lib); - - lib_dispatch(nal_cb, ksci, id, args, ret); /* nal needs ksci */ - return PTL_OK; -} - - -static void kscimacnal_lock(nal_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *ksci = nal->nal_data; - nal_cb_t *nal_cb = ksci->ksci_cb; - - - LASSERT (nal == &kscimacnal_api); - LASSERT (ksci == &kscimacnal_data); - LASSERT (nal_cb == &kscimacnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - - -static void kscimacnal_unlock(nal_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *ksci = nal->nal_data; - nal_cb_t *nal_cb = ksci->ksci_cb; - - - LASSERT (nal == &kscimacnal_api); - LASSERT (ksci == &kscimacnal_data); - LASSERT (nal_cb == &kscimacnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - - -static int kscimacnal_shutdown(nal_t *nal, int ni) -{ - LASSERT (nal == &kscimacnal_api); - return 0; -} - - -static void kscimacnal_yield( nal_t *nal ) -{ - LASSERT (nal == &kscimacnal_api); - - if (current->need_resched) - schedule(); - return; -} - - -static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - int nnids = 512; /* FIXME: Need ScaMac funktion to get #nodes */ - - CDEBUG(D_NET, "calling lib_init with nid 0x%Lx nnids %d\n", kscimacnal_data.ksci_nid, nnids); - lib_init(&kscimacnal_lib, kscimacnal_data.ksci_nid, 0, nnids,ptl_size, ac_size); - return &kscimacnal_api; -} - - -/* Called by kernel at module unload time */ 
-static void __exit -kscimacnal_finalize(void) -{ - /* FIXME: How should the shutdown procedure really look? */ - kscimacnal_data.ksci_shuttingdown=1; - - PORTAL_SYMBOL_UNREGISTER(kscimacnal_ni); - - PtlNIFini(kscimacnal_ni); - lib_fini(&kscimacnal_lib); - - mac_finish(kscimacnal_data.ksci_machandle); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - - -/* Called by kernel at module insertion time */ -static int __init -kscimacnal_initialize(void) -{ - int rc; - unsigned long nid=0; - mac_handle_t *machandle = NULL; - - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kscimacnal_api.forward = kscimacnal_forward; - kscimacnal_api.shutdown = kscimacnal_shutdown; - kscimacnal_api.yield = kscimacnal_yield; - kscimacnal_api.validate = NULL; /* our api validate is a NOOP */ - kscimacnal_api.lock= kscimacnal_lock; - kscimacnal_api.unlock= kscimacnal_unlock; - kscimacnal_api.nal_data = &kscimacnal_data; - - kscimacnal_lib.nal_data = &kscimacnal_data; - - memset(&kscimacnal_data, 0, sizeof(kscimacnal_data)); - - kscimacnal_data.ksci_cb = &kscimacnal_lib; - - /* We're not using this, but cli/sti callbacks does... ??? 
*/ - spin_lock_init(&kscimacnal_data.ksci_dispatch_lock); - - /* FIXME: We only support one adapter for now */ - machandle = mac_init(0, MAC_SAPID_LUSTRE, kscimacnal_rx, - &kscimacnal_data); - - if(!machandle) { - CERROR("mac_init() failed\n"); - return -1; - } - - kscimacnal_data.ksci_machandle = machandle; - - /* Make sure the scimac MTU is tuned */ - if(mac_get_mtusize(machandle) < SCIMACNAL_MTU) { - CERROR("scimac mtu of %ld smaller than SCIMACNAL MTU of %d\n", - mac_get_mtusize(machandle), SCIMACNAL_MTU); - CERROR("Consult README.scimacnal for more information\n"); - mac_finish(machandle); - return -1; - } - - /* Get the node ID */ - /* mac_get_physaddrlen() is a function instead of define, sigh */ - LASSERT(mac_get_physaddrlen(machandle) <= sizeof(nid)); - if(mac_get_physaddr(machandle, (mac_physaddr_t *) &nid)) { - CERROR("mac_get_physaddr() failed\n"); - mac_finish(machandle); - return -1; - } - nid = ntohl(nid); - kscimacnal_data.ksci_nid = nid; - - - /* Initialize Network Interface */ - /* FIXME: What do the magic numbers mean? Documentation anyone? 
*/ - rc = PtlNIInit(kscimacnal_init, 32, 4, 0, &kscimacnal_ni); - if (rc) { - CERROR("PtlNIInit failed %d\n", rc); - mac_finish(machandle); - return (-ENOMEM); - } - - PORTAL_SYMBOL_REGISTER(kscimacnal_ni); - - /* We're done now, it's OK for the RX callback to do stuff */ - kscimacnal_data.ksci_init = 1; - - return 0; -} - - -MODULE_AUTHOR("Niklas Edmundsson "); -MODULE_DESCRIPTION("Kernel Scali ScaMAC SCI NAL v0.0"); -MODULE_LICENSE("GPL"); - -module_init (kscimacnal_initialize); -module_exit (kscimacnal_finalize); - -EXPORT_SYMBOL(kscimacnal_ni); diff --git a/lnet/klnds/scimaclnd/scimacnal.h b/lnet/klnds/scimaclnd/scimacnal.h deleted file mode 100644 index 1ff180e..0000000 --- a/lnet/klnds/scimaclnd/scimacnal.h +++ /dev/null @@ -1,85 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8:cindent: - * - * Copyright (C) 2003 High Performance Computing Center North (HPC2N) - * Author: Niklas Edmundsson - */ - - -#ifndef _SCIMACNAL_H -#define _SCIMACNAL_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include /* For PAGE_SIZE */ - -#define DEBUG_SUBSYSTEM S_UNDEFINED - -#include -#include -#include - -#include - -#ifndef MAC_SAPID_LUSTRE -#define MAC_SAPID_LUSTRE MAC_SAPID_TEST1 -#endif /* MAC_SAPID_LUSTRE */ - -#define SCIMACNAL_MTU 65536 -/* FIXME: What is really the MTU of lustre? */ -#if PTL_MD_MAX_IOV*PAGE_SIZE > SCIMACNAL_MTU -#error Max MTU of ScaMAC is 64k, PTL_MD_MAX_IOV*PAGE_SIZE is bigger. 
-#endif - -typedef struct { - mac_handle_t *handle; - mac_mblk_t *msg; - mac_msg_type_t type; - void *userdata; -} kscimacnal_rx_t; - - -typedef struct { - nal_cb_t *ktx_nal; - void *ktx_private; - lib_msg_t *ktx_cookie; - ptl_hdr_t ktx_hdr; -} kscimacnal_tx_t; - - -typedef struct { - char ksci_init; - char ksci_shuttingdown; - ptl_nid_t ksci_nid; - nal_cb_t *ksci_cb; - spinlock_t ksci_dispatch_lock; - mac_handle_t *ksci_machandle; -} kscimacnal_data_t; - -extern kscimacnal_data_t kscimacnal_data; -extern nal_t kscimacnal_api; -extern nal_cb_t kscimacnal_lib; - -void kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); -void kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type, void *userdata); - - -#endif /* _SCIMACNAL_H */ diff --git a/lnet/klnds/scimaclnd/scimacnal_cb.c b/lnet/klnds/scimaclnd/scimacnal_cb.c deleted file mode 100644 index 7e4a2e8..0000000 --- a/lnet/klnds/scimaclnd/scimacnal_cb.c +++ /dev/null @@ -1,468 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8:cindent: - * - * Copyright (C) 2003 High Performance Computing Center North (HPC2N) - * Author: Niklas Edmundsson - - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "scimacnal.h" - -static int -kscimacnal_read (nal_cb_t *nal, void *private, - void *dst_addr, user_ptr src_addr, size_t len) -{ - CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - - -static int -kscimacnal_write(nal_cb_t *nal, void *private, - user_ptr dst_addr, void *src_addr, size_t len) -{ - CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - - -static void * -kscimacnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - return buf; -} - - -static void -kscimacnal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - - -static void -kscimacnal_printf(nal_cb_t *nal, const char *fmt, ...) -{ - va_list ap; - char msg[256]; - - if (portal_debug & D_NET) { - va_start( ap, fmt ); - vsnprintf( msg, sizeof(msg), fmt, ap ); - va_end( ap ); - - printk("CPUId: %d %s",smp_processor_id(), msg); - } -} - - -static void -kscimacnal_cli(nal_cb_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *data= nal->nal_data; - - spin_lock_irqsave(&data->ksci_dispatch_lock,*flags); -} - - -static void -kscimacnal_sti(nal_cb_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *data= nal->nal_data; - - spin_unlock_irqrestore(&data->ksci_dispatch_lock,*flags); -} - - -static int -kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* FIXME: Network distance has a meaning, but is there no easy - * way to figure it out (depends on routing) */ - - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - - -static -char * get_mac_error(mac_status_t status) -{ - switch(status) { - case MAC_MSG_STAT_OK: - return "MAC_MSG_STAT_OK"; - case MAC_MSG_STAT_FREED: - return "MAC_MSG_STAT_FREED"; - case MAC_MSG_STAT_ABORTED: - return "MAC_MSG_STAT_ABORTED"; - case 
MAC_MSG_STAT_TIMEDOUT: - return "MAC_MSG_STAT_TIMEDOUT"; - case MAC_MSG_STAT_NODEUNREACH: - return "MAC_MSG_STAT_NODEUNREACH"; - case MAC_MSG_STAT_NETDOWN: - return "MAC_MSG_STAT_NETDOWN"; - case MAC_MSG_STAT_RESET: - return "MAC_MSG_STAT_RESET"; - case MAC_MSG_STAT_INITFAILED: - return "MAC_MSG_STAT_INITFAILED"; - case MAC_MSG_STAT_SYNCFAILED: - return "MAC_MSG_STAT_SYNCFAILED"; - case MAC_MSG_STAT_BADPROTO: - return "MAC_MSG_STAT_BADPROTO"; - case MAC_MSG_STAT_NOBUFSPACE: - return "MAC_MSG_STAT_NOBUFSPACE"; - case MAC_MSG_STAT_CONGESTION: - return "MAC_MSG_STAT_CONGESTION"; - case MAC_MSG_STAT_OTHER: - return "MAC_MSG_STAT_OTHER"; - default: - return "Unknown error"; - } -} - - -/* FIXME add routing code here ? */ - -/* Called by ScaMac when transmission is complete (ie. message is released) */ -static void -kscimacnal_txrelease(mac_mblk_t *msg, mac_msg_status_t status, void *context) -{ - kscimacnal_tx_t *ktx = (kscimacnal_tx_t *)context; - int err=0; - - LASSERT (ktx != NULL); - - /* Euh, there is no feedback when transmission fails?! */ - switch(status) { - case MAC_MSG_STAT_OK: /* normal */ - break; - default: - CERROR("%s (%d):\n", get_mac_error(status), status); - err = -EIO; - break; - } - - lib_finalize(ktx->ktx_nal, ktx->ktx_private, ktx->ktx_cookie); - - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); -} - - -/* Called by portals when it wants to send a message. - * Since ScaMAC has it's own TX thread we don't bother setting up our own. 
*/ -static int -kscimacnal_send(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - size_t payload_len) -{ - kscimacnal_tx_t *ktx=NULL; - kscimacnal_data_t *ksci = nal->nal_data; - int rc=0; - int buf_len = sizeof(ptl_hdr_t) + payload_len; - mac_mblk_t *msg=NULL, *lastblk, *newblk; - unsigned long physaddr; - - - CDEBUG(D_NET, "sending %d bytes from %p to nid 0x%Lx niov: %d\n", - payload_len, payload_iov, nid, payload_niov); - - LASSERT(ksci != NULL); - - LASSERT(hdr != NULL); - - /* Do real check if we can send this */ - if (buf_len > mac_get_mtusize(ksci->ksci_machandle)) { - CERROR("kscimacnal:request exceeds TX MTU size (%ld).\n", - mac_get_mtusize(ksci->ksci_machandle)); - return -EINVAL; - } - - - /* save transaction info for later finalize and cleanup */ - PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t))); - if (!ktx) { - return -ENOMEM; - } - - /* *SIGH* hdr is a stack variable in the calling function, so we - * need to copy it to a buffer. Zerocopy magic (or is it just - * deferred memcpy?) is annoying sometimes. */ - memcpy(&ktx->ktx_hdr, hdr, sizeof(ptl_hdr_t)); - - /* First, put the header in the main message mblk */ - msg = mac_alloc_mblk(&ktx->ktx_hdr, sizeof(ptl_hdr_t), - kscimacnal_txrelease, ktx); - if (!msg) { - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return -ENOMEM; - } - mac_put_mblk(msg, sizeof(ptl_hdr_t)); - lastblk=msg; - - /* Allocate additional mblks for each iov as needed. 
- * Essentially lib_copy_iov2buf with a twist or two */ - while (payload_len > 0) - { - ptl_size_t nob; - - LASSERT (payload_niov > 0); - - nob = MIN (payload_iov->iov_len, payload_len); - - /* We don't need a callback on the additional mblks, since - * all release callbacks seems to be called when the entire - * message has been sent */ - newblk=mac_alloc_mblk(payload_iov->iov_base, nob, NULL, NULL); - if(!newblk) { - mac_free_msg(msg); - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return -ENOMEM; - } - mac_put_mblk(newblk, nob); - mac_link_mblk(lastblk, newblk); - lastblk=newblk; - - payload_len -= nob; - payload_niov--; - payload_iov++; - } - - ktx->ktx_nal = nal; - ktx->ktx_private = private; - ktx->ktx_cookie = cookie; - - CDEBUG(D_NET, "mac_send %d bytes to nid: 0x%Lx\n", buf_len, nid); - - physaddr = htonl(nid); - - if((rc=mac_send(ksci->ksci_machandle, msg, - (mac_physaddr_t *) &physaddr))) { - CERROR("kscimacnal: mac_send() failed, rc=%d\n", rc); - mac_free_msg(msg); - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return rc; - } - - return 0; -} - - -void -kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - CERROR ("forwarding not implemented\n"); -} - - -/* Process a received portals packet */ -/* Called by the ScaMac RX thread when a packet is received */ -void -kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type, - void *userdata) -{ - ptl_hdr_t *hdr = NULL; - kscimacnal_rx_t krx; - mac_size_t size; - kscimacnal_data_t *ksci = userdata; - - LASSERT(ksci != NULL); - - if ( !ksci->ksci_init || ksci->ksci_shuttingdown || - type == MAC_MSG_TYPE_CTRL || type == MAC_MSG_TYPE_OTHER ) { - /* We're not interested in messages not for us, ignore */ - mac_free_msg(msg); - return; - } - - size = mac_msg_size(msg); - - CDEBUG(D_NET,"msg %p type %d, size %ld bytes (%ld mblks)\n", - msg, type, size, mac_msg_mblks(msg)); - - if( size < sizeof( ptl_hdr_t ) ) { - /* XXX what's this for? 
*/ - if (ksci->ksci_shuttingdown) - return; - CERROR("kscimacnal: did not receive complete portal header," - "size= %ld\n", size); - /* Free the message before exiting */ - mac_free_msg(msg); - return; - } - - /* Provide everything we know */ - krx.handle = handle; - krx.msg = msg; - krx.type = type; - krx.userdata = userdata; - - /* mac_msg_next returns the next mblk with unread data */ - hdr = mac_get_mblk(mac_msg_next(msg), sizeof(ptl_hdr_t) ); - - if(!hdr) { - CERROR("kscimacnal: no data block in message %p\n", msg); - mac_free_msg(msg); - return; - } - - if ( hdr->dest_nid == kscimacnal_lib.ni.nid ) { - PROF_START(lib_parse); - /* sets wanted_len, iovs etc and calls our callback */ - lib_parse(&kscimacnal_lib, hdr, &krx); - PROF_FINISH(lib_parse); -#if 0 /* FIXME: Is it possible to detect this? */ - } else if (kgmnal_ispeer(hdr->dest_nid)) { - /* should have gone direct to peer */ - CERROR("dropping packet from 0x%llx to 0x%llx:" - "target is a peer\n", - hdr->src_nid, hdr->dest_nid); - kgmnal_requeue_rx(&krx); -#endif /* if 0 FIXME */ - } else { - /* forward to gateway */ - CERROR("forwarding not implemented, mynid=0x%llx dest=0x%llx\n", - kscimacnal_lib.ni.nid, hdr->dest_nid); - } - - mac_free_msg(msg); - - CDEBUG(D_NET, "msg %p: Done\n", msg); -} - - -/* Called by portals to process a recieved packet */ -static int kscimacnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - size_t mlen, - size_t rlen) -{ - kscimacnal_rx_t *krx = private; - mac_mblk_t *mblk; - void *src; - mac_size_t pkt_len; - ptl_size_t iovused=0; - - LASSERT (krx != NULL); - LASSERT (krx->msg != NULL); - - CDEBUG(D_NET,"msg %p: mlen=%d, rlen=%d, niov=%d\n", - krx->msg, mlen, rlen, niov); - - /* What was actually received must be >= what sender claims to have - * sent. This is an LASSERT, since lib-move doesn't check cb return - * code yet. Also, rlen seems to be negative when mlen==0 so don't - * assert on that. 
- */ - LASSERT (mlen==0 || mac_msg_size(krx->msg) >= sizeof(ptl_hdr_t)+rlen); - LASSERT (mlen==0 || mlen <= rlen); - - PROF_START(memcpy); - - /* mac_msg_next returns next mblk with unread data (ie. can - * be same mblk */ - while (mlen != 0 && (mblk = mac_msg_next(krx->msg))) { - pkt_len = mac_mblk_len(mblk); - src = mac_get_mblk(mblk, pkt_len); /* Next unread block */ - - CDEBUG(D_NET,"msg %p: mblk: %p pkt_len: %ld src: %p\n", - krx->msg, mblk, pkt_len, src); - - LASSERT(src != NULL); - - /* Essentially lib_copy_buf2iov but with continuation support, - * we "gracefully" thrash the argument vars ;) */ - while (pkt_len > 0) { - ptl_size_t nob; - - LASSERT (niov > 0); - - LASSERT(iovused < iov->iov_len); - - nob = MIN (iov->iov_len-iovused, pkt_len); - CDEBUG(D_NET, "iovbase: %p iovlen: %d src: %p nob: %d " - "iovused: %d\n", - iov->iov_base, iov->iov_len, - src, nob, iovused); - - memcpy (iov->iov_base+iovused, src, nob); - pkt_len -= nob; - src += nob; - - if(nob+iovused < iov->iov_len) { - /* We didn't use all of the iov */ - iovused+=nob; - } - else { - niov--; - iov++; - iovused=0; - } - } - } - PROF_FINISH(memcpy); - - CDEBUG(D_NET, "Calling lib_finalize.\n"); - - PROF_START(lib_finalize); - lib_finalize(nal, private, cookie); - PROF_FINISH(lib_finalize); - - CDEBUG(D_NET, "Done.\n"); - - return rlen; -} - - -nal_cb_t kscimacnal_lib = { - nal_data: &kscimacnal_data, /* NAL private data */ - cb_send: kscimacnal_send, - cb_send_pages: NULL, /* Ignore for now */ - cb_recv: kscimacnal_recv, - cb_recv_pages: NULL, - cb_read: kscimacnal_read, - cb_write: kscimacnal_write, - cb_malloc: kscimacnal_malloc, - cb_free: kscimacnal_free, - cb_printf: kscimacnal_printf, - cb_cli: kscimacnal_cli, - cb_sti: kscimacnal_sti, - cb_dist: kscimacnal_dist -}; diff --git a/lnet/klnds/socklnd/.cvsignore b/lnet/klnds/socklnd/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/klnds/socklnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff 
--git a/lnet/klnds/socklnd/Makefile.am b/lnet/klnds/socklnd/Makefile.am deleted file mode 100644 index 437d7fc..0000000 --- a/lnet/klnds/socklnd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = ksocknal -modulenet_DATA = ksocknal.o -EXTRA_PROGRAMS = ksocknal - -DEFS = -ksocknal_SOURCES = socknal.c socknal_cb.c socknal.h diff --git a/lnet/klnds/socklnd/Makefile.mk b/lnet/klnds/socklnd/Makefile.mk deleted file mode 100644 index 46edf01..0000000 --- a/lnet/klnds/socklnd/Makefile.mk +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Kernelenv - -obj-y += ksocknal.o -ksocknal-objs := socknal.o socknal_cb.o - diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c deleted file mode 100644 index 91d971c..0000000 --- a/lnet/klnds/socklnd/socklnd.c +++ /dev/null @@ -1,860 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "socknal.h" - -ptl_handle_ni_t ksocknal_ni; -static nal_t ksocknal_api; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -ksock_nal_data_t ksocknal_data; -#else -static ksock_nal_data_t ksocknal_data; -#endif - -kpr_nal_interface_t ksocknal_router_interface = { - kprni_nalid: SOCKNAL, - kprni_arg: &ksocknal_data, - kprni_fwd: ksocknal_fwd_packet, -}; - - -int -ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, - void *ret, size_t ret_len) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - - lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */ - return PTL_OK; -} - -int -ksocknal_api_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "closing all connections\n"); - - return ksocknal_close_sock(0); /* close all sockets */ -} - -void -ksocknal_api_yield(nal_t *nal) -{ - our_cond_resched(); - return; -} - -void -ksocknal_api_lock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_cli(nal_cb,flags); -} - -void -ksocknal_api_unlock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_sti(nal_cb,flags); -} - -nal_t * -ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); -} - -/* - * EXTRA functions follow - */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define SOCKET_I(inode) (&(inode)->u.socket_i) -#endif -static __inline__ struct socket * -socki_lookup(struct inode *inode) -{ - 
return SOCKET_I(inode); -} - -int -ksocknal_set_mynid(ptl_nid_t nid) -{ - lib_ni_t *ni = &ksocknal_lib.ni; - - /* FIXME: we have to do this because we call lib_init() at module - * insertion time, which is before we have 'mynid' available. lib_init - * sets the NAL's nid, which it uses to tell other nodes where packets - * are coming from. This is not a very graceful solution to this - * problem. */ - - CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->nid); - - ni->nid = nid; - return (0); -} - -void -ksocknal_bind_irq (unsigned int irq, int cpu) -{ -#if (defined(CONFIG_SMP) && CPU_AFFINITY) - char cmdline[64]; - char *argv[] = {"/bin/sh", - "-c", - cmdline, - NULL}; - char *envp[] = {"HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - snprintf (cmdline, sizeof (cmdline), - "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq); - - printk (KERN_INFO "Binding irq %u to CPU %d with cmd: %s\n", - irq, cpu, cmdline); - - /* FIXME: Find a better method of setting IRQ affinity... 
- */ - - call_usermodehelper (argv[0], argv, envp); -#endif -} - -int -ksocknal_add_sock (ptl_nid_t nid, int fd, int bind_irq) -{ - unsigned long flags; - ksock_conn_t *conn; - struct file *file = NULL; - struct socket *sock = NULL; - ksock_sched_t *sched = NULL; - unsigned int irq = 0; - struct net_device *dev = NULL; - int ret; - int idx; - ENTRY; - - LASSERT (!in_interrupt()); - - file = fget(fd); - if (file == NULL) - RETURN(-EINVAL); - - ret = -EINVAL; - sock = socki_lookup(file->f_dentry->d_inode); - if (sock == NULL) - GOTO(error, ret); - - ret = -ENOMEM; - PORTAL_ALLOC(conn, sizeof(*conn)); - if (!conn) - GOTO(error, ret); - - sock->sk->allocation = GFP_NOFS; /* don't call info fs for alloc */ - - conn->ksnc_file = file; - conn->ksnc_sock = sock; - conn->ksnc_saved_data_ready = sock->sk->data_ready; - conn->ksnc_saved_write_space = sock->sk->write_space; - conn->ksnc_peernid = nid; - atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for socklist */ - - conn->ksnc_rx_ready = 0; - conn->ksnc_rx_scheduled = 0; - ksocknal_new_packet (conn, 0); - - INIT_LIST_HEAD (&conn->ksnc_tx_queue); - conn->ksnc_tx_ready = 0; - conn->ksnc_tx_scheduled = 0; - -#warning check it is OK to derefence sk->dst_cache->dev like this... 
- lock_sock (conn->ksnc_sock->sk); - - if (conn->ksnc_sock->sk->dst_cache != NULL) { - dev = conn->ksnc_sock->sk->dst_cache->dev; - if (dev != NULL) { - irq = dev->irq; - if (irq >= NR_IRQS) { - CERROR ("Unexpected IRQ %x\n", irq); - irq = 0; - } - } - } - - release_sock (conn->ksnc_sock->sk); - - write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags); - - if (irq == 0 || - ksocknal_data.ksnd_irq_info[irq] == SOCKNAL_IRQ_UNASSIGNED) { - /* This is a software NIC, or we haven't associated it with - * a CPU yet */ - - /* Choose the CPU with the fewest connections */ - sched = ksocknal_data.ksnd_schedulers; - for (idx = 1; idx < SOCKNAL_N_SCHED; idx++) - if (sched->kss_nconns > - ksocknal_data.ksnd_schedulers[idx].kss_nconns) - sched = &ksocknal_data.ksnd_schedulers[idx]; - - if (irq != 0) { /* Hardware NIC */ - /* Remember which scheduler we chose */ - idx = sched - ksocknal_data.ksnd_schedulers; - - LASSERT (idx < SOCKNAL_IRQ_SCHED_MASK); - - if (bind_irq) /* remember if we will bind below */ - idx |= SOCKNAL_IRQ_BOUND; - - ksocknal_data.ksnd_irq_info[irq] = idx; - } - } else { - /* This is a hardware NIC, associated with a CPU */ - idx = ksocknal_data.ksnd_irq_info[irq]; - - /* Don't bind again if we've bound already */ - if ((idx & SOCKNAL_IRQ_BOUND) != 0) - bind_irq = 0; - - sched = &ksocknal_data.ksnd_schedulers[idx & SOCKNAL_IRQ_SCHED_MASK]; - } - - sched->kss_nconns++; - conn->ksnc_scheduler = sched; - - list_add(&conn->ksnc_list, &ksocknal_data.ksnd_socklist); - - write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags); - - if (bind_irq && /* irq binding required */ - irq != 0) /* hardware NIC */ - ksocknal_bind_irq (irq, sched - ksocknal_data.ksnd_schedulers); - - /* NOW it's safe to get called back when socket is ready... 
*/ - sock->sk->user_data = conn; - sock->sk->data_ready = ksocknal_data_ready; - sock->sk->write_space = ksocknal_write_space; - - /* ...which I call right now to get things going */ - ksocknal_data_ready (sock->sk, 0); - ksocknal_write_space (sock->sk); - - CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n", - conn, conn->ksnc_peernid); - - /* Can't unload while connection active */ - PORTAL_MODULE_USE; - RETURN(0); - -error: - fput(file); - return (ret); -} - -/* Passing in a zero nid will close all connections */ -int -ksocknal_close_sock(ptl_nid_t nid) -{ - long flags; - ksock_conn_t *conn; - LIST_HEAD (death_row); - struct list_head *tmp; - - LASSERT (!in_interrupt()); - write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags); - - if (nid == 0) { /* close ALL connections */ - /* insert 'death row' into the socket list... */ - list_add (&death_row, &ksocknal_data.ksnd_socklist); - /* ...extract and reinitialise the socket list itself... */ - list_del_init (&ksocknal_data.ksnd_socklist); - /* ...and voila, death row is the proud owner of all conns */ - } else list_for_each (tmp, &ksocknal_data.ksnd_socklist) { - - conn = list_entry (tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_peernid == nid) { - list_del (&conn->ksnc_list); - list_add (&conn->ksnc_list, &death_row); - break; - } - } - - write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags); - - if (nid && list_empty (&death_row)) - return (-ENOENT); - - while (!list_empty (&death_row)) { - conn = list_entry (death_row.next, ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - - /* NB I _have_ to restore the callback, rather than storing - * a noop, since the socket could survive past this module - * being unloaded!! */ - conn->ksnc_sock->sk->data_ready = conn->ksnc_saved_data_ready; - conn->ksnc_sock->sk->write_space = conn->ksnc_saved_write_space; - - /* OK; no more callbacks, but they could be in progress now, - * so wait for them to complete... 
*/ - write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags); - - /* ...however if I get the lock before a callback gets it, - * this will make them noop - */ - conn->ksnc_sock->sk->user_data = NULL; - - /* And drop the scheduler's connection count while I've got - * the exclusive lock */ - conn->ksnc_scheduler->kss_nconns--; - - write_unlock_irqrestore(&ksocknal_data.ksnd_socklist_lock, - flags); - - ksocknal_put_conn (conn); /* drop ref for ksnd_socklist */ - } - - return (0); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - return &(sk->tp_pinfo.af_tcp); -} -#else -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - struct tcp_sock *s = (struct tcp_sock *)sk; - return &s->tcp; -} -#endif - -void -ksocknal_push_conn (ksock_conn_t *conn) -{ - struct sock *sk = conn->ksnc_sock->sk; - struct tcp_opt *tp = sock2tcp_opt(sk); - int nonagle; - int val = 1; - int rc; - mm_segment_t oldmm; - - lock_sock (sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock (sk); - - oldmm = get_fs (); - set_fs (KERNEL_DS); - - rc = sk->prot->setsockopt (sk, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof (val)); - LASSERT (rc == 0); - - set_fs (oldmm); - - lock_sock (sk); - tp->nonagle = nonagle; - release_sock (sk); -} - -/* Passing in a zero nid pushes all connections */ -int -ksocknal_push_sock (ptl_nid_t nid) -{ - ksock_conn_t *conn; - struct list_head *tmp; - int index; - int i; - - if (nid != 0) { - conn = ksocknal_get_conn (nid); - - if (conn == NULL) - return (-ENOENT); - - ksocknal_push_conn (conn); - ksocknal_put_conn (conn); - - return (0); - } - - /* NB we can't remove connections from the socket list so we have to - * cope with them being removed from under us... 
- */ - for (index = 0; ; index++) { - read_lock (&ksocknal_data.ksnd_socklist_lock); - - i = 0; - conn = NULL; - - list_for_each (tmp, &ksocknal_data.ksnd_socklist) { - if (i++ == index) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - atomic_inc (&conn->ksnc_refcount); // take a ref - break; - } - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - if (conn == NULL) - break; - - ksocknal_push_conn (conn); - ksocknal_put_conn (conn); - } - - return (0); -} - -ksock_conn_t * -ksocknal_get_conn (ptl_nid_t nid) -{ - struct list_head *tmp; - ksock_conn_t *conn; - - PROF_START(conn_list_walk); - - read_lock (&ksocknal_data.ksnd_socklist_lock); - - list_for_each(tmp, &ksocknal_data.ksnd_socklist) { - - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_peernid == nid) { - /* caller is referencing */ - atomic_inc (&conn->ksnc_refcount); - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n", - conn, nid, atomic_read (&conn->ksnc_refcount)); - - PROF_FINISH(conn_list_walk); - return (conn); - } - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", - nid); - PROF_FINISH(conn_list_walk); - return (NULL); -} - -void -ksocknal_close_conn (ksock_conn_t *conn) -{ - CDEBUG (D_NET, "connection [%p] closed \n", conn); - - fput (conn->ksnc_file); - PORTAL_FREE (conn, sizeof (*conn)); - - /* One less connection keeping us hanging on */ - PORTAL_MODULE_UNUSE; -} - -void -_ksocknal_put_conn (ksock_conn_t *conn) -{ - unsigned long flags; - - CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn); - - /* "But what is the black spot, captain?" I asked. - * "That's a summons, mate..." 
*/ - - LASSERT (atomic_read (&conn->ksnc_refcount) == 0); - LASSERT (conn->ksnc_sock->sk->data_ready != ksocknal_data_ready); - LASSERT (conn->ksnc_sock->sk->write_space != ksocknal_write_space); - LASSERT (conn->ksnc_sock->sk->user_data == NULL); - LASSERT (!conn->ksnc_rx_scheduled); - - if (!in_interrupt()) { - ksocknal_close_conn (conn); - return; - } - - spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); - - list_add (&conn->ksnc_list, &ksocknal_data.ksnd_reaper_list); - wake_up (&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); -} - -int -ksocknal_cmd(struct portal_ioctl_data * data, void * private) -{ - int rc = -EINVAL; - - LASSERT (data != NULL); - - switch(data->ioc_nal_cmd) { - case NAL_CMD_REGISTER_PEER_FD: { - rc = ksocknal_add_sock(data->ioc_nid, data->ioc_fd, - data->ioc_flags); - break; - } - case NAL_CMD_CLOSE_CONNECTION: { - rc = ksocknal_close_sock(data->ioc_nid); - break; - } - case NAL_CMD_REGISTER_MYNID: { - rc = ksocknal_set_mynid (data->ioc_nid); - break; - } - case NAL_CMD_PUSH_CONNECTION: { - rc = ksocknal_push_sock (data->ioc_nid); - break; - } - } - - return rc; -} - -void -ksocknal_free_buffers (void) -{ - if (ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - } - - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); - - if (ksocknal_data.ksnd_schedulers != NULL) - PORTAL_FREE (ksocknal_data.ksnd_schedulers, - sizeof (ksock_sched_t) * SOCKNAL_N_SCHED); -} - -void __exit -ksocknal_module_fini (void) -{ - int 
i; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - switch (ksocknal_data.ksnd_init) { - default: - LASSERT (0); - - case SOCKNAL_INIT_ALL: - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); - /* fall through */ - - case SOCKNAL_INIT_PTL: - PtlNIFini(ksocknal_ni); - lib_fini(&ksocknal_lib); - /* fall through */ - - case SOCKNAL_INIT_DATA: - /* Module refcount only gets to zero when all connections - * have been closed so all lists must be empty */ - LASSERT (list_empty (&ksocknal_data.ksnd_socklist)); - LASSERT (list_empty (&ksocknal_data.ksnd_reaper_list)); - LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns)); - LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns)); - - if (ksocknal_data.ksnd_schedulers != NULL) - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - ksock_sched_t *kss = - &ksocknal_data.ksnd_schedulers[i]; - - LASSERT (list_empty (&kss->kss_tx_conns)); - LASSERT (list_empty (&kss->kss_rx_conns)); - LASSERT (kss->kss_nconns == 0); - } - - /* stop router calling me */ - kpr_shutdown (&ksocknal_data.ksnd_router); - - /* flag threads to terminate; wake and wait for them to die */ - ksocknal_data.ksnd_shuttingdown = 1; - wake_up_all (&ksocknal_data.ksnd_reaper_waitq); - - for (i = 0; i < SOCKNAL_N_SCHED; i++) - wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq); - - while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) { - CDEBUG (D_NET, "waitinf for %d threads to terminate\n", - atomic_read (&ksocknal_data.ksnd_nthreads)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - - kpr_deregister (&ksocknal_data.ksnd_router); - - ksocknal_free_buffers(); - /* fall through */ - - case SOCKNAL_INIT_NOTHING: - break; - } - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n", - atomic_read(&portal_kmemory)); -} - - -int __init 
-ksocknal_module_init (void) -{ - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; - - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.yield = ksocknal_api_yield; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; - - ksocknal_lib.nal_data = &ksocknal_data; - - memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - - INIT_LIST_HEAD(&ksocknal_data.ksnd_socklist); - rwlock_init(&ksocknal_data.ksnd_socklist_lock); - - ksocknal_data.ksnd_nal_cb = &ksocknal_lib; - spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock); - - spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); - - spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); - INIT_LIST_HEAD (&ksocknal_data.ksnd_reaper_list); - init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq); - - memset (&ksocknal_data.ksnd_irq_info, SOCKNAL_IRQ_UNASSIGNED, - sizeof (ksocknal_data.ksnd_irq_info)); - - /* flag lists/ptrs/locks initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - - PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, - sizeof(ksock_sched_t) * 
SOCKNAL_N_SCHED); - if (ksocknal_data.ksnd_schedulers == NULL) - RETURN(-ENOMEM); - - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i]; - - spin_lock_init (&kss->kss_lock); - INIT_LIST_HEAD (&kss->kss_rx_conns); - INIT_LIST_HEAD (&kss->kss_tx_conns); -#if SOCKNAL_ZC - INIT_LIST_HEAD (&kss->kss_zctxdone_list); -#endif - init_waitqueue_head (&kss->kss_waitq); - } - - CERROR ("ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_idle = i < SOCKNAL_NLTXS ? 
- &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (<x->ltx_tx.tx_list, ltx->ltx_idle); - } - - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); - RETURN (rc); - } - PtlNIDebug(ksocknal_ni, ~0); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called - - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - rc = ksocknal_thread_start (ksocknal_scheduler, - &ksocknal_data.ksnd_schedulers[i]); - if (rc != 0) { - CERROR("Can't spawn socknal scheduler[%d]: %d\n", - i, rc); - ksocknal_module_fini (); - RETURN (rc); - } - } - - rc = ksocknal_thread_start (ksocknal_reaper, NULL); - if (rc != 0) { - CERROR("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); - RETURN (rc); - } - - rc = kpr_register(&ksocknal_data.ksnd_router, - &ksocknal_router_interface); - if (rc != 0) { - CDEBUG(D_NET, "Can't initialise routing interface " - "(rc = %d): not routing\n", rc); - } else { - /* Only allocate forwarding buffers if I'm on a gateway */ - - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; - - if (i < SOCKNAL_SMALL_FWD_NMSGS) { - fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp; - } else { - fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; - } - - LASSERT (fmb->fmb_npages > 0); - for (j = 0; j < fmb->fmb_npages; j++) { - fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); - - if 
(fmb->fmb_pages[j] == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); - } - - list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); - } - } - - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); - if (rc != 0) { - CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); - return (rc); - } - - PORTAL_SYMBOL_REGISTER(ksocknal_ni); - - /* flag everything initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; - - printk(KERN_INFO "Routing socket NAL loaded (Routing %s, initial " - "mem %d)\n", - kpr_routing (&ksocknal_data.ksnd_router) ? - "enabled" : "disabled", pkmem); - - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. "); -MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); -MODULE_LICENSE("GPL"); - -module_init(ksocknal_module_init); -module_exit(ksocknal_module_fini); - -EXPORT_SYMBOL (ksocknal_ni); diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h deleted file mode 100644 index 86cdeb0..0000000 --- a/lnet/klnds/socklnd/socklnd.h +++ /dev/null @@ -1,292 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_PORTAL_ALLOC -#define EXPORT_SYMTAB - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_SOCKNAL - -#include -#include -#include - -#define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ - -#if PTL_LARGE_MTU -# define SOCKNAL_MAX_FWD_PAYLOAD (256<<10) /* biggest payload I can forward */ -#else -# define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */ -#endif - -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - -#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ -#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ - -#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */ - -#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT) - /* # pages in a large message fwd buffer */ - -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ - -#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10) - -typedef struct /* pool of forwarding buffers */ -{ - spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ - struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ -} ksock_fmb_pool_t; - - -typedef struct /* per scheduler state */ -{ - spinlock_t kss_lock; /* serialise */ - struct list_head kss_rx_conns; /* conn waiting to be read */ - struct list_head kss_tx_conns; /* conn waiting to be written */ -#if 
SOCKNAL_ZC - struct list_head kss_zctxdone_list; /* completed ZC transmits */ -#endif - wait_queue_head_t kss_waitq; /* where scheduler sleeps */ - int kss_nconns; /* # connections assigned to this scheduler */ -} ksock_sched_t; - -typedef struct { - int ksnd_init; /* initialisation state */ - - struct list_head ksnd_socklist; /* all my connections */ - rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */ - - nal_cb_t *ksnd_nal_cb; - spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */ - - atomic_t ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit */ - ksock_sched_t *ksnd_schedulers; /* scheduler state */ - - kpr_router_t ksnd_router; /* THE router */ - - void *ksnd_fmbs; /* all the pre-allocated FMBs */ - ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ - ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - - struct list_head ksnd_reaper_list; /* conn waiting to be reaped */ - wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ - spinlock_t ksnd_reaper_lock; /* serialise */ - unsigned char ksnd_irq_info[NR_IRQS]; /* irq->scheduler lookup */ -} ksock_nal_data_t; - -#define SOCKNAL_INIT_NOTHING 0 -#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_PTL 2 -#define SOCKNAL_INIT_ALL 3 - -#define SOCKNAL_IRQ_BOUND 0x80 /* flag we _did_ bind already */ -#define SOCKNAL_IRQ_SCHED_MASK 0x7f /* we assume < 127 CPUs */ -#define SOCKNAL_IRQ_UNASSIGNED 0xff /* flag unassigned */ - -/* A packet just assembled for transmission is represented by 1 or more - * struct iovec fragments and 0 or more ptl_kiov_t fragments. 
Forwarded - * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0 - * ptl_kiov_t fragments. Messages from an MD with PTL_MD_KIOV set, have 1 - * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t - * fragments. - * - * On the receive side, initially 1 struct iovec fragment is posted for - * receive (the header). Once the header has been received, if the message - * requires forwarding or will be received into mapped memory, up to - * PTL_MD_MAX_IOV struct iovec fragments describe the target memory. - * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used. - */ - -typedef struct /* transmit packet */ -{ - struct list_head tx_list; /* queue on conn for transmission etc */ - char tx_isfwd; /* forwarding / sourced here */ - int tx_nob; /* # packet bytes */ - int tx_niov; /* # packet iovec frags */ - struct iovec *tx_iov; /* packet iovec frags */ - int tx_nkiov; /* # packet page frags */ - ptl_kiov_t *tx_kiov; /* packet page frags */ -#if SOCKNAL_ZC - ksock_sched_t *tx_sched; /* who to wake on callback */ - zccd_t tx_zccd; /* zero copy callback descriptor */ -#endif -} ksock_tx_t; - -#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) -/* network zero copy callback descriptor embedded in ksock_tx_t */ - -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. 
*/ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - -typedef struct /* locally transmitted packet */ -{ - ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ - void *ltx_private; /* lib_finalize() callback arg */ - void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ - ptl_hdr_t ltx_hdr; /* buffer for packet header */ -} ksock_ltx_t; - -#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) -/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */ - -#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx) -/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */ - -/* NB list_entry() is used here as convenient macro for calculating a - * pointer to a struct from the address of a member. - */ - -typedef struct /* Kernel portals Socket Forwarding message buffer */ -{ /* (socknal->router) */ - struct list_head fmb_list; /* queue idle */ - kpr_fwd_desc_t fmb_fwd; /* router's descriptor */ - int fmb_npages; /* # pages allocated */ - ksock_fmb_pool_t *fmb_pool; /* owning pool */ - struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES]; - struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES]; -} ksock_fmb_t; - -/* space for the rx frag descriptors; we either read a single contiguous - * header, or PTL_MD_MAX_IOV frags of payload of either type. 
*/ -typedef union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; -} ksock_rxiovspace_t; - -#define SOCKNAL_RX_HEADER 1 /* reading header */ -#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */ -#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */ -#define SOCKNAL_RX_SLOP 4 /* skipping body */ -#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */ -#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */ - -typedef struct -{ - struct list_head ksnc_list; /* stash on global socket list */ - struct file *ksnc_file; /* socket filp */ - struct socket *ksnc_sock; /* actual socket */ - void *ksnc_saved_data_ready; /* socket's original data_ready() callback */ - void *ksnc_saved_write_space; /* socket's original write_space() callback */ - ptl_nid_t ksnc_peernid; /* who's on the other end */ - atomic_t ksnc_refcount; /* # users */ - ksock_sched_t *ksnc_scheduler; /* who schedules this connection */ - - /* READER */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ - volatile int ksnc_rx_ready; /* data ready to read */ - int ksnc_rx_scheduled; /* being progressed */ - int ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted; /* bytes actually wanted */ - int ksnc_rx_niov; /* # iovec frags */ - struct iovec *ksnc_rx_iov; /* the iovec frags */ - int ksnc_rx_nkiov; /* # page frags */ - ptl_kiov_t *ksnc_rx_kiov; /* the page frags */ - ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */ - void *ksnc_cookie; /* rx lib_finalize passthru arg */ - ptl_hdr_t ksnc_hdr; /* where I read headers into */ - - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output space */ - struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - volatile int ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - -} ksock_conn_t; - -extern int 
ksocknal_add_sock (ptl_nid_t nid, int fd, int client); -extern int ksocknal_close_sock(ptl_nid_t nid); -extern int ksocknal_set_mynid(ptl_nid_t nid); -extern int ksocknal_push_sock(ptl_nid_t nid); -extern ksock_conn_t *ksocknal_get_conn (ptl_nid_t nid); -extern void _ksocknal_put_conn (ksock_conn_t *conn); -extern void ksocknal_close_conn (ksock_conn_t *conn); - -static inline void -ksocknal_put_conn (ksock_conn_t *conn) -{ - CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n", - conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount)); - - if (atomic_dec_and_test (&conn->ksnc_refcount)) - _ksocknal_put_conn (conn); -} - -extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); -extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); -extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); -extern int ksocknal_scheduler (void *arg); -extern int ksocknal_reaper (void *arg); -extern void ksocknal_data_ready(struct sock *sk, int n); -extern void ksocknal_write_space(struct sock *sk); - - -extern nal_cb_t ksocknal_lib; -extern ksock_nal_data_t ksocknal_data; diff --git a/lnet/klnds/socklnd/socklnd_cb.c b/lnet/klnds/socklnd/socklnd_cb.c deleted file mode 100644 index 6147d8a..0000000 --- a/lnet/klnds/socklnd/socklnd_cb.c +++ /dev/null @@ -1,1613 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "socknal.h" - -atomic_t ksocknal_packets_received; -atomic_t ksocknal_packets_launched; -atomic_t ksocknal_packets_being_sent; - -#if SOCKNAL_ZC -int ksocknal_do_zc = 1; -int ksocknal_zc_min_frag = 2048; -#endif - -/* - * LIB functions follow - * - */ -int -ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr, - user_ptr src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, - void *src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ksocknal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq, - ptl_event_t *ev) -{ - CDEBUG(D_NET, LPX64": callback eq %p ev %p\n", - nal->ni.nid, eq, ev); - - if (eq->event_callback != NULL) - eq->event_callback(ev); - - return 0; -} - -void * -ksocknal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - - if (buf != NULL) - memset(buf, 0, len); - - return (buf); -} - -void -ksocknal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - -void -ksocknal_printf(nal_cb_t *nal, const char *fmt, ...) 
-{ - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); -} - -void -ksocknal_cli(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data = nal->nal_data; - - spin_lock(&data->ksnd_nal_cb_lock); -} - -void -ksocknal_sti(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data; - data = nal->nal_data; - - spin_unlock(&data->ksnd_nal_cb_lock); -} - -int -ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if ksocknal_get_conn(nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - -#if SOCKNAL_ZC -struct page * -ksocknal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) - page = vmalloc_to_page ((void *)vaddr); -#if CONFIG_HIGHMEM - else if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) - page = 
vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ -#endif - else - page = virt_to_page (vaddr); - - if (page == NULL || - !VALID_PAGE (page)) - return (NULL); - - return (page); -} -#endif - -int -ksocknal_send_iov (struct socket *sock, ksock_tx_t *tx, int more) -{ - struct iovec *iov = tx->tx_iov; - int fragsize = iov->iov_len; - unsigned long vaddr = (unsigned long)iov->iov_base; -#if SOCKNAL_ZC - int offset = vaddr & (PAGE_SIZE - 1); - int zcsize = MIN (fragsize, PAGE_SIZE - offset); - struct page *page; -#endif - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only send 1 frag at a time. */ - LASSERT (fragsize <= tx->tx_nob); - LASSERT (tx->tx_niov > 0); - more |= (tx->tx_niov > 1); - -#if SOCKNAL_ZC - if (ksocknal_do_zc && - (sock->sk->route_caps & NETIF_F_SG) && - (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && - zcsize >= ksocknal_zc_min_frag && - (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) { - - CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n", - (void *)vaddr, page, page_address(page), offset, zcsize); - - more |= (zcsize < fragsize); - - rc = tcp_sendpage_zccd(sock, page, offset, zcsize, - more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT, - &tx->tx_zccd); - } else -#endif - { - /* NB don't pass tx's iov; sendmsg may or may not update it */ - struct iovec fragiov = { .iov_base = (void *)vaddr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = more ? 
(MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - - set_fs (KERNEL_DS); - rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize); - set_fs (oldmm); - } - - if (rc <= 0) - return (rc); - - tx->tx_nob -= rc; - - if (rc < fragsize) { - /* didn't send whole frag */ - iov->iov_base = (void *)(vaddr + rc); - iov->iov_len = fragsize - rc; - return (-EAGAIN); - } - - /* everything went */ - LASSERT (rc == fragsize); - tx->tx_iov++; - tx->tx_niov--; - return (1); -} - -int -ksocknal_send_kiov (struct socket *sock, ksock_tx_t *tx, int more) -{ - ptl_kiov_t *kiov = tx->tx_kiov; - int fragsize = kiov->kiov_len; - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only send 1 frag at a time. */ - LASSERT (fragsize <= tx->tx_nob); - LASSERT (offset + fragsize <= PAGE_SIZE); - LASSERT (tx->tx_nkiov > 0); - more |= (tx->tx_nkiov > 1); - -#if SOCKNAL_ZC - if (ksocknal_do_zc && - (sock->sk->route_caps & NETIF_F_SG) && - (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && - fragsize >= ksocknal_zc_min_frag) { - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, fragsize); - - rc = tcp_sendpage_zccd(sock, page, offset, fragsize, - more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT, - &tx->tx_zccd); - } else -#endif - { - char *addr = ((char *)kmap (page)) + offset; - struct iovec fragiov = {.iov_base = addr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = more ? 
(MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - - set_fs (KERNEL_DS); - rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize); - set_fs (oldmm); - kunmap (page); - } - - if (rc <= 0) - return (rc); - - tx->tx_nob -= rc; - - if (rc < fragsize) { - /* didn't send whole frag */ - kiov->kiov_offset = offset + rc; - kiov->kiov_len = fragsize - rc; - return (-EAGAIN); - } - - /* everything went */ - LASSERT (rc == fragsize); - tx->tx_kiov++; - tx->tx_nkiov--; - return (1); -} - -int -ksocknal_sendmsg (struct socket *sock, ksock_tx_t *tx, int more) -{ - int rc; - int sent_some = 0; - ENTRY; - - LASSERT (!in_interrupt()); - - for (;;) { - if (tx->tx_niov != 0) - rc = ksocknal_send_iov (sock, tx, more || tx->tx_nkiov != 0); - else - rc = ksocknal_send_kiov (sock, tx, more); - - /* Interpret a zero rc the same as -EAGAIN (Adaptech TOE) */ - if (rc <= 0) /* error or partial send */ - RETURN ((sent_some || rc == -EAGAIN) ? 0 : rc); - - if (tx->tx_nob == 0) /* sent everything */ - RETURN (0); - - sent_some = 1; - } -} - -int -ksocknal_recv_iov (ksock_conn_t *conn) -{ - struct iovec *iov = conn->ksnc_rx_iov; - int fragsize = iov->iov_len; - unsigned long vaddr = (unsigned long)iov->iov_base; - struct iovec fragiov = { .iov_base = (void *)vaddr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only receive 1 frag at a time. 
*/ - LASSERT (conn->ksnc_rx_niov > 0); - LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); - /* NB this is just a boolean............................^ */ - set_fs (oldmm); - - if (rc <= 0) - return (rc); - - conn->ksnc_rx_nob_wanted -= rc; - conn->ksnc_rx_nob_left -= rc; - - if (rc < fragsize) { - iov->iov_base = (void *)(vaddr + rc); - iov->iov_len = fragsize - rc; - return (-EAGAIN); - } - - LASSERT (rc == fragsize); - conn->ksnc_rx_iov++; - conn->ksnc_rx_niov--; - return (1); -} - -int -ksocknal_recv_kiov (ksock_conn_t *conn) -{ - ptl_kiov_t *kiov = conn->ksnc_rx_kiov; - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; - unsigned long vaddr = ((unsigned long)kmap (page)) + offset; - struct iovec fragiov = { .iov_base = (void *)vaddr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only receive 1 frag at a time. 
*/ - LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); - LASSERT (conn->ksnc_rx_nkiov > 0); - LASSERT (offset + fragsize <= PAGE_SIZE); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); - /* NB this is just a boolean............................^ */ - set_fs (oldmm); - kunmap (page); - - if (rc <= 0) - return (rc); - - conn->ksnc_rx_nob_wanted -= rc; - conn->ksnc_rx_nob_left -= rc; - - if (rc < fragsize) { - kiov->kiov_offset = offset + rc; - kiov->kiov_len = fragsize - rc; - return (-EAGAIN); - } - - LASSERT (rc == fragsize); - conn->ksnc_rx_kiov++; - conn->ksnc_rx_nkiov--; - return (1); -} - -int -ksocknal_recvmsg (ksock_conn_t *conn) -{ - int rc; - int got_some = 0; - ENTRY; - - LASSERT (!in_interrupt ()); - - for (;;) { - LASSERT (conn->ksnc_rx_nob_wanted > 0); - - if (conn->ksnc_rx_niov != 0) - rc = ksocknal_recv_iov (conn); - else - rc = ksocknal_recv_kiov (conn); - - /* CAVEAT EMPTOR: we return... - * <= 0 for error (0 == EOF) and > 0 for success (unlike sendmsg()) */ - - if (rc <= 0) /* error/EOF or partial receive */ - RETURN ((got_some || rc == -EAGAIN) ? 1 : rc); - - if (conn->ksnc_rx_nob_wanted == 0) - RETURN (1); - - got_some = 0; - } -} - -#if SOCKNAL_ZC -void -ksocknal_zc_callback (zccd_t *zcd) -{ - ksock_tx_t *tx = KSOCK_ZCCD_2_TX(zcd); - ksock_sched_t *sched = tx->tx_sched; - unsigned long flags; - ENTRY; - - /* Schedule tx for cleanup (can't do it now due to lock conflicts) */ - - spin_lock_irqsave (&sched->kss_lock, flags); - - list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list); - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - - spin_unlock_irqrestore (&sched->kss_lock, flags); - EXIT; -} -#endif - -void -ksocknal_tx_done (ksock_tx_t *tx) -{ - long flags; - ksock_ltx_t *ltx; - ENTRY; - - atomic_dec (&ksocknal_packets_being_sent); - - if (tx->tx_isfwd) { /* was a forwarded packet? 
*/ - kpr_fwd_done (&ksocknal_data.ksnd_router, - KSOCK_TX_2_KPR_FWD_DESC (tx), 0); - EXIT; - return; - } - - /* local send */ - ltx = KSOCK_TX_2_KSOCK_LTX (tx); - - lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - list_add_tail (<x->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list && - waitqueue_active (&ksocknal_data.ksnd_idle_ltx_waitq)) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - EXIT; -} - -void -ksocknal_process_transmit (ksock_sched_t *sched, long *irq_flags) -{ - ksock_conn_t *conn; - ksock_tx_t *tx; - int rc; - - LASSERT (!list_empty (&sched->kss_tx_conns)); - conn = list_entry(sched->kss_tx_conns.next, ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); - - LASSERT (conn->ksnc_tx_scheduled); - LASSERT (conn->ksnc_tx_ready); - LASSERT (!list_empty (&conn->ksnc_tx_queue)); - tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list); - /* assume transmit will complete now, so dequeue while I've got lock */ - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&sched->kss_lock, *irq_flags); - - LASSERT (tx->tx_nob > 0); - - conn->ksnc_tx_ready = 0;/* write_space may race with me and set ready */ - mb(); /* => clear BEFORE trying to write */ - - rc = ksocknal_sendmsg (conn->ksnc_sock, tx, - !list_empty (&conn->ksnc_tx_queue)); /* more to come? */ - - CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc); - - if (rc != 0) { -#warning FIXME: handle socket errors properly - CERROR("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc); - /* kid on for now the whole packet went. - * NB when we handle the error better, we'll still need to - * block for zccd completion. 
- */ - tx->tx_nob = 0; - } - - if (tx->tx_nob == 0) /* nothing left to send */ - { - /* everything went; assume more can go, so prevent write_space locking */ - conn->ksnc_tx_ready = 1; - - ksocknal_put_conn (conn); /* release packet's ref */ - atomic_inc (&ksocknal_packets_being_sent); -#if SOCKNAL_ZC - if (atomic_read (&tx->tx_zccd.zccd_count) != 1) { - /* zccd skbufs are still in-flight. Release my - * initial ref on zccd, so callback can occur */ - zccd_put (&tx->tx_zccd); - } else -#endif - ksocknal_tx_done (tx); - - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - } else { - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - - /* back onto HEAD of tx_queue */ - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } - - if (!conn->ksnc_tx_ready || /* no space to write now */ - list_empty (&conn->ksnc_tx_queue)) {/* nothing to write */ - conn->ksnc_tx_scheduled = 0; /* not being scheduled */ - ksocknal_put_conn (conn); /* release scheduler's ref */ - } else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); -} - -void -ksocknal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx) -{ - unsigned long flags; - ksock_sched_t *sched = conn->ksnc_scheduler; - - /* Ensure the frags we've been given EXACTLY match the number of - * bytes we want to send. Many TCP/IP stacks disregard any total - * size parameters passed to them and just look at the frags. - * - * We always expect at least 1 mapped fragment containing the - * complete portals header. 
- */ - LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) + - lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob); - LASSERT (tx->tx_niov >= 1); - LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t)); - - CDEBUG (D_NET, "type %d, nob %d niov %d nkiov %d\n", - ((ptl_hdr_t *)tx->tx_iov[0].iov_base)->type, tx->tx_nob, - tx->tx_niov, tx->tx_nkiov); - -#if SOCKNAL_ZC - zccd_init (&tx->tx_zccd, ksocknal_zc_callback); - /* NB this sets 1 ref on zccd, so the callback can only occur - * after I've released this ref */ - tx->tx_sched = sched; -#endif - spin_lock_irqsave (&sched->kss_lock, flags); - - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) { /* not scheduled to send */ - list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */ - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - - atomic_inc (&ksocknal_packets_launched); -} - -ksock_conn_t * -ksocknal_send_target (ptl_nid_t nid) -{ - ptl_nid_t gatewaynid; - ksock_conn_t *conn; - int rc; - - if ((conn = ksocknal_get_conn (nid)) == NULL) { - /* It's not a peer; try to find a gateway */ - rc = kpr_lookup (&ksocknal_data.ksnd_router, nid, &gatewaynid); - if (rc != 0) { - CERROR("Can't route to "LPX64": router error %d\n", - nid, rc); - return (NULL); - } - - if ((conn = ksocknal_get_conn (gatewaynid)) == NULL) { - CERROR ("Can't route to "LPX64": gateway "LPX64 - " is not a peer\n", nid, gatewaynid); - return (NULL); - } - } - - return (conn); -} - -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) -{ - ksock_ltx_t *ltx; - - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); - if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); - } - - /* Init local send packet (storage for hdr, finalize() args) */ - ltx->ltx_hdr = *hdr; - ltx->ltx_private = private; - ltx->ltx_cookie = cookie; - - /* Init common ltx_tx */ - ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); - - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = <x->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = <x->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; - - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - ksock_conn_t *conn; - - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it - * - * Also, the return code from this procedure is ignored. - * If we can't send, we must still complete with lib_finalize(). - * We'll have to wait for 3.2 to return an error event. 
- */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); - - conn = ksocknal_send_target (nid); - if (conn == NULL) { - lib_finalize (&ksocknal_lib, private, cookie); - return (-1); - } - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) { - ksocknal_put_conn (conn); - lib_finalize (&ksocknal_lib, private, cookie); - return (-1); - } - - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - ksocknal_launch_packet (conn, <x->ltx_tx); - return (0); -} - -int -ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len) -{ - ksock_ltx_t *ltx; - ksock_conn_t *conn; - - /* NB 'private' is different depending on what we're sending. 
- * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - conn = ksocknal_send_target (nid); - if (conn == NULL) - return (-1); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) { - ksocknal_put_conn (conn); - return (-1); - } - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - ksocknal_launch_packet (conn, <x->ltx_tx); - return (0); -} - -void -ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - ksock_conn_t *conn; - ptl_nid_t nid = fwd->kprfd_gateway_nid; - ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch; - - CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd, - fwd->kprfd_gateway_nid, fwd->kprfd_target_nid); - - /* I'm the gateway; must be the last hop */ - if (nid == ksocknal_lib.ni.nid) - nid = fwd->kprfd_target_nid; - - conn = ksocknal_get_conn (nid); - if (conn == NULL) { - CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid); - kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, -EHOSTUNREACH); - return; - } - - /* This forward has now got a ref on conn */ - - tx->tx_isfwd = 1; /* This is a forwarding packet */ - tx->tx_nob = fwd->kprfd_nob; - tx->tx_niov = fwd->kprfd_niov; - tx->tx_iov = fwd->kprfd_iov; - tx->tx_nkiov = 0; - tx->tx_kiov = NULL; - - ksocknal_launch_packet (conn, tx); -} - -int -ksocknal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&ksocknal_data.ksnd_nthreads); - return (0); -} - -void -ksocknal_thread_fini (void) -{ - atomic_dec (&ksocknal_data.ksnd_nthreads); -} - -void 
-ksocknal_fmb_callback (void *arg, int error) -{ - ksock_fmb_t *fmb = (ksock_fmb_t *)arg; - ksock_fmb_pool_t *fmp = fmb->fmb_pool; - ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]); - ksock_conn_t *conn = NULL; - ksock_sched_t *sched; - long flags; - - if (error != 0) - CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n", - NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid), - error); - else - CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n", - NTOH__u64 (hdr->src_nid), NTOH__u64 (hdr->dest_nid)); - - spin_lock_irqsave (&fmp->fmp_lock, flags); - - list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); - - if (!list_empty (&fmp->fmp_blocked_conns)) { - conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, - ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - } - - spin_unlock_irqrestore (&fmp->fmp_lock, flags); - - if (conn == NULL) - return; - - CDEBUG (D_NET, "Scheduling conn %p\n", conn); - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP); - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns); - - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - - spin_unlock_irqrestore (&sched->kss_lock, flags); -} - -ksock_fmb_t * -ksocknal_get_idle_fmb (ksock_conn_t *conn) -{ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - long flags; - ksock_fmb_pool_t *pool; - ksock_fmb_t *fmb; - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); - - if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) - pool = &ksocknal_data.ksnd_small_fmp; - else - pool = &ksocknal_data.ksnd_large_fmp; - - spin_lock_irqsave (&pool->fmp_lock, flags); - - if (!list_empty (&pool->fmp_idle_fmbs)) { - fmb = list_entry(pool->fmp_idle_fmbs.next, - ksock_fmb_t, 
fmb_list); - list_del (&fmb->fmb_list); - spin_unlock_irqrestore (&pool->fmp_lock, flags); - - return (fmb); - } - - /* deschedule until fmb free */ - - conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP; - - list_add_tail (&conn->ksnc_rx_list, - &pool->fmp_blocked_conns); - - spin_unlock_irqrestore (&pool->fmp_lock, flags); - return (NULL); -} - - -int -ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) -{ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - int niov; /* at least the header */ - int nob; - - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left); - LASSERT (payload_nob >= 0); - LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE); - LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE); - - /* Got a forwarding buffer; copy the header we just read into the - * forwarding buffer. If there's payload start reading reading it - * into the buffer, otherwise the forwarding buffer can be kicked - * off immediately. - * - * NB fmb->fmb_iov spans the WHOLE packet. - * conn->ksnc_rx_iov spans just the payload. 
- */ - - fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]); - - /* copy header */ - memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); - - if (payload_nob == 0) { /* got complete packet already */ - atomic_inc (&ksocknal_packets_received); - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", - conn, NTOH__u64 (conn->ksnc_hdr.src_nid), - dest_nid, packet_nob); - - fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t); - - kpr_fwd_init (&fmb->fmb_fwd, dest_nid, - packet_nob, 1, fmb->fmb_iov, - ksocknal_fmb_callback, fmb); - - /* forward it now */ - kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd); - - ksocknal_new_packet (conn, 0); /* on to next packet */ - return (1); - } - - niov = 1; - if (packet_nob <= PAGE_SIZE) { /* whole packet fits in first page */ - fmb->fmb_iov[0].iov_len = packet_nob; - } else { - fmb->fmb_iov[0].iov_len = PAGE_SIZE; - nob = packet_nob - PAGE_SIZE; - - do { - LASSERT (niov < fmb->fmb_npages); - fmb->fmb_iov[niov].iov_base = - page_address (fmb->fmb_pages[niov]); - fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob); - nob -= PAGE_SIZE; - niov++; - } while (nob > 0); - } - - kpr_fwd_init (&fmb->fmb_fwd, dest_nid, - packet_nob, niov, fmb->fmb_iov, - ksocknal_fmb_callback, fmb); - - /* stash router's descriptor ready for call to kpr_fwd_start */ - conn->ksnc_cookie = &fmb->fmb_fwd; - - conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */ - - /* payload is desc's iov-ed buffer, but skipping the hdr */ - LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) / - sizeof (struct iovec)); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = - (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + - sizeof (ptl_hdr_t)); - conn->ksnc_rx_iov[0].iov_len = - fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t); - - if (niov > 1) - memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], - (niov - 1) * sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - - CDEBUG 
(D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn, - NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob); - return (0); -} - -void -ksocknal_fwd_parse (ksock_conn_t *conn) -{ - ksock_conn_t *conn2; - ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - int body_len = NTOH__u32 (PTL_HDR_LENGTH(&conn->ksnc_hdr)); - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, - NTOH__u64 (conn->ksnc_hdr.src_nid), - dest_nid, conn->ksnc_rx_nob_left); - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER); - LASSERT (conn->ksnc_rx_scheduled); - - if (body_len < 0) { /* length corrupt (overflow) */ - CERROR("dropping packet from "LPX64" for "LPX64": packet " - "size %d illegal\n", NTOH__u64 (conn->ksnc_hdr.src_nid), - dest_nid, body_len); - ksocknal_new_packet (conn, 0); /* on to new packet */ - return; - } - - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ - CERROR("dropping packet from "LPX64" for "LPX64": not " - "forwarding\n", conn->ksnc_hdr.src_nid, - conn->ksnc_hdr.dest_nid); - /* on to new packet (skip this one's body) */ - ksocknal_new_packet (conn, body_len); - return; - } - - if (body_len > SOCKNAL_MAX_FWD_PAYLOAD) { /* too big to forward */ - CERROR ("dropping packet from "LPX64" for "LPX64 - ": packet size %d too big\n", conn->ksnc_hdr.src_nid, - conn->ksnc_hdr.dest_nid, body_len); - /* on to new packet (skip this one's body) */ - ksocknal_new_packet (conn, body_len); - return; - } - - /* should have gone direct */ - conn2 = ksocknal_get_conn (conn->ksnc_hdr.dest_nid); - if (conn2 != NULL) { - CERROR ("dropping packet from "LPX64" for "LPX64 - ": target is a peer\n", conn->ksnc_hdr.src_nid, - conn->ksnc_hdr.dest_nid); - ksocknal_put_conn (conn2); /* drop ref from get above */ - - /* on to next packet (skip this one's body) */ - ksocknal_new_packet (conn, body_len); - return; - } - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */ - conn->ksnc_rx_nob_left = body_len; /* stash packet size */ - 
conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */ -} - -int -ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) -{ - static char ksocknal_slop_buffer[4096]; - - int nob; - int niov; - int skipped; - - if (nob_to_skip == 0) { /* right at next packet boundary now */ - conn->ksnc_rx_state = SOCKNAL_RX_HEADER; - conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t); - conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr; - conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t); - conn->ksnc_rx_niov = 1; - - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - return (1); - } - - /* Set up to skip as much a possible now. If there's more left - * (ran out of iov entries) we'll get called again */ - - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - skipped = 0; - niov = 0; - - do { - nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer)); - - conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_nob_wanted = skipped; - return (0); -} - -void -ksocknal_process_receive (ksock_sched_t *sched, long *irq_flags) -{ - ksock_conn_t *conn; - ksock_fmb_t *fmb; - int rc; - - /* NB: sched->ksnc_lock lock held */ - - LASSERT (!list_empty (&sched->kss_rx_conns)); - conn = list_entry(sched->kss_rx_conns.next, ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - - spin_unlock_irqrestore (&sched->kss_lock, *irq_flags); - - CDEBUG(D_NET, "sched %p conn %p\n", sched, conn); - LASSERT (atomic_read (&conn->ksnc_refcount) > 0); - LASSERT 
(conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_ready); - - /* doesn't need a forwarding buffer */ - if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) - goto try_read; - - get_fmb: - fmb = ksocknal_get_idle_fmb (conn); - if (fmb == NULL) { /* conn descheduled waiting for idle fmb */ - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - return; - } - - if (ksocknal_init_fmb (conn, fmb)) /* packet forwarded ? */ - goto out; /* come back later for next packet */ - - try_read: - /* NB: sched lock NOT held */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_BODY || - conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - - LASSERT (conn->ksnc_rx_nob_wanted > 0); - - conn->ksnc_rx_ready = 0;/* data ready may race with me and set ready */ - mb(); /* => clear BEFORE trying to read */ - - rc = ksocknal_recvmsg(conn); - - if (rc == 0) - goto out; - if (rc < 0) { -#warning FIXME: handle socket errors properly - CERROR ("Error socknal read %p: %d\n", conn, rc); - goto out; - } - - if (conn->ksnc_rx_nob_wanted != 0) /* short read */ - goto out; /* try again later */ - - /* got all I wanted, assume there's more - prevent data_ready locking */ - conn->ksnc_rx_ready = 1; - - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_HEADER: - /* It's not for me */ - if (conn->ksnc_hdr.type != PTL_MSG_HELLO && - NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) { - ksocknal_fwd_parse (conn); - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_HEADER: /* skipped (zero payload) */ - goto out; /* => come back later */ - case SOCKNAL_RX_SLOP: /* skipping packet's body */ - goto try_read; /* => go read it */ - case SOCKNAL_RX_GET_FMB: /* forwarding */ - goto get_fmb; /* => go get a fwd msg buffer */ - default: - LBUG (); - } - /* Not Reached */ - } - - PROF_START(lib_parse); - /* sets wanted_len, iovs etc */ - lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn); - PROF_FINISH(lib_parse); - - if 
(conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */ - conn->ksnc_rx_state = SOCKNAL_RX_BODY; - goto try_read; /* go read the payload */ - } - /* Fall through (completed packet for me) */ - - case SOCKNAL_RX_BODY: - atomic_inc (&ksocknal_packets_received); - /* packet is done now */ - lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie); - /* Fall through */ - - case SOCKNAL_RX_SLOP: - /* starting new packet? */ - if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left)) - goto out; /* come back later */ - goto try_read; /* try to finish reading slop now */ - - case SOCKNAL_RX_BODY_FWD: - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", - conn, NTOH__u64 (conn->ksnc_hdr.src_nid), - NTOH__u64 (conn->ksnc_hdr.dest_nid), - conn->ksnc_rx_nob_left); - - atomic_inc (&ksocknal_packets_received); - - /* ksocknal_init_fmb() put router desc. in conn->ksnc_cookie */ - kpr_fwd_start (&ksocknal_data.ksnd_router, - (kpr_fwd_desc_t *)conn->ksnc_cookie); - - /* no slop in forwarded packets */ - LASSERT (conn->ksnc_rx_nob_left == 0); - - ksocknal_new_packet (conn, 0); /* on to next packet */ - goto out; /* (later) */ - - default: - } - - /* Not Reached */ - LBUG (); - - out: - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - - /* no data there to read? 
*/ - if (!conn->ksnc_rx_ready) { - /* let socket callback schedule again */ - conn->ksnc_rx_scheduled = 0; - ksocknal_put_conn (conn); /* release scheduler's ref */ - } else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns); -} - -int -ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - - LASSERT (mlen <= rlen); - LASSERT (niov <= PTL_MD_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; - memcpy (conn->ksnc_rx_iov, iov, niov * sizeof (*iov)); - - LASSERT (mlen == - lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); - - return (rlen); -} - -int -ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, ptl_kiov_t *kiov, size_t mlen, size_t rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - - LASSERT (mlen <= rlen); - LASSERT (niov <= PTL_MD_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - conn->ksnc_rx_niov = 0; - conn->ksnc_rx_iov = NULL; - conn->ksnc_rx_nkiov = niov; - conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; - memcpy (conn->ksnc_rx_kiov, kiov, niov * sizeof (*kiov)); - - LASSERT (mlen == - lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); - - return (rlen); -} - -int ksocknal_scheduler (void *arg) -{ - ksock_sched_t *sched = (ksock_sched_t *)arg; - unsigned long flags; - int rc; - int nloops = 0; - int id = sched - ksocknal_data.ksnd_schedulers; - char name[16]; -#if (CONFIG_SMP && CPU_AFFINITY) -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - int cpu = 
cpu_logical_map(id % num_online_cpus()); -#else -#warning "Take care of architecure specific logical APIC map" - int cpu = 1; /* Have to change later. */ -#endif /* LINUX_VERSION_CODE */ - - set_cpus_allowed (current, 1 << cpu); - id = cpu; -#endif /* CONFIG_SMP && CPU_AFFINITY */ - - snprintf (name, sizeof (name),"ksocknald[%d]", id); - kportal_daemonize (name); - kportal_blockallsigs (); - - spin_lock_irqsave (&sched->kss_lock, flags); - - while (!ksocknal_data.ksnd_shuttingdown) { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty (&sched->kss_rx_conns)) { - did_something = 1; - /* drops & regains kss_lock */ - ksocknal_process_receive (sched, &flags); - } - - if (!list_empty (&sched->kss_tx_conns)) { - did_something = 1; - /* drops and regains kss_lock */ - ksocknal_process_transmit (sched, &flags); - } -#if SOCKNAL_ZC - if (!list_empty (&sched->kss_zctxdone_list)) { - ksock_tx_t *tx = - list_entry(sched->kss_zctxdone_list.next, - ksock_tx_t, tx_list); - did_something = 1; - - list_del (&tx->tx_list); - spin_unlock_irqrestore (&sched->kss_lock, flags); - - ksocknal_tx_done (tx); - - spin_lock_irqsave (&sched->kss_lock, flags); - } -#endif - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? 
*/ - spin_unlock_irqrestore (&sched->kss_lock, flags); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ -#if SOCKNAL_ZC - rc = wait_event_interruptible (sched->kss_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&sched->kss_rx_conns) || - !list_empty(&sched->kss_tx_conns) || - !list_empty(&sched->kss_zctxdone_list)); -#else - rc = wait_event_interruptible (sched->kss_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&sched->kss_rx_conns) || - !list_empty(&sched->kss_tx_conns)); -#endif - LASSERT (rc == 0); - } else - our_cond_resched(); - - spin_lock_irqsave (&sched->kss_lock, flags); - } - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - ksocknal_thread_fini (); - return (0); -} - -void -ksocknal_data_ready (struct sock *sk, int n) -{ - unsigned long flags; - ksock_conn_t *conn; - ksock_sched_t *sched; - ENTRY; - - /* interleave correctly with closing sockets... */ - read_lock (&ksocknal_data.ksnd_socklist_lock); - - conn = sk->user_data; - if (conn == NULL) { /* raced with ksocknal_close_sock */ - LASSERT (sk->data_ready != &ksocknal_data_ready); - sk->data_ready (sk, n); - } else if (!conn->ksnc_rx_ready) { /* new news */ - /* Set ASAP in case of concurrent calls to me */ - conn->ksnc_rx_ready = 1; - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - /* Set again (process_receive may have cleared while I blocked for the lock) */ - conn->ksnc_rx_ready = 1; - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - EXIT; -} - -void -ksocknal_write_space (struct sock *sk) -{ - unsigned long flags; - ksock_conn_t *conn; - 
ksock_sched_t *sched; - - /* interleave correctly with closing sockets... */ - read_lock (&ksocknal_data.ksnd_socklist_lock); - - conn = sk->user_data; - - CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", - sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn, - (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? - " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? - " scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? - " empty" : " queued")); - - if (conn == NULL) { /* raced with ksocknal_close_sock */ - LASSERT (sk->write_space != &ksocknal_write_space); - sk->write_space (sk); - } else if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */ - clear_bit (SOCK_NOSPACE, &sk->socket->flags); - - if (!conn->ksnc_tx_ready) { /* new news */ - /* Set ASAP in case of concurrent calls to me */ - conn->ksnc_tx_ready = 1; - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - /* Set again (process_transmit may have - cleared while I blocked for the lock) */ - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && // not being progressed - !list_empty(&conn->ksnc_tx_queue)){//packets to send - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - } - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); -} - -int -ksocknal_reaper (void *arg) -{ - unsigned long flags; - ksock_conn_t *conn; - int rc; - - kportal_daemonize ("ksocknal_reaper"); - kportal_blockallsigs (); - - while (!ksocknal_data.ksnd_shuttingdown) { - spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); - - if (list_empty (&ksocknal_data.ksnd_reaper_list)) { - conn = NULL; - } else { - conn = list_entry 
(ksocknal_data.ksnd_reaper_list.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); - - if (conn != NULL) - ksocknal_close_conn (conn); - else { - rc = wait_event_interruptible (ksocknal_data.ksnd_reaper_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&ksocknal_data.ksnd_reaper_list)); - LASSERT (rc == 0); - } - } - - ksocknal_thread_fini (); - return (0); -} - -nal_cb_t ksocknal_lib = { - nal_data: &ksocknal_data, /* NAL private data */ - cb_send: ksocknal_send, - cb_send_pages: ksocknal_send_pages, - cb_recv: ksocknal_recv, - cb_recv_pages: ksocknal_recv_pages, - cb_read: ksocknal_read, - cb_write: ksocknal_write, - cb_callback: ksocknal_callback, - cb_malloc: ksocknal_malloc, - cb_free: ksocknal_free, - cb_printf: ksocknal_printf, - cb_cli: ksocknal_cli, - cb_sti: ksocknal_sti, - cb_dist: ksocknal_dist -}; diff --git a/lnet/klnds/toelnd/.cvsignore b/lnet/klnds/toelnd/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/klnds/toelnd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/klnds/toelnd/Makefile.am b/lnet/klnds/toelnd/Makefile.am deleted file mode 100644 index 9bfff64..0000000 --- a/lnet/klnds/toelnd/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = ktoenal -modulenet_DATA = ktoenal.o -EXTRA_PROGRAMS = ktoenal - -DEFS = -ktoenal_SOURCES = toenal.c toenal_cb.c toenal.h diff --git a/lnet/klnds/toelnd/toenal.c b/lnet/klnds/toelnd/toenal.c deleted file mode 100644 index 1f5dc38..0000000 --- a/lnet/klnds/toelnd/toenal.c +++ /dev/null @@ -1,629 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * Author: Kedar Sovani - * Author: Amey Inamdar - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#include -#include "toenal.h" - -ptl_handle_ni_t ktoenal_ni; -static nal_t ktoenal_api; -static ksock_nal_data_t ktoenal_data; - -/* -ksocknal_interface_t ktoenal_interface = { - ksni_add_sock: ktoenal_add_sock, - ksni_close_sock: ktoenal_close_sock, - ksni_set_mynid: ktoenal_set_mynid, -}; -*/ - -kpr_nal_interface_t ktoenal_router_interface = { - kprni_nalid: TOENAL, - kprni_arg: &ktoenal_data, - kprni_fwd: ktoenal_fwd_packet, -}; - - -int -ktoenal_api_forward(nal_t *nal, int id, void *args, size_t args_len, - void *ret, size_t ret_len) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - - lib_dispatch(nal_cb, k, id, args, ret); /* ktoenal_send needs k */ - return PTL_OK; -} - -int -ktoenal_api_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "closing all connections\n"); - - return ktoenal_close_sock(0); /* close all sockets */ -} - -void -ktoenal_api_yield(nal_t *nal) -{ - our_cond_resched(); - return; -} - -void -ktoenal_api_lock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - 
nal_cb->cb_cli(nal_cb,flags); -} - -void -ktoenal_api_unlock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_sti(nal_cb,flags); -} - -nal_t * -ktoenal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", - ktoenal_data.ksnd_mynid); - lib_init(&ktoenal_lib, ktoenal_data.ksnd_mynid, 0, 10, ptl_size, - ac_size); - return (&ktoenal_api); -} - -/* - * EXTRA functions follow - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define SOCKET_I(inode) (&(inode)->u.socket_i) -#endif -static __inline__ struct socket * -socki_lookup(struct inode *inode) -{ - return SOCKET_I(inode); -} - -int -ktoenal_set_mynid(ptl_nid_t nid) -{ - lib_ni_t *ni = &ktoenal_lib.ni; - - /* FIXME: we have to do this because we call lib_init() at module - * insertion time, which is before we have 'mynid' available. lib_init - * sets the NAL's nid, which it uses to tell other nodes where packets - * are coming from. This is not a very graceful solution to this - * problem. 
*/ - - CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ni->nid); - - ktoenal_data.ksnd_mynid = nid; - ni->nid = nid; - return (0); -} - -int -ktoenal_add_sock (ptl_nid_t nid, int fd) -{ - unsigned long flags; - ksock_conn_t *conn; - struct file *file = NULL; - struct socket *sock = NULL; - int ret; - ENTRY; - - file = fget(fd); - if (file == NULL) - RETURN(-EINVAL); - - ret = -EINVAL; - sock = socki_lookup(file->f_dentry->d_inode); - if (sock == NULL) - GOTO(error, ret); - - ret = -ENOMEM; - PORTAL_ALLOC(conn, sizeof(*conn)); - if (!conn) - GOTO(error, ret); - - memset (conn, 0, sizeof (conn)); /* zero for consistency */ - file->f_flags |= O_NONBLOCK; /* Does this have any conflicts */ - conn->ksnc_file = file; - conn->ksnc_sock = sock; - conn->ksnc_peernid = nid; - atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for socklist */ - - conn->ksnc_rx_ready = 0; - conn->ksnc_rx_scheduled = 0; - ktoenal_new_packet (conn, 0); - - INIT_LIST_HEAD (&conn->ksnc_tx_queue); - conn->ksnc_tx_ready = 0; - conn->ksnc_tx_scheduled = 0; - - LASSERT (!in_interrupt()); - write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags); - - list_add(&conn->ksnc_list, &ktoenal_data.ksnd_socklist); - write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags); - - ktoenal_data_ready(conn); - ktoenal_write_space(conn); - - ktoenal_data.ksnd_slistchange = 1; - wake_up_process(ktoenal_data.ksnd_pollthread_tsk); - /* Schedule pollthread so that it will poll - * for newly created socket - */ - - - CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n", - conn, conn->ksnc_peernid); - - /* Can't unload while connection active */ - PORTAL_MODULE_USE; - RETURN(0); - -error: - fput(file); - return (ret); -} - -/* Passing in a zero nid will close all connections */ -int -ktoenal_close_sock(ptl_nid_t nid) -{ - long flags; - ksock_conn_t *conn; - LIST_HEAD (death_row); - struct list_head *tmp; - - LASSERT (!in_interrupt()); - write_lock_irqsave 
(&ktoenal_data.ksnd_socklist_lock, flags); - - if (nid == 0) /* close ALL connections */ - { - /* insert 'death row' into the socket list... */ - list_add (&death_row, &ktoenal_data.ksnd_socklist); - /* ...extract and reinitialise the socket list itself... */ - list_del_init (&ktoenal_data.ksnd_socklist); - /* ...and voila, death row is the proud owner of all conns */ - } else list_for_each (tmp, &ktoenal_data.ksnd_socklist) { - - conn = list_entry (tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_peernid == nid) - { - list_del (&conn->ksnc_list); - list_add (&conn->ksnc_list, &death_row); - break; - } - } - - - write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags); - - if (list_empty (&death_row)) - return (-ENOENT); - - do { - conn = list_entry (death_row.next, ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - ktoenal_put_conn (conn); /* drop ref for ksnd_socklist */ - } while (!list_empty (&death_row)); - - ktoenal_data.ksnd_slistchange = 1; - wake_up_process(ktoenal_data.ksnd_pollthread_tsk); - - return (0); -} - - -ksock_conn_t * -ktoenal_get_conn (ptl_nid_t nid) -{ - struct list_head *tmp; - ksock_conn_t *conn; - - PROF_START(conn_list_walk); - - read_lock (&ktoenal_data.ksnd_socklist_lock); - - list_for_each(tmp, &ktoenal_data.ksnd_socklist) { - - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_peernid == nid) - { - /* caller is referencing */ - atomic_inc (&conn->ksnc_refcount); - - read_unlock (&ktoenal_data.ksnd_socklist_lock); - - CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n", - conn, nid, atomic_read (&conn->ksnc_refcount)); - - PROF_FINISH(conn_list_walk); - return (conn); - } - } - - read_unlock (&ktoenal_data.ksnd_socklist_lock); - - CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", nid); - PROF_FINISH(conn_list_walk); - return (NULL); -} - -void -ktoenal_close_conn (ksock_conn_t *conn) -{ - CDEBUG (D_NET, "connection [%p] closed \n", conn); - - fput (conn->ksnc_file); - PORTAL_FREE (conn, 
sizeof (*conn)); - /* One less connection keeping us hanging on */ - PORTAL_MODULE_UNUSE; -} - -void -_ktoenal_put_conn (ksock_conn_t *conn) -{ - unsigned long flags; - - CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn); - - /* "But what is the black spot, captain?" I asked. - * "That's a summons, mate..." */ - - LASSERT (atomic_read (&conn->ksnc_refcount) == 0); - LASSERT (!conn->ksnc_rx_scheduled); - - if (!in_interrupt()) - { - ktoenal_close_conn (conn); - return; - } - - spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags); - - list_add (&conn->ksnc_list, &ktoenal_data.ksnd_reaper_list); - wake_up (&ktoenal_data.ksnd_reaper_waitq); - - spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags); -} - -void -ktoenal_free_buffers (void) -{ - if (ktoenal_data.ksnd_fmbs != NULL) - { - ksock_fmb_t *fmb = (ksock_fmb_t *)ktoenal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ktoenal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS)); - } - - if (ktoenal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ktoenal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); -} - -int -ktoenal_cmd(struct portal_ioctl_data * data, void * private) -{ - int rc = -EINVAL; - - LASSERT (data != NULL); - - switch(data->ioc_nal_cmd) { - case NAL_CMD_REGISTER_PEER_FD: { - rc = ktoenal_add_sock(data->ioc_nid, data->ioc_fd); - break; - } - case NAL_CMD_CLOSE_CONNECTION: { - rc = ktoenal_close_sock(data->ioc_nid); - break; - } - case NAL_CMD_REGISTER_MYNID: { - rc = ktoenal_set_mynid (data->ioc_nid); - break; - } - } - - return rc; -} - - -void __exit -ktoenal_module_fini (void) -{ - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - switch (ktoenal_data.ksnd_init) - { - default: - 
LASSERT (0); - - case SOCKNAL_INIT_ALL: - kportal_nal_unregister(TOENAL); - PORTAL_SYMBOL_UNREGISTER (ktoenal_ni); - /* fall through */ - - case SOCKNAL_INIT_PTL: - PtlNIFini(ktoenal_ni); - lib_fini(&ktoenal_lib); - /* fall through */ - - case SOCKNAL_INIT_DATA: - /* Module refcount only gets to zero when all connections - * have been closed so all lists must be empty */ - LASSERT (list_empty (&ktoenal_data.ksnd_socklist)); - LASSERT (list_empty (&ktoenal_data.ksnd_reaper_list)); - LASSERT (list_empty (&ktoenal_data.ksnd_rx_conns)); - LASSERT (list_empty (&ktoenal_data.ksnd_tx_conns)); - LASSERT (list_empty (&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns)); - LASSERT (list_empty (&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns)); - - kpr_shutdown (&ktoenal_data.ksnd_router); /* stop router calling me */ - - /* flag threads to terminate; wake and wait for them to die */ - ktoenal_data.ksnd_shuttingdown = 1; - wake_up_all (&ktoenal_data.ksnd_reaper_waitq); - wake_up_all (&ktoenal_data.ksnd_sched_waitq); - wake_up_process(ktoenal_data.ksnd_pollthread_tsk); - - while (atomic_read (&ktoenal_data.ksnd_nthreads) != 0) - { - CDEBUG (D_NET, "waitinf for %d threads to terminate\n", - atomic_read (&ktoenal_data.ksnd_nthreads)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - - kpr_deregister (&ktoenal_data.ksnd_router); - - ktoenal_free_buffers(); - /* fall through */ - - case SOCKNAL_INIT_NOTHING: - break; - } - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n", - atomic_read(&portal_kmemory)); -} - -int __init -ktoenal_module_init (void) -{ - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; - - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - - LASSERT (ktoenal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - - ktoenal_api.forward = 
ktoenal_api_forward; - ktoenal_api.shutdown = ktoenal_api_shutdown; - ktoenal_api.yield = ktoenal_api_yield; - ktoenal_api.validate = NULL; /* our api validate is a NOOP */ - ktoenal_api.lock = ktoenal_api_lock; - ktoenal_api.unlock = ktoenal_api_unlock; - ktoenal_api.nal_data = &ktoenal_data; - - ktoenal_lib.nal_data = &ktoenal_data; - - memset (&ktoenal_data, 0, sizeof (ktoenal_data)); /* zero pointers */ - - INIT_LIST_HEAD(&ktoenal_data.ksnd_socklist); - rwlock_init(&ktoenal_data.ksnd_socklist_lock); - - ktoenal_data.ksnd_nal_cb = &ktoenal_lib; - spin_lock_init (&ktoenal_data.ksnd_nal_cb_lock); - - spin_lock_init (&ktoenal_data.ksnd_sched_lock); - - init_waitqueue_head (&ktoenal_data.ksnd_sched_waitq); - - INIT_LIST_HEAD (&ktoenal_data.ksnd_rx_conns); - INIT_LIST_HEAD (&ktoenal_data.ksnd_tx_conns); - - INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns); - INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns); - - INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ktoenal_data.ksnd_idle_ltx_waitq); - - INIT_LIST_HEAD (&ktoenal_data.ksnd_reaper_list); - init_waitqueue_head(&ktoenal_data.ksnd_reaper_waitq); - spin_lock_init (&ktoenal_data.ksnd_reaper_lock); - - ktoenal_data.ksnd_init = SOCKNAL_INIT_DATA; /* flag lists/ptrs/locks initialised */ - - PORTAL_ALLOC(ktoenal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS)); - if (ktoenal_data.ksnd_fmbs == NULL) - RETURN(-ENOMEM); - - /* NULL out buffer pointers etc */ - memset(ktoenal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS)); - - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) - { - ksock_fmb_t *fmb = &((ksock_fmb_t *)ktoenal_data.ksnd_fmbs)[i]; - - if (i < 
SOCKNAL_SMALL_FWD_NMSGS) - { - fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; - fmb->fmb_pool = &ktoenal_data.ksnd_small_fmp; - } - else - { - fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES; - fmb->fmb_pool = &ktoenal_data.ksnd_large_fmp; - } - - LASSERT (fmb->fmb_npages > 0); - for (j = 0; j < fmb->fmb_npages; j++) - { - fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); - - if (fmb->fmb_pages[j] == NULL) - { - ktoenal_module_fini (); - return (-ENOMEM); - } - - LASSERT (page_address (fmb->fmb_pages[j]) != NULL); - } - - list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); - } - - PORTAL_ALLOC(ktoenal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - if (ktoenal_data.ksnd_ltxs == NULL) - { - ktoenal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ktoenal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) - { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ktoenal_data.ksnd_ltxs)[i]; - - ltx->ltx_idle = i < SOCKNAL_NLTXS ? 
- &ktoenal_data.ksnd_idle_ltx_list : - &ktoenal_data.ksnd_idle_nblk_ltx_list; - list_add (<x->ltx_tx.tx_list, ltx->ltx_idle); - } - - rc = PtlNIInit(ktoenal_init, 32, 4, 0, &ktoenal_ni); - if (rc != 0) - { - CERROR("ktoenal: PtlNIInit failed: error %d\n", rc); - ktoenal_module_fini (); - RETURN (rc); - } - PtlNIDebug(ktoenal_ni, ~0); - - ktoenal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */ - - ktoenal_data.ksnd_slistchange = 1; - for (i = 0; i < TOENAL_N_SCHED; i++) - { - rc = ktoenal_thread_start (ktoenal_scheduler, NULL); - if (rc != 0) - { - CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ktoenal_module_fini (); - RETURN (rc); - } - } - - rc = ktoenal_thread_start (ktoenal_reaper, NULL); - if (rc != 0) - { - CERROR("Can't spawn socknal reaper: %d\n", rc); - ktoenal_module_fini (); - RETURN (rc); - } - - rc = ktoenal_thread_start (ktoenal_pollthread, NULL); - if (rc != 0) - { - CERROR("Can't spawn socknal pollthread: %d\n", rc); - ktoenal_module_fini (); - RETURN (rc); - } - - rc = kpr_register(&ktoenal_data.ksnd_router, - &ktoenal_router_interface); - if (rc != 0) - CDEBUG (D_NET, "Can't initialise routing interface (rc = %d): not routing\n", rc); - - rc = kportal_nal_register(TOENAL, &ktoenal_cmd, NULL); - if (rc != 0) - CDEBUG(D_NET, "Can't initialise command interface (rc = %d)\n", - rc); - - PORTAL_SYMBOL_REGISTER(ktoenal_ni); - - /* flag everything initialised */ - ktoenal_data.ksnd_init = SOCKNAL_INIT_ALL; - - printk(KERN_INFO"Routing TOE NAL loaded (Routing %s, initial mem %d)\n", - kpr_routing(&ktoenal_data.ksnd_router) ? "enabled" : "disabled", - pkmem); - - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
"); -MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); -MODULE_LICENSE("GPL"); - -module_init(ktoenal_module_init); -module_exit(ktoenal_module_fini); - -EXPORT_SYMBOL (ktoenal_ni); diff --git a/lnet/klnds/toelnd/toenal.h b/lnet/klnds/toelnd/toenal.h deleted file mode 100644 index f793d3b..0000000 --- a/lnet/klnds/toelnd/toenal.h +++ /dev/null @@ -1,236 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * Author: Eric Barton - * Author: Kedar Sovani - * Author: Amey Inamdar - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_PORTAL_ALLOC -#define EXPORT_SYMTAB - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_SOCKNAL - -#include -#include -#include - -#define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */ - -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - -#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ -#define SOCKNAL_LARGE_FWD_NMSGS 32 /* # large messages I can be forwarding at any time */ - -#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */ - -#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT) - /* # pages in a large message fwd buffer */ - -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ - -#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10) - -#define TOENAL_N_SCHED 1 - -typedef struct /* pool of forwarding buffers */ -{ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ - struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ -} ksock_fmb_pool_t; - -typedef struct { - int ksnd_init; /* initialisation state */ - - struct list_head ksnd_socklist; /* all my connections */ - rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */ - - - ptl_nid_t ksnd_mynid; - nal_cb_t *ksnd_nal_cb; - spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */ - - atomic_t ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit */ - - kpr_router_t ksnd_router; /* THE router */ - - spinlock_t ksnd_sched_lock; /* serialise packet scheduling */ - wait_queue_head_t ksnd_sched_waitq; /* where scheduler(s) wait */ - - struct list_head ksnd_rx_conns; /* conn waiting to 
be read */ - struct list_head ksnd_tx_conns; /* conn waiting to be written */ - - void *ksnd_fmbs; /* all the pre-allocated FMBs */ - ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ - ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - - struct list_head ksnd_reaper_list; /* conn waiting to be reaped */ - wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ - spinlock_t ksnd_reaper_lock; /* serialise */ - - struct task_struct *ksnd_pollthread_tsk;/* task_struct for the poll thread */ - poll_table ksnd_pwait; /* poll wait table for the socket */ - int ksnd_slistchange; /* informs the pollthread that - * the socklist has changed */ -} ksock_nal_data_t; - -#define SOCKNAL_INIT_NOTHING 0 -#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_PTL 2 -#define SOCKNAL_INIT_ALL 3 - -typedef struct /* transmit packet */ -{ - struct list_head tx_list; /* queue on conn for transmission etc */ - char tx_isfwd; /* forwarding / sourced here */ - int tx_nob; /* # packet bytes */ - int tx_niov; /* # packet frags */ - struct iovec *tx_iov; /* packet frags */ -} ksock_tx_t; - -typedef struct /* locally transmitted packet */ -{ - ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ - void *ltx_private; /* lib_finalize() callback arg */ - void *ltx_cookie; /* lib_finalize() callback arg */ - struct iovec ltx_iov[1 + PTL_MD_MAX_IOV]; /* msg frags */ - ptl_hdr_t ltx_hdr; /* buffer for packet header */ -} ksock_ltx_t; - -#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry (ptr, kpr_fwd_desc_t, kprfd_scratch) -/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */ - -#define 
KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx) -/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */ - -/* NB list_entry() is used here as convenient macro for calculating a - * pointer to a struct from the addres of a member. - */ - -typedef struct /* Kernel portals Socket Forwarding message buffer */ -{ /* (socknal->router) */ - struct list_head fmb_list; /* queue idle */ - kpr_fwd_desc_t fmb_fwd; /* router's descriptor */ - int fmb_npages; /* # pages allocated */ - ksock_fmb_pool_t *fmb_pool; /* owning pool */ - struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES]; - struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES]; -} ksock_fmb_t; - -#define SOCKNAL_RX_HEADER 1 /* reading header */ -#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */ -#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */ -#define SOCKNAL_RX_SLOP 4 /* skipping body */ -#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */ -#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */ - -typedef struct -{ - struct list_head ksnc_list; /* stash on global socket list */ - struct file *ksnc_file; /* socket filp */ - struct socket *ksnc_sock; /* socket */ - ptl_nid_t ksnc_peernid; /* who's on the other end */ - atomic_t ksnc_refcount; /* # users */ - - /* READER */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ - unsigned long ksnc_rx_ready; /* data ready to read */ - int ksnc_rx_scheduled; /* being progressed */ - int ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted; /* bytes actually wanted */ - int ksnc_rx_niov; /* # frags */ - struct iovec ksnc_rx_iov[1 + PTL_MD_MAX_IOV]; /* the frags */ - - void *ksnc_cookie; /* rx lib_finalize passthru arg */ - ptl_hdr_t ksnc_hdr; /* where I read headers into */ - - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output space */ - struct list_head ksnc_tx_queue; /* 
packets waiting to be sent */ - unsigned long ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - -} ksock_conn_t; - -extern int ktoenal_add_sock (ptl_nid_t nid, int fd); -extern int ktoenal_close_sock(ptl_nid_t nid); -extern int ktoenal_set_mynid(ptl_nid_t nid); -extern int ktoenal_push_sock(ptl_nid_t nid); -extern ksock_conn_t *ktoenal_get_conn (ptl_nid_t nid); -extern void _ktoenal_put_conn (ksock_conn_t *conn); -extern void ktoenal_close_conn (ksock_conn_t *conn); - -static inline void -ktoenal_put_conn (ksock_conn_t *conn) -{ - CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n", - conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount)); - - if (atomic_dec_and_test (&conn->ksnc_refcount)) - _ktoenal_put_conn (conn); -} - -extern int ktoenal_thread_start (int (*fn)(void *arg), void *arg); -extern int ktoenal_new_packet (ksock_conn_t *conn, int skip); -extern void ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); -extern int ktoenal_scheduler (void *arg); -extern int ktoenal_reaper (void *arg); -extern int ktoenal_pollthread (void *arg); -extern void ktoenal_data_ready(ksock_conn_t *conn); -extern void ktoenal_write_space(ksock_conn_t *conn); - - -extern nal_cb_t ktoenal_lib; -extern ksock_nal_data_t ktoenal_data; diff --git a/lnet/klnds/toelnd/toenal_cb.c b/lnet/klnds/toelnd/toenal_cb.c deleted file mode 100644 index ec37f6f..0000000 --- a/lnet/klnds/toelnd/toenal_cb.c +++ /dev/null @@ -1,1219 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown - * Author: Peter J. 
Braam - * Author: Phil Schwan - * Author: Eric Barton - * Author: Kedar Sovani - * Author: Amey Inamdar - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include -#include "toenal.h" - -atomic_t ktoenal_packets_received; -long ktoenal_packets_launched; -long ktoenal_packets_transmitted; - -/* - * LIB functions follow - * - */ -int -ktoenal_read(nal_cb_t *nal, void *private, void *dst_addr, - user_ptr src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ktoenal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, - void *src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ktoenal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq, - ptl_event_t *ev) -{ - CDEBUG(D_NET, LPX64": callback eq %p ev %p\n", - nal->ni.nid, eq, ev); - - if (eq->event_callback != NULL) - eq->event_callback(ev); - - return 0; -} - -void * -ktoenal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - - if (buf != NULL) - memset(buf, 0, len); - - return (buf); -} - -void -ktoenal_free(nal_cb_t *nal, void *buf, size_t len) -{ - 
PORTAL_FREE(buf, len); -} - -void -ktoenal_printf(nal_cb_t *nal, const char *fmt, ...) -{ - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); -} - -void -ktoenal_cli(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data = nal->nal_data; - - spin_lock(&data->ksnd_nal_cb_lock); -} - -void -ktoenal_sti(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data; - data = nal->nal_data; - - spin_unlock(&data->ksnd_nal_cb_lock); -} - -int -ktoenal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if ktoenal_get_conn(nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - -ksock_ltx_t * -ktoenal_get_ltx (int may_block) -{ - long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) - { - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - if (!list_empty (&ktoenal_data.ksnd_idle_ltx_list)) - { - ltx = list_entry (ktoenal_data.ksnd_idle_ltx_list.next, ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - break; - } - - if (!may_block) - { - if (!list_empty (&ktoenal_data.ksnd_idle_nblk_ltx_list)) - { - ltx = list_entry (ktoenal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - } - break; - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - - wait_event (ktoenal_data.ksnd_idle_ltx_waitq, - !list_empty (&ktoenal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - - return (ltx); -} - -int -ktoenal_sendmsg (struct file *sock, struct iovec *iov, int niov, int nob, int flags) -{ - /* NB This procedure "consumes" iov (actually we do, tcp_sendmsg doesn't) - */ - mm_segment_t oldmm; - int rc; - - LASSERT (niov > 0); - LASSERT (nob > 
0); - - oldmm = get_fs(); - set_fs (KERNEL_DS); - -#ifdef PORTAL_DEBUG - { - int total_nob; - int i; - - for (i = total_nob = 0; i < niov; i++) - total_nob += iov[i].iov_len; - - LASSERT (nob == total_nob); - } -#endif - LASSERT (!in_interrupt()); - - rc = sock->f_op->writev(sock, iov, niov, NULL); - - set_fs (oldmm); - - if (rc > 0) /* sent something? */ - { - nob = rc; /* consume iov */ - for (;;) - { - LASSERT (niov > 0); - - if (iov->iov_len >= nob) - { - iov->iov_len -= nob; - iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob); - break; - } - nob -= iov->iov_len; - iov->iov_len = 0; - iov++; - niov--; - } - } - - return (rc); -} - -int -ktoenal_recvmsg(struct file *sock, struct iovec *iov, int niov, int toread) -{ - /* NB This procedure "consumes" iov (actually tcp_recvmsg does) - */ - mm_segment_t oldmm; - int ret, i, len = 0, origlen = 0; - - PROF_START(our_recvmsg); - for(i = 0; i < niov; i++) { - len += iov[i].iov_len; - if(len >= toread) - break; - } - - if(len >= toread) { - origlen = iov[i].iov_len; - iov[i].iov_len -= (len - toread); - } - else { /* i == niov */ - i = niov - 1; - } - - oldmm = get_fs(); - set_fs(KERNEL_DS); - - ret = sock->f_op->readv(sock, iov, i + 1, NULL); - - set_fs(oldmm); - - if(origlen) - iov[i].iov_len = origlen; - - PROF_FINISH(our_recvmsg); - return ret; -} - -void -ktoenal_process_transmit (ksock_conn_t *conn, long *irq_flags) -{ - ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list); - int rc; - - LASSERT (conn->ksnc_tx_scheduled); - LASSERT (conn->ksnc_tx_ready); - LASSERT (!list_empty (&conn->ksnc_tx_queue)); - - /* assume transmit will complete now, so dequeue while I've got the lock */ - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - LASSERT (tx->tx_nob > 0); - - conn->ksnc_tx_ready = 0; /* write_space may race with me and set ready */ - mb(); /* => clear BEFORE trying to write */ - - rc = ktoenal_sendmsg (conn->ksnc_file, - 
tx->tx_iov, tx->tx_niov, tx->tx_nob, - list_empty (&conn->ksnc_tx_queue) ? - MSG_DONTWAIT : (MSG_DONTWAIT | MSG_MORE)); - - CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc); - - if (rc < 0) /* error */ - { - if (rc == -EAGAIN) /* socket full => */ - rc = 0; /* nothing sent */ - else - { -#warning FIXME: handle socket errors properly - CERROR ("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc); - rc = tx->tx_nob; /* kid on for now whole packet went */ - } - } - - if (rc == tx->tx_nob) /* everything went */ - { - conn->ksnc_tx_ready = 1; /* assume more can go (ASAP) */ - ktoenal_put_conn (conn); /* release packet's ref */ - - if (tx->tx_isfwd) /* was a forwarded packet? */ - { - kpr_fwd_done (&ktoenal_data.ksnd_router, - KSOCK_TX_2_KPR_FWD_DESC (tx), 0); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - } - else /* local send */ - { - ksock_ltx_t *ltx = KSOCK_TX_2_KSOCK_LTX (tx); - - lib_finalize (&ktoenal_lib, ltx->ltx_private, ltx->ltx_cookie); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - list_add (<x->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ktoenal_data.ksnd_idle_ltx_list && - waitqueue_active (&ktoenal_data.ksnd_idle_ltx_waitq)) - wake_up (&ktoenal_data.ksnd_idle_ltx_waitq); - } - ktoenal_packets_transmitted++; - } - else - { - tx->tx_nob -= rc; - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - /* back onto HEAD of tx_queue */ - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } - - if (!conn->ksnc_tx_ready || /* no space to write now */ - list_empty (&conn->ksnc_tx_queue)) /* nothing to write */ - { - conn->ksnc_tx_scheduled = 0; /* not being scheduled */ - ktoenal_put_conn (conn); /* release scheduler's ref */ - } - else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns); -} - -void -ktoenal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx) -{ - long flags; - int nob 
= tx->tx_nob; - struct iovec *iov = tx->tx_iov; - int niov = 1; - - LASSERT (nob >= sizeof (ptl_hdr_t)); - - /* Truncate iov to exactly match total packet length - * since socket sendmsg pays no attention to requested length. - */ - for (;;) - { - LASSERT (niov <= tx->tx_niov); - LASSERT (iov->iov_len >= 0); - - if (iov->iov_len >= nob) - { - iov->iov_len = nob; - break; - } - nob -= iov->iov_len; - iov++; - niov++; - } - tx->tx_niov = niov; - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) /* not scheduled to send */ - { - list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns); - conn->ksnc_tx_scheduled = 1; - atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */ - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - - ktoenal_packets_launched++; - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); -} - -int -ktoenal_send(nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, size_t payload_len) -{ - ptl_nid_t gatewaynid; - ksock_conn_t *conn; - ksock_ltx_t *ltx; - int rc; - int i; - - /* By this point, as it happens, we have absolutely no idea what - * 'private' is. It might be ksock_nal_data or it might be ksock_conn. - * Ha ha, isn't that a funny joke? - * - * FIXME: this is not the right way to fix this; the right way is to - * always pass in the same kind of structure. This is hard right now. - * To revisit this issue, set a breakpoint in here and watch for when - * it's called from lib_finalize. I think this occurs when we send a - * packet as a side-effect of another packet, such as when an ACK has - * been requested. -phil */ - - CDEBUG(D_NET, "sending %d bytes from [%d](%p,%d)... 
to nid: " - LPX64" pid %d\n", (int)payload_len, payload_niov, - payload_niov > 0 ? payload_iov[0].iov_base : NULL, - (int)(payload_niov > 0 ? payload_iov[0].iov_len : 0), nid, pid); - - if ((conn = ktoenal_get_conn (nid)) == NULL) - { - /* It's not a peer; try to find a gateway */ - rc = kpr_lookup (&ktoenal_data.ksnd_router, nid, &gatewaynid); - if (rc != 0) - { - CERROR ("Can't route to "LPX64": router error %d\n", nid, rc); - return (-1); - } - - if ((conn = ktoenal_get_conn (gatewaynid)) == NULL) - { - CERROR ("Can't route to "LPX64": gateway "LPX64" is not a peer\n", - nid, gatewaynid); - return (-1); - } - } - - /* This transmit has now got a ref on conn */ - - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. */ - ltx = ktoenal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); - if (ltx == NULL) - { - CERROR ("Can't allocate tx desc\n"); - ktoenal_put_conn (conn); - return (-1); - } - - /* Init common (to sends and forwards) packet part */ - ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_iov = ltx->ltx_iov; - - /* Init local send packet (storage for hdr, finalize() args, iov) */ - ltx->ltx_hdr = *hdr; - ltx->ltx_private = private; - ltx->ltx_cookie = cookie; - - ltx->ltx_iov[0].iov_base = <x->ltx_hdr; - ltx->ltx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - for (i = 0; i < payload_niov; i++) - { - ltx->ltx_iov[1 + i].iov_base = payload_iov[i].iov_base; - ltx->ltx_iov[1 + i].iov_len = payload_iov[i].iov_len; - } - - ktoenal_launch_packet (conn, <x->ltx_tx); - return (0); -} - -void -ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - ksock_conn_t *conn; - ptl_nid_t nid = fwd->kprfd_gateway_nid; - ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch; - - CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd, - 
fwd->kprfd_gateway_nid, fwd->kprfd_target_nid); - - if (nid == ktoenal_lib.ni.nid) /* I'm the gateway; must be the last hop */ - nid = fwd->kprfd_target_nid; - - conn = ktoenal_get_conn (nid); - if (conn == NULL) - { - CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid); - kpr_fwd_done (&ktoenal_data.ksnd_router, fwd, -EHOSTUNREACH); - return; - } - - /* This forward has now got a ref on conn */ - - tx->tx_isfwd = 1; /* This is a forwarding packet */ - tx->tx_nob = fwd->kprfd_nob; - tx->tx_niov = fwd->kprfd_niov; - tx->tx_iov = fwd->kprfd_iov; - - ktoenal_launch_packet (conn, tx); -} - -int -ktoenal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&ktoenal_data.ksnd_nthreads); - return (0); -} - -void -ktoenal_thread_fini (void) -{ - atomic_dec (&ktoenal_data.ksnd_nthreads); -} - -void -ktoenal_fmb_callback (void *arg, int error) -{ - ksock_fmb_t *fmb = (ksock_fmb_t *)arg; - ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]); - ksock_conn_t *conn; - long flags; - - CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": %d\n", - hdr->src_nid, hdr->dest_nid, error); - - if (error != 0) - CERROR ("Failed to route packet from "LPX64" to "LPX64": %d\n", - hdr->src_nid, hdr->dest_nid, error); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); - - if (!list_empty (&fmb->fmb_pool->fmp_blocked_conns)) - { - conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - - CDEBUG (D_NET, "Scheduling conn %p\n", conn); - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP); - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; - list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns); - - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - - 
spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); -} - -ksock_fmb_t * -ktoenal_get_idle_fmb (ksock_conn_t *conn) -{ - /* NB called with sched lock held */ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - ksock_fmb_pool_t *pool; - ksock_fmb_t *fmb; - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - - if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) - pool = &ktoenal_data.ksnd_small_fmp; - else - pool = &ktoenal_data.ksnd_large_fmp; - - if (!list_empty (&pool->fmp_idle_fmbs)) - { - fmb = list_entry (pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); - list_del (&fmb->fmb_list); - return (fmb); - } - - /* deschedule until fmb free */ - - conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP; - - list_add_tail (&conn->ksnc_rx_list, - &pool->fmp_blocked_conns); - return (NULL); -} - - -int -ktoenal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) -{ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - int niov; /* at least the header */ - int nob; - - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left); - LASSERT (payload_nob >= 0); - LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE); - LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE); - - /* Got a forwarding buffer; copy the header we just read into the - * forwarding buffer. If there's payload start reading reading it - * into the buffer, otherwise the forwarding buffer can be kicked - * off immediately. - * - * NB fmb->fmb_iov spans the WHOLE packet. - * conn->ksnc_rx_iov spans just the payload. 
- */ - - fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]); - - memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); /* copy header */ - - if (payload_nob == 0) /* got complete packet already */ - { - atomic_inc (&ktoenal_packets_received); - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, packet_nob); - - fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t); - - kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid, - packet_nob, 1, fmb->fmb_iov, - ktoenal_fmb_callback, fmb); - - kpr_fwd_start (&ktoenal_data.ksnd_router, &fmb->fmb_fwd); /* forward it now */ - - ktoenal_new_packet (conn, 0); /* on to next packet */ - return (1); - } - - niov = 1; - if (packet_nob <= PAGE_SIZE) /* whole packet fits in first page */ - fmb->fmb_iov[0].iov_len = packet_nob; - else - { - fmb->fmb_iov[0].iov_len = PAGE_SIZE; - nob = packet_nob - PAGE_SIZE; - - do - { - LASSERT (niov < fmb->fmb_npages); - fmb->fmb_iov[niov].iov_base = page_address (fmb->fmb_pages[niov]); - fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob); - nob -= PAGE_SIZE; - niov++; - } while (nob > 0); - } - - kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid, - packet_nob, niov, fmb->fmb_iov, - ktoenal_fmb_callback, fmb); - - /* stash router's descriptor ready for call to kpr_fwd_start */ - conn->ksnc_cookie = &fmb->fmb_fwd; - - conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */ - - /* payload is desc's iov-ed buffer, but skipping the hdr */ - LASSERT (niov <= sizeof (conn->ksnc_rx_iov) / sizeof (conn->ksnc_rx_iov[0])); - - conn->ksnc_rx_iov[0].iov_base = (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + sizeof (ptl_hdr_t)); - conn->ksnc_rx_iov[0].iov_len = fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t); - - if (niov > 1) - memcpy (&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], (niov - 1) * sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", 
conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob); - return (0); -} - -void -ktoenal_fwd_parse (ksock_conn_t *conn) -{ - ksock_conn_t *conn2; - int body_len; - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left); - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER); - LASSERT (conn->ksnc_rx_scheduled); - - switch (conn->ksnc_hdr.type) - { - case PTL_MSG_GET: - case PTL_MSG_ACK: - body_len = 0; - break; - case PTL_MSG_PUT: - body_len = conn->ksnc_hdr.msg.put.length; - break; - case PTL_MSG_REPLY: - body_len = conn->ksnc_hdr.msg.reply.length; - break; - default: - /* Unrecognised packet type */ - CERROR ("Unrecognised packet type %d from "LPX64" for "LPX64"\n", - conn->ksnc_hdr.type, conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid); - /* Ignore this header and go back to reading a new packet. */ - ktoenal_new_packet (conn, 0); - return; - } - - if (body_len < 0) /* length corrupt */ - { - CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d illegal\n", - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len); - ktoenal_new_packet (conn, 0); /* on to new packet */ - return; - } - - if (body_len > SOCKNAL_MAX_FWD_PAYLOAD) /* too big to forward */ - { - CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d too big\n", - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len); - ktoenal_new_packet (conn, body_len); /* on to new packet (skip this one's body) */ - return; - } - - conn2 = ktoenal_get_conn (conn->ksnc_hdr.dest_nid); /* should have gone direct */ - if (conn2 != NULL) - { - CERROR ("dropping packet from "LPX64" for "LPX64": target is a peer\n", - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid); - ktoenal_put_conn (conn2); /* drop ref from get above */ - - ktoenal_new_packet (conn, body_len); /* on to next packet (skip this one's body) */ - return; - } - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */ 
- conn->ksnc_rx_nob_left = body_len; /* stash packet size */ - conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */ -} - -int -ktoenal_new_packet (ksock_conn_t *conn, int nob_to_skip) -{ - static char ktoenal_slop_buffer[4096]; - - int nob; - int niov; - int skipped; - - if (nob_to_skip == 0) /* right at next packet boundary now */ - { - conn->ksnc_rx_state = SOCKNAL_RX_HEADER; - conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t); - conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t); - - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr; - conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t); - conn->ksnc_rx_niov = 1; - return (1); - } - - /* set up to skip as much a possible now */ - /* if there's more left (ran out of iov entries) we'll get called again */ - - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - skipped = 0; - niov = 0; - - do - { - nob = MIN (nob_to_skip, sizeof (ktoenal_slop_buffer)); - - conn->ksnc_rx_iov[niov].iov_base = ktoenal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof (conn->ksnc_rx_iov)/sizeof (conn->ksnc_rx_iov[0])); - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_nob_wanted = skipped; - return (0); -} - -void -ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags) -{ - ksock_fmb_t *fmb; - int len; - LASSERT (atomic_read (&conn->ksnc_refcount) > 0); - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_ready); - - /* NB: sched lock held */ - CDEBUG(D_NET, "conn %p\n", conn); - - if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) /* doesn't need a forwarding buffer */ - { - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags); - goto try_read; - } - - get_fmb: - /* NB: sched lock held */ - fmb = ktoenal_get_idle_fmb (conn); - if (fmb == NULL) /* conn descheduled waiting for idle fmb */ - return; - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, 
*irq_flags); - - if (ktoenal_init_fmb (conn, fmb)) /* packet forwarded ? */ - goto out; /* come back later for next packet */ - - try_read: - /* NB: sched lock NOT held */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_BODY || - conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - - LASSERT (conn->ksnc_rx_niov > 0); - LASSERT (conn->ksnc_rx_nob_wanted > 0); - - conn->ksnc_rx_ready = 0; /* data ready may race with me and set ready */ - mb(); /* => clear BEFORE trying to read */ - - /* NB ktoenal_recvmsg "consumes" the iov passed to it */ - len = ktoenal_recvmsg(conn->ksnc_file, - conn->ksnc_rx_iov, conn->ksnc_rx_niov, - conn->ksnc_rx_nob_wanted); - CDEBUG (D_NET, "%p read(%d) %d\n", conn, conn->ksnc_rx_nob_wanted, len); - - if (len <= 0) /* nothing ready (EAGAIN) or EOF or error */ - { - if (len != -EAGAIN && /* ! nothing to read now */ - len != 0) /* ! nothing to read ever */ - { -#warning FIXME: handle socket errors properly - CERROR ("Error socknal read(%d) %p: %d\n", - conn->ksnc_rx_nob_wanted, conn, len); - } - goto out; /* come back when there's data ready */ - } - - LASSERT (len <= conn->ksnc_rx_nob_wanted); - conn->ksnc_rx_nob_wanted -= len; - conn->ksnc_rx_nob_left -= len; - - if (conn->ksnc_rx_nob_wanted != 0) /* short read */ - goto out; /* try again later */ - - conn->ksnc_rx_ready = 1; /* assume there's more to be had */ - - switch (conn->ksnc_rx_state) - { - case SOCKNAL_RX_HEADER: - if (conn->ksnc_hdr.dest_nid != ktoenal_lib.ni.nid) /* It's not for me */ - { - ktoenal_fwd_parse (conn); - switch (conn->ksnc_rx_state) - { - case SOCKNAL_RX_HEADER: /* skipped this packet (zero payload) */ - goto out; /* => come back later */ - case SOCKNAL_RX_SLOP: /* skipping this packet's body */ - goto try_read; /* => go read it */ - case SOCKNAL_RX_GET_FMB: /* forwarding */ - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - goto get_fmb; /* => go get a fwd msg buffer */ 
- default: - } - /* Not Reached */ - LBUG (); - } - - PROF_START(lib_parse); - lib_parse(&ktoenal_lib, &conn->ksnc_hdr, conn); /* sets wanted_len, iovs etc */ - PROF_FINISH(lib_parse); - - if (conn->ksnc_rx_nob_wanted != 0) /* need to get some payload? */ - { - conn->ksnc_rx_state = SOCKNAL_RX_BODY; - goto try_read; /* go read the payload */ - } - /* Fall through (completed packet for me) */ - - case SOCKNAL_RX_BODY: - atomic_inc (&ktoenal_packets_received); - lib_finalize(&ktoenal_lib, NULL, conn->ksnc_cookie); /* packet is done now */ - /* Fall through */ - - case SOCKNAL_RX_SLOP: - if (ktoenal_new_packet (conn, conn->ksnc_rx_nob_left)) /* starting new packet? */ - goto out; /* come back later */ - goto try_read; /* try to finish reading slop now */ - - case SOCKNAL_RX_BODY_FWD: - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left); - - atomic_inc (&ktoenal_packets_received); - - /* ktoenal_init_fmb() stashed router descriptor in conn->ksnc_cookie */ - kpr_fwd_start (&ktoenal_data.ksnd_router, (kpr_fwd_desc_t *)conn->ksnc_cookie); - - LASSERT (conn->ksnc_rx_nob_left == 0); /* no slop in forwarded packets */ - - ktoenal_new_packet (conn, 0); /* on to next packet */ - goto out; /* (later) */ - - default: - } - - /* Not Reached */ - LBUG (); - - out: - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - if (!conn->ksnc_rx_ready) /* no data there to read? 
*/ - { - conn->ksnc_rx_scheduled = 0; /* let socket callback schedule again */ - ktoenal_put_conn (conn); /* release scheduler's ref */ - } - else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns); -} - -int -ktoenal_recv(nal_cb_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - int i; - - conn->ksnc_cookie = msg; - - LASSERT (niov <= PTL_MD_MAX_IOV); - for (i = 0; i < niov; i++) - { - conn->ksnc_rx_iov[i].iov_len = iov[i].iov_len; - conn->ksnc_rx_iov[i].iov_base = iov[i].iov_base; - } - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - return (rlen); -} - -int -ktoenal_scheduler (void *arg) -{ - unsigned long flags; - ksock_conn_t *conn; - int rc; - int nloops = 0; - - kportal_daemonize ("ktoenal_sched"); - kportal_blockallsigs (); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - while (!ktoenal_data.ksnd_shuttingdown) - { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty (&ktoenal_data.ksnd_rx_conns)) - { - did_something = 1; - conn = list_entry (ktoenal_data.ksnd_rx_conns.next, - ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - - ktoenal_process_receive (conn, &flags); /* drops & regains ksnd_sched_lock */ - } - - if (!list_empty (&ktoenal_data.ksnd_tx_conns)) - { - did_something = 1; - conn = list_entry (ktoenal_data.ksnd_tx_conns.next, - ksock_conn_t, ksnc_tx_list); - - list_del (&conn->ksnc_tx_list); - ktoenal_process_transmit (conn, &flags); /* drops and regains ksnd_sched_lock */ - } - - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) /* hogging CPU? 
*/ - { - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ - rc = wait_event_interruptible (ktoenal_data.ksnd_sched_waitq, - ktoenal_data.ksnd_shuttingdown || - !list_empty (&ktoenal_data.ksnd_rx_conns) || - !list_empty (&ktoenal_data.ksnd_tx_conns)); - LASSERT (rc == 0); - } else - our_cond_resched(); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - } - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - ktoenal_thread_fini (); - return (0); -} - - -int -ktoenal_reaper (void *arg) -{ - unsigned long flags; - ksock_conn_t *conn; - int rc; - - kportal_daemonize ("ktoenal_reaper"); - kportal_blockallsigs (); - - while (!ktoenal_data.ksnd_shuttingdown) - { - spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags); - - if (list_empty (&ktoenal_data.ksnd_reaper_list)) - conn = NULL; - else - { - conn = list_entry (ktoenal_data.ksnd_reaper_list.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags); - - if (conn != NULL) - ktoenal_close_conn (conn); - else { - rc = wait_event_interruptible (ktoenal_data.ksnd_reaper_waitq, - ktoenal_data.ksnd_shuttingdown || - !list_empty(&ktoenal_data.ksnd_reaper_list)); - LASSERT (rc == 0); - } - } - - ktoenal_thread_fini (); - return (0); -} - -#define POLLREAD (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI) -#define POLLWRITE (POLLOUT | POLLWRNORM | POLLWRBAND) - -int -ktoenal_pollthread(void *arg) -{ - unsigned int mask; - struct list_head *tmp; - ksock_conn_t *conn; - - /* Save the task struct for waking it up */ - ktoenal_data.ksnd_pollthread_tsk = current; - - kportal_daemonize ("ktoenal_pollthread"); - kportal_blockallsigs (); - - poll_initwait(&ktoenal_data.ksnd_pwait); - - while(!ktoenal_data.ksnd_shuttingdown) { - - set_current_state(TASK_INTERRUPTIBLE); - - read_lock (&ktoenal_data.ksnd_socklist_lock); - list_for_each(tmp, 
&ktoenal_data.ksnd_socklist) { - - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - atomic_inc(&conn->ksnc_refcount); - read_unlock (&ktoenal_data.ksnd_socklist_lock); - - mask = conn->ksnc_file->f_op->poll(conn->ksnc_file, - ktoenal_data.ksnd_slistchange ? - &ktoenal_data.ksnd_pwait : NULL); - - if(mask & POLLREAD) { - ktoenal_data_ready(conn); - - } - if (mask & POLLWRITE) { - ktoenal_write_space(conn); - - } - if (mask & (POLLERR | POLLHUP)) { - /* Do error processing */ - } - - read_lock (&ktoenal_data.ksnd_socklist_lock); - if(atomic_dec_and_test(&conn->ksnc_refcount)) - _ktoenal_put_conn(conn); - } - ktoenal_data.ksnd_slistchange = 0; - read_unlock (&ktoenal_data.ksnd_socklist_lock); - - schedule_timeout(MAX_SCHEDULE_TIMEOUT); - if(ktoenal_data.ksnd_slistchange) { - poll_freewait(&ktoenal_data.ksnd_pwait); - poll_initwait(&ktoenal_data.ksnd_pwait); - } - } - poll_freewait(&ktoenal_data.ksnd_pwait); - ktoenal_thread_fini(); - return (0); -} - -void -ktoenal_data_ready (ksock_conn_t *conn) -{ - unsigned long flags; - ENTRY; - - if (!test_and_set_bit (0, &conn->ksnc_rx_ready)) { - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail (&conn->ksnc_rx_list, - &ktoenal_data.ksnd_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - /* This is done to avoid the effects of a sequence - * of events in which the rx_ready is lost - */ - conn->ksnc_rx_ready=1; - - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - } - - EXIT; -} - -void -ktoenal_write_space (ksock_conn_t *conn) -{ - unsigned long flags; - - CDEBUG (D_NET, "conn %p%s%s%s\n", - conn, - (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? 
" scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? " empty" : " queued")); - - - if (!test_and_set_bit (0, &conn->ksnc_tx_ready)) { - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - if (!list_empty (&conn->ksnc_tx_queue) && /* packets to send */ - !conn->ksnc_tx_scheduled) { /* not being progressed */ - - list_add_tail (&conn->ksnc_tx_list, - &ktoenal_data.ksnd_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - } -} - -nal_cb_t ktoenal_lib = { - nal_data: &ktoenal_data, /* NAL private data */ - cb_send: ktoenal_send, - cb_recv: ktoenal_recv, - cb_read: ktoenal_read, - cb_write: ktoenal_write, - cb_callback: ktoenal_callback, - cb_malloc: ktoenal_malloc, - cb_free: ktoenal_free, - cb_printf: ktoenal_printf, - cb_cli: ktoenal_cli, - cb_sti: ktoenal_sti, - cb_dist: ktoenal_dist -}; diff --git a/lnet/libcfs/.cvsignore b/lnet/libcfs/.cvsignore deleted file mode 100644 index 67d1a3d..0000000 --- a/lnet/libcfs/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -.deps -Makefile -Makefile.in -link-stamp diff --git a/lnet/libcfs/Makefile.am b/lnet/libcfs/Makefile.am deleted file mode 100644 index 20d7fbd..0000000 --- a/lnet/libcfs/Makefile.am +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2001, 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - - -MODULE = portals -modulenet_DATA = portals.o -EXTRA_PROGRAMS = portals - -LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-pid.c -APILINKS := api-eq.c api-errno.c api-init.c api-me.c api-ni.c api-wrap.c -LINKS = $(APILINKS) $(LIBLINKS) -DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej - -$(LINKS): link-stamp -link-stamp: - -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done - -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done - echo timestamp > link-stamp - -DEFS = -portals_SOURCES = $(LINKS) module.c proc.c debug.c - -# Don't distribute any patched files. -dist-hook: - list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done - -include ../Rules.linux diff --git a/lnet/libcfs/Makefile.mk b/lnet/libcfs/Makefile.mk deleted file mode 100644 index 3196ea2..0000000 --- a/lnet/libcfs/Makefile.mk +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include fs/lustre/portals/Kernelenv - -obj-y += libcfs.o -licfs-objs := module.o proc.o debug.o \ No newline at end of file diff --git a/lnet/libcfs/debug.c b/lnet/libcfs/debug.c deleted file mode 100644 index 8d26dbb..0000000 --- a/lnet/libcfs/debug.c +++ /dev/null @@ -1,830 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -# define DEBUG_SUBSYSTEM S_PORTALS - -#include - -#define DEBUG_OVERFLOW 1024 -static char *debug_buf = NULL; -static unsigned long debug_size = 0; -static atomic_t debug_off_a = ATOMIC_INIT(0); -static int debug_wrapped; -wait_queue_head_t debug_ctlwq; -#define DAEMON_SND_SIZE (64 << 10) - -/* - * used by the daemon to keep track the offset into debug_buffer for the next - * write to the file. Usually, the daemon is to write out buffer - * from debug_daemon_next_write upto debug_off - * variable usage - * Reader - portals_debug_msg() - * Writer - portals_debug_daemon() - * portals_debug_daemon_start() during daemon init time - * portals_debug_daemon_continue() to reset to debug_off - * portals_debug_clear_buffer() reset to debug_off for clear - * Note that *_start(), *_continue() & *clear_buffer() should serialized; - */ -static atomic_t debug_daemon_next_write; - -/* - * A debug_daemon can be in following states - * stopped - stopped state means there is no debug_daemon running. 
- * accordingly, it must be in paused state - * a daemon is in !stopped && !paused state after - * "lctl debug_daemon start" creates debug_daemon successfully - * Variable Usage - * Reader - portals_debug_daemon() - * portals_debug_set_daemon() routines - * Writer - portals_debug_set_daemon() routines - * portals_debug_daemon() on IO error - * paused - a debug_daemon state is changed from !paused into paused - * when "lctl debug_daemon paused" is issued - * "lctl debug_daemon continue" gets a daemon into !paused mode - * Reader - portals_debug_set_daemon() routines - * portals_debug_msg() - * Writer - portals_debug_set_daemon() on init - * portals_debug_daemon() - * - * Daemon state diagram. - * (stopped, paused) - * | <-- debug_daemon start - * V - * (!stopped, !paused) - * | <-- debug_daemon pause - * V - * (!stopped, paused) - * | <-- debug_daemon continue - * V - * (!stopped, !paused) - * | <-- debug_daemon stop - * V - * (stopped, paused) - * Overlapped - this is a state when CDEBUG is too fast for the daemon to - * write out the debug_bufferr. That is, debug_off is to - * overlap debug_daemon_next_write; - * Reader - portals_debug_msg() - * Writer - portals_debug_msg() - */ - -/* - * Description on Trace Daemon Synchronization - * - * Three categories of code are synchronizing between each other - * 1. lctl, portals_debug_set_daemon(), the user debug control code, - * as well as portals_debug_clear_buffer() - * 2. CDEBUG, portals_debug_msg(), the debug put messages routine - * 3. Daemon, portals_debug_daemon(), to write out debug log file - * - * - * Three different controls for synchronizations - * - * 1. debug_daemon_semaphore - * The usage of this semaphore is to serialize multiple lctl controls - * in manipulating debug daemon state. 
The semaphore serves as the - * gatekeeper to allow only one user control thread, at any giving time, - * to access debug daemon state and keeps the other user control requests - * in wait state until the current control request is serviced. - * - * 2. wait_queue_head_t lctl (paired with lctl_event flag) - * Lctl event is the event between portals_debug_set_daemon() and - * portals_debug_daemon(). Lctl is an indicator for portals_debug_daemon() - * to flush data out to file. portals_debug_daemon() is to use lctl event - * as signal channel to wakeup portals_debug_set_daemon() upon flush - * operation is done. - * - * Producer : - * portals_debug_daemon() uses to wake up - * portals_debug_set_daemon(), pause and stop, routines - * Consumer : - * portals_debug_set_daemon(), stop and pause operations, - * wait and sleep on the event - * - * 3. wait_queue_head_t daemon (paired with daemon_event flag) - * This is an event channel to wakeup portals_debug_daemon. Daemon - * wakes up to run whenever there is an event posted. Daemon handles - * 2 types of operations . 1. Writes data out to debug file, 2. Flushes - * file and terminates base on lctl event. - * File operation - - * Daemon is normally in a sleep state. - * Daemon is woken up through daemon event whenever CDEBUG is - * putting data over any 64K boundary. - * File flush and termination - - * On portals_debug_daemon_stop/pause() operations, lctl control - * is to wake up daemon through daemon event. - * - * We can't use sleep_on() and wake_up() to replace daemon event because - * portals_debug_daemon() must catch the wakeup operation posted by - * portals_debug_daemon_stop/pause(). Otherwise, stop and pause may - * stuck in lctl wait event. - * - * Producer : - * a. portals_debug_daemon_pause() and portals_debug_daemon_stop() - * uses the event to wake up portals_debug_daemon() - * b. 
portals_debug_msg() uses the event to wake up - * portals_debug_daemon() whenever the data output is acrossing - * a 64K bytes boundary. - * Consumer : - * portals_debug_daemon() wakes up upon daemon event. - * - * Sequence for portals_debug_daemon_stop() operation - * - * _Portals_debug_daemon_stop()_ _Daemon_ - * Wait_event(daemon) or running - * Paused = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Set force_flush flag if lctlevnt - * Flush data - * Wakeup_event (lctl) - * Wait_event(daemon) - * Stopped = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Exit daemon loop if (Stopped) - * Wakeup_event (lctl) - * Exit - * Return to user application - * - * - * _Portals_debug_msg()_ _Daemon_ - * Wait_event(daemon) or running - * If (WriteStart<64Kjournal_info; - current->journal_info = NULL; - sprintf(debug_file_name, "%s.%ld", debug_file_path, CURRENT_TIME); - file = filp_open(debug_file_name, O_CREAT|O_TRUNC|O_RDWR, 0644); - - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for dumping: %ld\n", debug_file_name, - PTR_ERR(file)); - GOTO(out, PTR_ERR(file)); - } else { - printk(KERN_ALERT "dumping log to %s ... 
writing ...\n", - debug_file_name); - } - - debug_off = atomic_read(&debug_off_a); - oldfs = get_fs(); - set_fs(get_ds()); - if (debug_wrapped) { - rc = file->f_op->write(file, debug_buf + debug_off + 1, - debug_size-debug_off-1, &file->f_pos); - rc += file->f_op->write(file, debug_buf, debug_off + 1, - &file->f_pos); - } else { - rc = file->f_op->write(file, debug_buf, debug_off,&file->f_pos); - } - printk("wrote %d bytes\n", rc); - set_fs(oldfs); - - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc) - CERROR("sync returns %d\n", rc); - filp_close(file, 0); -out: - current->journal_info = journal_info; - wake_up(&debug_ctlwq); - return 0; -} - -int portals_debug_daemon(void *arg) -{ - struct file *file; - void *journal_info; - mm_segment_t oldfs; - unsigned long force_flush = 0; - unsigned long size, off, flags; - int rc; - - kportal_daemonize("ldebug_daemon"); - reparent_to_init(); - journal_info = current->journal_info; - current->journal_info = NULL; - - file = filp_open(debug_daemon_file_path, - O_CREAT|O_TRUNC|O_RDWR|O_LARGEFILE, 0644); - - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for logging", debug_daemon_file_path); - GOTO(out1, PTR_ERR(file)); - } else { - printk(KERN_ALERT "daemon dumping log to %s ... writing ...\n", - debug_daemon_file_path); - } - - debug_daemon_state.overlapped = 0; - debug_daemon_state.stopped = 0; - - spin_lock_irqsave(&portals_debug_lock, flags); - off = atomic_read(&debug_off_a) + 1; - if (debug_wrapped) - off = (off >= debug_size)? 0 : off; - else - off = 0; - atomic_set(&debug_daemon_next_write, off); - atomic_set(&debug_daemon_state.paused, 0); - spin_unlock_irqrestore(&portals_debug_lock, flags); - - oldfs = get_fs(); - set_fs(KERNEL_DS); - while (1) { - unsigned long ending; - unsigned long start, tail; - long delta; - - debug_daemon_state.daemon_event = 0; - - ending = atomic_read(&debug_off_a); - start = atomic_read(&debug_daemon_next_write); - - /* check if paused is imposed by lctl ? 
*/ - force_flush = !debug_daemon_state.lctl_event; - - delta = ending - start; - tail = debug_size - start; - size = (delta >= 0) ? delta : tail; - while (size && (force_flush || (delta < 0) || - (size >= DAEMON_SND_SIZE))) { - if (daemon_file_size_limit) { - int ssize = daemon_file_size_limit - file->f_pos; - if (size > ssize) - size = ssize; - } - - rc = file->f_op->write(file, debug_buf+start, - size, &file->f_pos); - if (rc < 0) { - printk(KERN_ALERT - "Debug_daemon write error %d\n", rc); - goto out; - } - start += rc; - delta = ending - start; - tail = debug_size - start; - if (tail == 0) - start = 0; - if (delta >= 0) - size = delta; - else - size = (tail == 0) ? ending : tail; - if (daemon_file_size_limit == file->f_pos) { - // file wrapped around - file->f_pos = 0; - } - } - atomic_set(&debug_daemon_next_write, start); - if (force_flush) { - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc < 0) { - printk(KERN_ALERT - "Debug_daemon sync error %d\n", rc); - goto out; - } - if (debug_daemon_state.stopped) - break; - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); - } - wait_event(debug_daemon_state.daemon, - debug_daemon_state.daemon_event); - } -out: - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - set_fs(oldfs); - filp_close(file, 0); - current->journal_info = journal_info; -out1: - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); - return 0; -} - -void portals_debug_print(void) -{ - unsigned long dumplen = 64 * 1024; - char *start1, *start2; - char *end1, *end2; - unsigned long debug_off = atomic_read(&debug_off_a); - - start1 = debug_buf + debug_off - dumplen; - if (start1 < debug_buf) { - start1 += debug_size; - end1 = debug_buf + debug_size - 1; - start2 = debug_buf; - end2 = debug_buf + debug_off; - } else { - end1 = debug_buf + debug_off; - start2 = debug_buf + debug_off; - end2 = debug_buf + debug_off; - } - - while (start1 < end1) { - int count = MIN(1024, end1 
- start1); - printk("%*s", count, start1); - start1 += 1024; - } - while (start2 < end2) { - int count = MIN(1024, end2 - start2); - printk("%*s", count, start2); - start2 += 1024; - } -} - -void portals_debug_dumplog(void) -{ - int rc; - ENTRY; - - init_waitqueue_head(&debug_ctlwq); - - rc = kernel_thread(portals_do_debug_dumplog, - NULL, CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { - printk(KERN_ERR "cannot start dump thread\n"); - return; - } - sleep_on(&debug_ctlwq); -} - -int portals_debug_daemon_start(char *file, unsigned int size) -{ - int rc; - - if (!debug_daemon_state.stopped) - return -EALREADY; - - if (file != NULL) - strncpy(debug_daemon_file_path, file, 1024); - - init_waitqueue_head(&debug_daemon_state.lctl); - init_waitqueue_head(&debug_daemon_state.daemon); - - daemon_file_size_limit = size << 20; - - debug_daemon_state.lctl_event = 0; - rc = kernel_thread(portals_debug_daemon, NULL, 0); - if (rc < 0) { - printk(KERN_ERR "cannot start debug daemon thread\n"); - strncpy(debug_daemon_file_path, "\0", 1); - return rc; - } - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_pause(void) -{ - if (atomic_read(&debug_daemon_state.paused)) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.lctl_event = 0; - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_continue(void) -{ - if (!atomic_read(&debug_daemon_state.paused)) - return -EINVAL; - if (debug_daemon_state.stopped) - return -EINVAL; - - debug_daemon_state.overlapped = 0; - atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a)); - atomic_set(&debug_daemon_state.paused, 0); - return 0; -} - -int portals_debug_daemon_stop(void) -{ - if (debug_daemon_state.stopped) - return -EALREADY; - - if (!atomic_read(&debug_daemon_state.paused)) - 
portals_debug_daemon_pause(); - - debug_daemon_state.lctl_event = 0; - debug_daemon_state.stopped = 1; - - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - - debug_daemon_file_path[0] = '\0'; - return 0; -} - -int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *filename, unsigned int size) -{ - int rc = -EINVAL; - - down(&debug_daemon_semaphore); - switch (cmd) { - case DEBUG_DAEMON_START: - if (length && (filename[length -1] != '\0')) { - CERROR("Invalid filename for debug_daemon\n"); - rc = -EINVAL; - break; - } - rc = portals_debug_daemon_start(filename, size); - break; - case DEBUG_DAEMON_STOP: - rc = portals_debug_daemon_stop(); - break; - case DEBUG_DAEMON_PAUSE: - rc = portals_debug_daemon_pause(); - break; - case DEBUG_DAEMON_CONTINUE: - rc = portals_debug_daemon_continue(); - break; - default: - CERROR("unknown set_daemon cmd\n"); - } - up(&debug_daemon_semaphore); - return rc; -} - -static int panic_dumplog(struct notifier_block *self, unsigned long unused1, - void *unused2) -{ - if (handled_panic) - return 0; - else - handled_panic = 1; - - if (in_interrupt()) { - portals_debug_print(); - return 0; - } - - while (current->lock_depth >= 0) - unlock_kernel(); - portals_debug_dumplog(); - return 0; -} - -static struct notifier_block lustre_panic_notifier = { - notifier_call : panic_dumplog, - next : NULL, - priority : 10000 -}; - -int portals_debug_init(unsigned long bufsize) -{ - unsigned long debug_off = atomic_read(&debug_off_a); - if (debug_buf != NULL) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - - debug_buf = vmalloc(bufsize + DEBUG_OVERFLOW); - if (debug_buf == NULL) - return -ENOMEM; - memset(debug_buf, 0, debug_size); - debug_wrapped = 0; - - printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", - bufsize, debug_buf); - atomic_set(&debug_off_a, 
debug_off); - notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); - debug_size = bufsize; - - return 0; -} - -int portals_debug_cleanup(void) -{ - notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier); - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - portals_debug_daemon_stop(); - - vfree(debug_buf); - atomic_set(&debug_off_a, 0); - up(&debug_daemon_semaphore); - - return 0; -} - -int portals_debug_clear_buffer(void) -{ - unsigned long flags; - unsigned long state; - - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - state = atomic_read(&debug_daemon_state.paused); - if (!state) - portals_debug_daemon_pause(); - spin_lock_irqsave(&portals_debug_lock, flags); - atomic_set(&debug_off_a, 0); - debug_wrapped = 0; - atomic_set(&debug_daemon_next_write, 0); - debug_daemon_state.overlapped = 0; - spin_unlock_irqrestore(&portals_debug_lock, flags); - - if (!state) - atomic_set(&debug_daemon_state.paused, 0); - up(&debug_daemon_semaphore); - - return 0; -} - -/* Debug markers, although printed by S_PORTALS - * should not be be marked as such. 
- */ -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_UNDEFINED -int portals_debug_mark_buffer(char *text) -{ - if (debug_buf == NULL) - return -EINVAL; - - CDEBUG(0, "*******************************************************************************\n"); - CDEBUG(0, "DEBUG MARKER: %s\n", text); - CDEBUG(0, "*******************************************************************************\n"); - - return 0; -} -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_PORTALS - -__s32 portals_debug_copy_to_user(char *buf, unsigned long len) -{ - int rc; - unsigned long debug_off; - unsigned long flags; - - if (len < debug_size) - return -ENOSPC; - - debug_off = atomic_read(&debug_off_a); - spin_lock_irqsave(&portals_debug_lock, flags); - if (debug_wrapped) { - /* All of this juggling with the 1s is to keep the trailing nul - * (which falls at debug_buf + debug_off) at the end of what we - * copy into user space */ - copy_to_user(buf, debug_buf + debug_off + 1, - debug_size - debug_off - 1); - copy_to_user(buf + debug_size - debug_off - 1, - debug_buf, debug_off + 1); - rc = debug_size; - } else { - copy_to_user(buf, debug_buf, debug_off); - rc = debug_off; - } - spin_unlock_irqrestore(&portals_debug_lock, flags); - - return rc; -} - -/* FIXME: I'm not very smart; someone smarter should make this better. */ -void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) -{ - va_list ap; - unsigned long flags; - int max_nob; - int prefix_nob; - int msg_nob; - struct timeval tv; - unsigned long base_offset; - unsigned long debug_off; - - if (debug_buf == NULL) { - printk("portals_debug_msg: debug_buf is NULL!\n"); - return; - } - - spin_lock_irqsave(&portals_debug_lock, flags); - debug_off = atomic_read(&debug_off_a); - if (!atomic_read(&debug_daemon_state.paused)) { - unsigned long available; - long delta; - long v = atomic_read(&debug_daemon_next_write); - - delta = debug_off - v; - available = (delta>=0) ? 
debug_size-delta : -delta; - // Check if we still have enough debug buffer for CDEBUG - if (available < DAEMON_SND_SIZE) { - /* Drop CDEBUG packets until enough debug_buffer is - * available */ - if (debug_daemon_state.overlapped) - goto out; - /* If this is the first time, leave a marker in the - * output */ - debug_daemon_state.overlapped = 1; - ap = NULL; - format = "DEBUG MARKER: Debug buffer overlapped\n"; - } else /* More space just became available */ - debug_daemon_state.overlapped = 0; - } - - max_nob = debug_size - debug_off + DEBUG_OVERFLOW; - if (max_nob <= 0) { - spin_unlock_irqrestore(&portals_debug_lock, flags); - printk("logic error in portals_debug_msg: <0 bytes to write\n"); - return; - } - - /* NB since we pass a non-zero sized buffer (at least) on the first - * print, we can be assured that by the end of all the snprinting, - * we _do_ have a terminated buffer, even if our message got truncated. - */ - - do_gettimeofday(&tv); - - prefix_nob = snprintf(debug_buf + debug_off, max_nob, - "%02x:%06x:%d:%lu.%06lu ", - subsys >> 24, mask, smp_processor_id(), - tv.tv_sec, tv.tv_usec); - max_nob -= prefix_nob; - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "(%s:%d:%s() %d | %d+%lu): ", - file, line, fn, current->pid, - current->thread.extern_pid, stack); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "(%s:%d:%s() %d | %d+%lu): ", - file, line, fn, current->pid, - current->thread.mode.tt.extern_pid, stack); -#else - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "(%s:%d:%s() %d+%lu): ", - file, line, fn, current->pid, stack); -#endif - max_nob -= msg_nob; - - va_start(ap, format); - msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob, - max_nob, format, ap); - max_nob -= msg_nob; - va_end(ap); - - /* Print to console, while msg is 
contiguous in debug_buf */ - /* NB safely terminated see above */ - if ((mask & D_EMERG) != 0) - printk(KERN_EMERG "%s", debug_buf + debug_off + prefix_nob); - if ((mask & D_ERROR) != 0) - printk(KERN_ERR "%s", debug_buf + debug_off + prefix_nob); - else if (portal_printk) - printk("<%d>%s", portal_printk, debug_buf+debug_off+prefix_nob); - base_offset = debug_off & 0xFFFF; - - debug_off += prefix_nob + msg_nob; - if (debug_off > debug_size) { - memcpy(debug_buf, debug_buf + debug_size, - debug_off - debug_size + 1); - debug_off -= debug_size; - debug_wrapped = 1; - } - - atomic_set(&debug_off_a, debug_off); - if (!atomic_read(&debug_daemon_state.paused) && - ((base_offset+prefix_nob+msg_nob) >= DAEMON_SND_SIZE)) { - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - } -out: - spin_unlock_irqrestore(&portals_debug_lock, flags); -} - -void portals_debug_set_level(unsigned int debug_level) -{ - printk("Setting portals debug level to %08x\n", debug_level); - portal_debug = debug_level; -} - -void portals_run_lbug_upcall(char * file, char *fn, int line) -{ - char *argv[6]; - char *envp[3]; - char buf[32]; - int rc; - - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[0] = portals_upcall; - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = fn; - argv[4] = buf; - argv[5] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - rc = call_usermodehelper(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error invoking lbug upcall %s %s %s %s %s: %d; check " - "/proc/sys/portals/upcall\n", - argv[0], argv[1], argv[2], argv[3], argv[4], rc); - - } else { - CERROR("Invoked upcall %s %s %s %s %s\n", - argv[0], argv[1], argv[2], argv[3], argv[4]); - } -} - - -EXPORT_SYMBOL(portals_debug_dumplog); -EXPORT_SYMBOL(portals_debug_msg); -EXPORT_SYMBOL(portals_debug_set_level); -EXPORT_SYMBOL(portals_run_lbug_upcall); diff --git a/lnet/libcfs/module.c b/lnet/libcfs/module.c deleted file mode 100644 index 
14cc325..0000000 --- a/lnet/libcfs/module.c +++ /dev/null @@ -1,575 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_PORTALS - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#define PORTAL_MINOR 240 - -extern void (kping_client)(struct portal_ioctl_data *); - -struct nal_cmd_handler { - nal_cmd_handler_t nch_handler; - void * nch_private; -}; - -static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; -struct semaphore nal_cmd_sem; - -#ifdef PORTAL_DEBUG -void -kportal_assertion_failed (char *expr, char *file, char *func, int line) -{ - portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(), - "ASSERTION(%s) failed\n", expr); - LBUG_WITH_LOC(file, func, line); -} -#endif - -void -kportal_daemonize (char *str) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) - daemonize(str); -#else - daemonize(); - snprintf (current->comm, sizeof (current->comm), "%s", 
str); -#endif -} - -void -kportal_blockallsigs () -{ - unsigned long flags; - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); -} - -/* called when opening /dev/device */ -static int kportal_psdev_open(struct inode * inode, struct file * file) -{ - ENTRY; - - if (!inode) - RETURN(-EINVAL); - PORTAL_MODULE_USE; - RETURN(0); -} - -/* called when closing /dev/device */ -static int kportal_psdev_release(struct inode * inode, struct file * file) -{ - ENTRY; - - if (!inode) - RETURN(-EINVAL); - - PORTAL_MODULE_UNUSE; - RETURN(0); -} - -static inline void freedata(void *data, int len) -{ - PORTAL_FREE(data, len); -} - -static int -kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, - ptl_nid_t hi_nid) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_del_route(ptl_nid_t target) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_del_route (target); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp, - ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp) -{ - int gateway_nalid; - ptl_nid_t gateway_nid; - ptl_nid_t lo_nid; - ptl_nid_t hi_nid; - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, &lo_nid, - &hi_nid); - - if (rc == 0) { - CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64"\n", - index, gateway_nalid, 
gateway_nid, lo_nid, hi_nid); - - *gateway_nalidp = (__u32)gateway_nalid; - *gateway_nidp = (__u32)gateway_nid; - *lo_nidp = (__u32)lo_nid; - *hi_nidp = (__u32)hi_nid; - } - - PORTAL_SYMBOL_PUT (kpr_control_interface); - return (rc); -} - -static int -kportal_nal_cmd(int nal, struct portal_ioctl_data *data) -{ - int rc = -EINVAL; - - ENTRY; - - down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && nal_cmd[nal].nch_handler) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, data->ioc_nal_cmd); - rc = nal_cmd[nal].nch_handler(data, nal_cmd[nal].nch_private); - } - up(&nal_cmd_sem); - RETURN(rc); -} - -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - return (PORTAL_SYMBOL_GET(kqswnal_ni)); - case SOCKNAL: - return (PORTAL_SYMBOL_GET(ksocknal_ni)); - case TOENAL: - return (PORTAL_SYMBOL_GET(ktoenal_ni)); - case GMNAL: - return (PORTAL_SYMBOL_GET(kgmnal_ni)); - case TCPNAL: - /* userspace NAL */ - return (NULL); - case SCIMACNAL: - return (PORTAL_SYMBOL_GET(kscimacnal_ni)); - default: - /* A warning to a naive caller */ - CERROR ("unknown nal: %d\n", nal); - return (NULL); - } -} - -void -kportal_put_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - PORTAL_SYMBOL_PUT(kqswnal_ni); - break; - case SOCKNAL: - PORTAL_SYMBOL_PUT(ksocknal_ni); - break; - case TOENAL: - PORTAL_SYMBOL_PUT(ktoenal_ni); - break; - case GMNAL: - PORTAL_SYMBOL_PUT(kgmnal_ni); - break; - case TCPNAL: - /* A lesson to a malicious caller */ - LBUG (); - case SCIMACNAL: - PORTAL_SYMBOL_PUT(kscimacnal_ni); - break; - default: - CERROR ("unknown nal: %d\n", nal); - } -} - -int -kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - if (nal_cmd[nal].nch_handler != NULL) - rc = -EBUSY; - else { - nal_cmd[nal].nch_handler = handler; - nal_cmd[nal].nch_private = private; - } - 
up(&nal_cmd_sem); - } - return rc; -} - -int -kportal_nal_unregister(int nal) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - nal_cmd[nal].nch_handler = NULL; - nal_cmd[nal].nch_private = NULL; - up(&nal_cmd_sem); - } - return rc; -} - - -static int kportal_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - int err = 0; - char buf[1024]; - struct portal_ioctl_data *data; - - ENTRY; - - if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE || - _IOC_NR(cmd) < IOC_PORTAL_MIN_NR || - _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - RETURN(-EINVAL); - } - - if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) { - CERROR("PORTALS ioctl: data error\n"); - RETURN(-EINVAL); - } - - data = (struct portal_ioctl_data *)buf; - - switch (cmd) { - case IOC_PORTAL_SET_DAEMON: - RETURN (portals_debug_set_daemon ( - (unsigned int) data->ioc_count, - (unsigned int) data->ioc_inllen1, - (char *) data->ioc_inlbuf1, - (unsigned int) data->ioc_misc)); - case IOC_PORTAL_GET_DEBUG: { - __s32 size = portals_debug_copy_to_user(data->ioc_pbuf1, - data->ioc_plen1); - - if (size < 0) - RETURN(size); - - data->ioc_size = size; - err = copy_to_user((char *)arg, data, sizeof(*data)); - RETURN(err); - } - case IOC_PORTAL_CLEAR_DEBUG: - portals_debug_clear_buffer(); - RETURN(0); - case IOC_PORTAL_PANIC: - if (!capable (CAP_SYS_BOOT)) - RETURN (-EPERM); - panic("debugctl-invoked panic"); - RETURN(0); - case IOC_PORTAL_MARK_DEBUG: - if (data->ioc_inlbuf1 == NULL || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') - RETURN(-EINVAL); - portals_debug_mark_buffer(data->ioc_inlbuf1); - RETURN(0); - case IOC_PORTAL_PING: { - void (*ping)(struct portal_ioctl_data *); - - CDEBUG(D_IOCTL, "doing %d pings to nid "LPU64"\n", - data->ioc_count, data->ioc_nid); - ping = PORTAL_SYMBOL_GET(kping_client); 
- if (!ping) - CERROR("PORTAL_SYMBOL_GET failed\n"); - else { - ping(data); - PORTAL_SYMBOL_PUT(kping_client); - } - RETURN(0); - } - - case IOC_PORTAL_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", - data->ioc_nal, data->ioc_nid, data->ioc_nid2, - data->ioc_nid3); - err = kportal_add_route(data->ioc_nal, data->ioc_nid, - MIN (data->ioc_nid2, data->ioc_nid3), - MAX (data->ioc_nid2, data->ioc_nid3)); - break; - - case IOC_PORTAL_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing route to "LPU64"\n", data->ioc_nid); - err = kportal_del_route (data->ioc_nid); - break; - - case IOC_PORTAL_GET_ROUTE: - CDEBUG (D_IOCTL, "Getting route [%d]\n", data->ioc_count); - err = kportal_get_route(data->ioc_count, &data->ioc_nal, - &data->ioc_nid, &data->ioc_nid2, - &data->ioc_nid3); - if (err == 0) - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; - - case IOC_PORTAL_GET_NID: { - const ptl_handle_ni_t *nip; - ptl_process_id_t pid; - - CDEBUG (D_IOCTL, "Getting nid [%d]\n", data->ioc_nal); - - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) - RETURN (-EINVAL); - - err = PtlGetId (*nip, &pid); - LASSERT (err == PTL_OK); - kportal_put_ni (data->ioc_nal); - - data->ioc_nid = pid.nid; - if (copy_to_user ((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; - } - - case IOC_PORTAL_NAL_CMD: - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", data->ioc_nal, - data->ioc_nal_cmd); - err = kportal_nal_cmd(data->ioc_nal, data); - if (err == 0) - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; - - case IOC_PORTAL_FAIL_NID: { - const ptl_handle_ni_t *nip; - - CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", - data->ioc_nal, data->ioc_nid, data->ioc_count); - - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) - return (-EINVAL); - - err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count); - kportal_put_ni (data->ioc_nal); - break; - } - - default: - err = -EINVAL; - break; - } - - 
RETURN(err); -} - - -static struct file_operations portalsdev_fops = { - ioctl: kportal_ioctl, - open: kportal_psdev_open, - release: kportal_psdev_release -}; - - -static struct miscdevice portal_dev = { - PORTAL_MINOR, - "portals", - &portalsdev_fops -}; - -extern int insert_proc(void); -extern void remove_proc(void); -MODULE_AUTHOR("Peter J. Braam "); -MODULE_DESCRIPTION("Portals v3.1"); -MODULE_LICENSE("GPL"); - -static int init_kportals_module(void) -{ - int rc; - - rc = portals_debug_init(5 * 1024 * 1024); - if (rc < 0) { - printk(KERN_ERR "portals_debug_init: %d\n", rc); - return (rc); - } - - sema_init(&nal_cmd_sem, 1); - - rc = misc_register(&portal_dev); - if (rc) { - CERROR("misc_register: error %d\n", rc); - goto cleanup_debug; - } - - rc = PtlInit(); - if (rc) { - CERROR("PtlInit: error %d\n", rc); - goto cleanup_deregister; - } - - rc = insert_proc(); - if (rc) { - CERROR("insert_proc: error %d\n", rc); - goto cleanup_fini; - } - - CDEBUG (D_OTHER, "portals setup OK\n"); - return (0); - - cleanup_fini: - PtlFini(); - cleanup_deregister: - misc_deregister(&portal_dev); - cleanup_debug: - portals_debug_cleanup(); - return rc; -} - -static void exit_kportals_module(void) -{ - int rc; - - remove_proc(); - PtlFini(); - - CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", - atomic_read(&portal_kmemory)); - - - rc = misc_deregister(&portal_dev); - if (rc) - CERROR("misc_deregister error %d\n", rc); - - if (atomic_read(&portal_kmemory) != 0) - CERROR("Portals memory leaked: %d bytes\n", - atomic_read(&portal_kmemory)); - - rc = portals_debug_cleanup(); - if (rc) - printk(KERN_ERR "portals_debug_cleanup: %d\n", rc); -} - -EXPORT_SYMBOL(lib_dispatch); -EXPORT_SYMBOL(PtlMEAttach); -EXPORT_SYMBOL(PtlMEInsert); -EXPORT_SYMBOL(PtlMEUnlink); -EXPORT_SYMBOL(PtlEQAlloc); -EXPORT_SYMBOL(PtlMDAttach); -EXPORT_SYMBOL(PtlMDUnlink); -EXPORT_SYMBOL(PtlNIInit); -EXPORT_SYMBOL(PtlNIFini); -EXPORT_SYMBOL(PtlNIDebug); -EXPORT_SYMBOL(PtlInit); -EXPORT_SYMBOL(PtlFini); 
-EXPORT_SYMBOL(PtlPut); -EXPORT_SYMBOL(PtlGet); -EXPORT_SYMBOL(ptl_err_str); -EXPORT_SYMBOL(portal_subsystem_debug); -EXPORT_SYMBOL(portal_debug); -EXPORT_SYMBOL(portal_stack); -EXPORT_SYMBOL(portal_printk); -EXPORT_SYMBOL(PtlEQWait); -EXPORT_SYMBOL(PtlEQFree); -EXPORT_SYMBOL(PtlEQGet); -EXPORT_SYMBOL(PtlGetId); -EXPORT_SYMBOL(PtlMDBind); -EXPORT_SYMBOL(lib_iov_nob); -EXPORT_SYMBOL(lib_copy_iov2buf); -EXPORT_SYMBOL(lib_copy_buf2iov); -EXPORT_SYMBOL(lib_kiov_nob); -EXPORT_SYMBOL(lib_copy_kiov2buf); -EXPORT_SYMBOL(lib_copy_buf2kiov); -EXPORT_SYMBOL(lib_finalize); -EXPORT_SYMBOL(lib_parse); -EXPORT_SYMBOL(lib_init); -EXPORT_SYMBOL(lib_fini); -EXPORT_SYMBOL(portal_kmemory); -EXPORT_SYMBOL(kportal_daemonize); -EXPORT_SYMBOL(kportal_blockallsigs); -EXPORT_SYMBOL(kportal_nal_register); -EXPORT_SYMBOL(kportal_nal_unregister); -EXPORT_SYMBOL(kportal_assertion_failed); -EXPORT_SYMBOL(dispatch_name); -EXPORT_SYMBOL(kportal_get_ni); -EXPORT_SYMBOL(kportal_put_ni); - -module_init(init_kportals_module); -module_exit (exit_kportals_module); diff --git a/lnet/libcfs/proc.c b/lnet/libcfs/proc.c deleted file mode 100644 index 2fa739a..0000000 --- a/lnet/libcfs/proc.c +++ /dev/null @@ -1,290 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown - * Author: Peter J. Braam - * Author: Phil Schwan - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -# define DEBUG_SUBSYSTEM S_PORTALS - -#include -#include - -static struct ctl_table_header *portals_table_header = NULL; -extern char debug_file_path[1024]; -extern char debug_daemon_file_path[1024]; -extern char portals_upcall[1024]; - -#define PSDEV_PORTALS (0x100) -#define PSDEV_DEBUG 1 /* control debugging */ -#define PSDEV_SUBSYSTEM_DEBUG 2 /* control debugging */ -#define PSDEV_PRINTK 3 /* force all errors to console */ -#define PSDEV_DEBUG_PATH 4 /* crashdump log location */ -#define PSDEV_DEBUG_DUMP_PATH 5 /* crashdump tracelog location */ -#define PSDEV_PORTALS_UPCALL 6 /* User mode upcall script */ - -#define PORTALS_PRIMARY_CTLCNT 6 -static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = { - {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug, - sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_PRINTK, "printk", &portal_printk, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, - sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string}, - {PSDEV_DEBUG_DUMP_PATH, "debug_daemon_path", debug_daemon_file_path, - sizeof(debug_daemon_file_path), 0644, NULL, &proc_dostring, - &sysctl_string}, - {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, - sizeof(portals_upcall), 0644, NULL, &proc_dostring, - &sysctl_string}, - {0} -}; - -static struct ctl_table top_table[2] = { - {PSDEV_PORTALS, "portals", NULL, 0, 0555, portals_table}, - {0} -}; - - -#ifdef 
PORTALS_PROFILING -/* - * profiling stuff. we do this statically for now 'cause its simple, - * but we could do some tricks with elf sections to have this array - * automatically built. - */ -#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, } - -struct prof_ent prof_ents[] = { - def_prof(our_recvmsg), - def_prof(our_sendmsg), - def_prof(socknal_recv), - def_prof(lib_parse), - def_prof(conn_list_walk), - def_prof(memcpy), - def_prof(lib_finalize), - def_prof(pingcli_time), - def_prof(gmnal_send), - def_prof(gmnal_recv), -}; - -EXPORT_SYMBOL(prof_ents); - -/* - * this function is as crazy as the proc filling api - * requires. - * - * buffer: page allocated for us to scribble in. the - * data returned to the user will be taken from here. - * *start: address of the pointer that will tell the - * caller where in buffer the data the user wants is. - * ppos: offset in the entire /proc file that the user - * currently wants. - * wanted: the amount of data the user wants. - * - * while going, 'curpos' is the offset in the entire - * file where we currently are. We only actually - * start filling buffer when we get to a place in - * the file that the user cares about. - * - * we take care to only sprintf when the user cares because - * we're holding a lock while we do this. - * - * we're smart and know that we generate fixed size lines. - * we only start writing to the buffer when the user cares. - * This is unpredictable because we don't snapshot the - * list between calls that are filling in a file from - * the list. The list could change mid read and the - * output will look very weird indeed. oh well. 
- */ - -static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted, - int *eof, void *data) -{ - int len = 0, i; - int curpos; - char *header = "Interval Cycles_per (Starts Finishes Total)\n"; - int header_len = strlen(header); - char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)"; - int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1); - - *start = buffer; - - if (ppos < header_len) { - int diff = MIN(header_len, wanted); - memcpy(buffer, header + ppos, diff); - len += diff; - ppos += diff; - } - - if (len >= wanted) - goto out; - - curpos = header_len; - - for ( i = 0; i < MAX_PROFS ; i++) { - int copied; - struct prof_ent *pe = &prof_ents[i]; - long long cycles_per; - /* - * find the part of the array that the buffer wants - */ - if (ppos >= (curpos + line_len)) { - curpos += line_len; - continue; - } - /* the clever caller split a line */ - if (ppos > curpos) { - *start = buffer + (ppos - curpos); - } - - if (pe->finishes == 0) - cycles_per = 0; - else - { - cycles_per = pe->total_cycles; - do_div (cycles_per, pe->finishes); - } - - copied = sprintf(buffer + len, format, pe->str, cycles_per, - pe->starts, pe->finishes, pe->total_cycles); - - len += copied; - - /* pad to line len, -1 for \n */ - if ((copied < line_len-1)) { - int diff = (line_len-1) - copied; - memset(buffer + len, ' ', diff); - len += diff; - copied += diff; - } - - buffer[len++]= '\n'; - - /* bail if we have enough */ - if (((buffer + len) - *start) >= wanted) - break; - - curpos += line_len; - } - - /* lameness */ - if (i == MAX_PROFS) - *eof = 1; - out: - - return MIN(((buffer + len) - *start), wanted); -} - -/* - * all kids love /proc :/ - */ -static unsigned char basedir[]="net/portals"; -#endif /* PORTALS_PROFILING */ - -int insert_proc(void) -{ -#if PORTALS_PROFILING - unsigned char dir[128]; - struct proc_dir_entry *ent; - - if (ARRAY_SIZE(prof_ents) != MAX_PROFS) { - CERROR("profiling enum and array are out of sync.\n"); - return -1; - } - - /* - * This is 
pretty lame. assuming that failure just - * means that they already existed. - */ - strcat(dir, basedir); - create_proc_entry(dir, S_IFDIR, 0); - - strcat(dir, "/cycles"); - ent = create_proc_entry(dir, 0, 0); - if (!ent) { - CERROR("couldn't register %s?\n", dir); - return -1; - } - - ent->data = NULL; - ent->read_proc = prof_read_proc; -#endif /* PORTALS_PROFILING */ - -#ifdef CONFIG_SYSCTL - if (!portals_table_header) - portals_table_header = register_sysctl_table(top_table, 0); -#endif - - return 0; -} - -void remove_proc(void) -{ -#if PORTALS_PROFILING - unsigned char dir[128]; - int end; - - dir[0]='\0'; - strcat(dir, basedir); - - end = strlen(dir); - - strcat(dir, "/cycles"); - remove_proc_entry(dir,0); - - dir[end] = '\0'; - remove_proc_entry(dir,0); -#endif /* PORTALS_PROFILING */ - -#ifdef CONFIG_SYSCTL - if (portals_table_header) - unregister_sysctl_table(portals_table_header); - portals_table_header = NULL; -#endif -} diff --git a/lnet/lnet/.cvsignore b/lnet/lnet/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/lnet/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/lnet/Makefile.am b/lnet/lnet/Makefile.am deleted file mode 100644 index 8c03749..0000000 --- a/lnet/lnet/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - - -CPPFLAGS= -INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -lib_LIBRARIES= libportals.a -libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c diff --git a/lnet/lnet/Makefile.mk b/lnet/lnet/Makefile.mk deleted file mode 100644 index 5627ef7..0000000 --- a/lnet/lnet/Makefile.mk +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. 
-# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../Kernelenv - -obj-y += portals.o -portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o diff --git a/lnet/lnet/api-eq.c b/lnet/lnet/api-eq.c deleted file mode 100644 index e066619..0000000 --- a/lnet/lnet/api-eq.c +++ /dev/null @@ -1,158 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-eq.c - * User-level event queue management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -int ptl_eq_init(void) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_fini(void) -{ - /* Nothing to do anymore... */ -} - -int ptl_eq_ni_init(nal_t * nal) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_ni_fini(nal_t * nal) -{ - /* Nothing to do anymore... 
*/ -} - -int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) -{ - ptl_eq_t *eq; - int rc, new_index; - unsigned long flags; - ptl_event_t *new_event; - nal_t *nal; - ENTRY; - - if (!ptl_init) - RETURN(PTL_NOINIT); - - nal = ptl_hndl2nal(&eventq); - if (!nal) - RETURN(PTL_INV_EQ); - - eq = ptl_handle2usereq(&eventq); - nal->lock(nal, &flags); - - /* size must be a power of 2 to handle a wrapped sequence # */ - LASSERT (eq->size != 0 && - eq->size == LOWEST_BIT_SET (eq->size)); - - new_index = eq->sequence & (eq->size - 1); - new_event = &eq->base[new_index]; - CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n", - new_event, eq->sequence, eq->size); - if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) { - nal->unlock(nal, &flags); - RETURN(PTL_EQ_EMPTY); - } - - *ev = *new_event; - - /* Set the unlinked_me interface number if there is one to pass - * back, since the NAL hasn't a clue what it is and therefore can't - * set it. */ - if (!PtlHandleEqual (ev->unlinked_me, PTL_HANDLE_NONE)) - ev->unlinked_me.nal_idx = eventq.nal_idx; - - /* ensure event is delivered correctly despite possible - races with lib_finalize */ - if (eq->sequence != new_event->sequence) { - CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n", - eq->sequence, new_event->sequence); - rc = PTL_EQ_DROPPED; - } else { - rc = PTL_OK; - } - - eq->sequence = new_event->sequence + 1; - nal->unlock(nal, &flags); - RETURN(rc); -} - - -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) -{ - int rc; - - /* PtlEQGet does the handle checking */ - while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) { - nal_t *nal = ptl_hndl2nal(&eventq_in); - - if (nal->yield) - nal->yield(nal); - } - - return rc; -} - -#ifndef __KERNEL__ -static jmp_buf eq_jumpbuf; - -static void eq_timeout(int signal) -{ - longjmp(eq_jumpbuf, -1); -} - -int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout) -{ - static void (*prev) (int); - static int left_over; - time_t 
time_at_start; - int rc; - - if (setjmp(eq_jumpbuf)) { - signal(SIGALRM, prev); - alarm(left_over - timeout); - return PTL_EQ_EMPTY; - } - - left_over = alarm(timeout); - prev = signal(SIGALRM, eq_timeout); - time_at_start = time(NULL); - if (left_over < timeout) - alarm(left_over); - - rc = PtlEQWait(eventq_in, event_out); - - signal(SIGALRM, prev); - alarm(left_over); /* Should compute how long we waited */ - - return rc; -} - -#endif - diff --git a/lnet/lnet/api-errno.c b/lnet/lnet/api-errno.c deleted file mode 100644 index 026c93b..0000000 --- a/lnet/lnet/api-errno.c +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-errno.c - * Instantiate the string table of errors - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - */ - -/* If you change these, you must update the number table in portals/errno.h */ -const char *ptl_err_str[] = { - "PTL_OK", - "PTL_SEGV", - - "PTL_NOSPACE", - "PTL_INUSE", - "PTL_VAL_FAILED", - - "PTL_NAL_FAILED", - "PTL_NOINIT", - "PTL_INIT_DUP", - "PTL_INIT_INV", - "PTL_AC_INV_INDEX", - - "PTL_INV_ASIZE", - "PTL_INV_HANDLE", - "PTL_INV_MD", - "PTL_INV_ME", - "PTL_INV_NI", -/* If you change these, you must update the number table in portals/errno.h */ - "PTL_ILL_MD", - "PTL_INV_PROC", - "PTL_INV_PSIZE", - "PTL_INV_PTINDEX", - "PTL_INV_REG", - - "PTL_INV_SR_INDX", - "PTL_ML_TOOLONG", - "PTL_ADDR_UNKNOWN", - "PTL_INV_EQ", - "PTL_EQ_DROPPED", - - "PTL_EQ_EMPTY", - "PTL_NOUPDATE", - "PTL_FAIL", - "PTL_NOT_IMPLEMENTED", - "PTL_NO_ACK", - - "PTL_IOV_TOO_MANY", - "PTL_IOV_TOO_SMALL", - - "PTL_EQ_INUSE", - "PTL_MD_INUSE" -}; -/* If you change these, you must update the number table in portals/errno.h */ diff --git a/lnet/lnet/api-init.c b/lnet/lnet/api-init.c deleted file mode 100644 index e59c922..0000000 --- a/lnet/lnet/api-init.c +++ /dev/null @@ -1,71 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * 
vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-init.c - * Initialization and global data for the p30 user side library - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -int ptl_init; -unsigned int portal_subsystem_debug = 0xfff7e3ff; -unsigned int portal_debug = ~0; -unsigned int portal_printk; -unsigned int portal_stack; - -#ifdef __KERNEL__ -atomic_t portal_kmemory = ATOMIC_INIT(0); -#endif - -int __p30_initialized; -int __p30_myr_initialized; -int __p30_ip_initialized; -ptl_handle_ni_t __myr_ni_handle; -ptl_handle_ni_t __ip_ni_handle; - -int __p30_myr_timeout = 10; -int __p30_ip_timeout; - -int PtlInit(void) -{ - - if (ptl_init) - return PTL_OK; - - ptl_ni_init(); - ptl_me_init(); - ptl_eq_init(); - ptl_init = 1; - __p30_initialized = 1; - - return PTL_OK; -} - - -void PtlFini(void) -{ - - /* Reverse order of initialization */ - ptl_eq_fini(); - ptl_me_fini(); - ptl_ni_fini(); - ptl_init = 0; -} diff --git a/lnet/lnet/api-me.c b/lnet/lnet/api-me.c deleted file mode 100644 index e724e58..0000000 --- a/lnet/lnet/api-me.c +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-me.c - * 
Match Entry local operations. - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -int ptl_me_init(void) -{ - return PTL_OK; -} -void ptl_me_fini(void) -{ /* Nothing to do */ -} -int ptl_me_ni_init(nal_t * nal) -{ - return PTL_OK; -} - -void ptl_me_ni_fini(nal_t * nal) -{ /* Nothing to do... */ -} diff --git a/lnet/lnet/api-ni.c b/lnet/lnet/api-ni.c deleted file mode 100644 index b2e069e..0000000 --- a/lnet/lnet/api-ni.c +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-ni.c - * Network Interface code - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include - -/* Put some magic in the NI handle so uninitialised/zeroed handles are easy - * to spot */ -#define NI_HANDLE_MAGIC 0xebc0de00 -#define NI_HANDLE_MASK 0x000000ff -#define MAX_NIS 8 -static nal_t *ptl_interfaces[MAX_NIS]; -int ptl_num_interfaces = 0; - -nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) -{ - unsigned int idx = handle->nal_idx; - - /* XXX we really rely on the caller NOT racing with interface - * setup/teardown. That ensures her NI handle can't get - * invalidated out from under her (or worse, swapped for a - * completely different interface!) */ - - if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0) - return NULL; - - idx &= NI_HANDLE_MASK; - if (idx < MAX_NIS) - return ptl_interfaces[idx]; - - return NULL; -} - -int ptl_ni_init(void) -{ - int i; - - LASSERT (MAX_NIS <= (NI_HANDLE_MASK + 1)); - - for (i = 0; i < MAX_NIS; i++) - ptl_interfaces[i] = NULL; - - return PTL_OK; -} - -void ptl_ni_fini(void) -{ - int i; - - for (i = 0; i < MAX_NIS; i++) { - nal_t *nal = ptl_interfaces[i]; - if (!nal) - continue; - - if (nal->shutdown) - nal->shutdown(nal, i); - } -} - -#ifdef __KERNEL__ -DECLARE_MUTEX(ptl_ni_init_mutex); - -static void ptl_ni_init_mutex_enter (void) -{ - down (&ptl_ni_init_mutex); -} - -static void ptl_ni_init_mutex_exit (void) -{ - up (&ptl_ni_init_mutex); -} - -#else -static void ptl_ni_init_mutex_enter (void) -{ -} - -static void ptl_ni_init_mutex_exit (void) -{ -} - -#endif - -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, ptl_pid_t requested_pid, - ptl_handle_ni_t * handle) -{ - nal_t *nal; - int i; - - if (!ptl_init) - return PTL_NOINIT; - - ptl_ni_init_mutex_enter (); - - nal = interface(ptl_num_interfaces, ptl_size, 
acl_size, requested_pid); - - if (!nal) { - ptl_ni_init_mutex_exit (); - return PTL_NAL_FAILED; - } - - for (i = 0; i < ptl_num_interfaces; i++) { - if (ptl_interfaces[i] == nal) { - nal->refct++; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | i; - fprintf(stderr, "Returning existing NAL (%d)\n", i); - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - } - nal->refct = 1; - - if (ptl_num_interfaces >= MAX_NIS) { - if (nal->shutdown) - nal->shutdown (nal, ptl_num_interfaces); - ptl_ni_init_mutex_exit (); - return PTL_NOSPACE; - } - - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces; - ptl_interfaces[ptl_num_interfaces++] = nal; - - ptl_eq_ni_init(nal); - ptl_me_ni_init(nal); - - ptl_ni_init_mutex_exit (); - return PTL_OK; -} - - -int PtlNIFini(ptl_handle_ni_t ni) -{ - nal_t *nal; - int idx; - int rc; - - if (!ptl_init) - return PTL_NOINIT; - - ptl_ni_init_mutex_enter (); - - nal = ptl_hndl2nal (&ni); - if (nal == NULL) { - ptl_ni_init_mutex_exit (); - return PTL_INV_HANDLE; - } - - idx = ni.nal_idx & NI_HANDLE_MASK; - - nal->refct--; - if (nal->refct > 0) { - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - - ptl_me_ni_fini(nal); - ptl_eq_ni_fini(nal); - - rc = PTL_OK; - if (nal->shutdown) - rc = nal->shutdown(nal, idx); - - ptl_interfaces[idx] = NULL; - ptl_num_interfaces--; - - ptl_ni_init_mutex_exit (); - return rc; -} - -int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out) -{ - *ni_out = handle_in; - - return PTL_OK; -} diff --git a/lnet/lnet/api-wrap.c b/lnet/lnet/api-wrap.c deleted file mode 100644 index e54707f..0000000 --- a/lnet/lnet/api-wrap.c +++ /dev/null @@ -1,599 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-wrap.c - * User-level wrappers that dispatch across the protection boundaries - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. 
- * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -# define DEBUG_SUBSYSTEM S_PORTALS -#include - -static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf, - int argsize, void *retbuf, int retsize) -{ - nal_t *nal; - - if (!ptl_init) { - fprintf(stderr, "PtlGetId: Not initialized\n"); - return PTL_NOINIT; - } - - nal = ptl_hndl2nal(&any_h); - if (!nal) - return PTL_INV_HANDLE; - - nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize); - - return PTL_OK; -} - -int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) -{ - PtlGetId_in args; - PtlGetId_out ret; - int rc; - - args.handle_in = ni_handle; - - rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return rc; - - if (id) - *id = ret.id_out; - - return ret.rc; -} - -int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) -{ - PtlFailNid_in args; - PtlFailNid_out ret; - int rc; - - args.interface = interface; - args.nid = nid; - args.threshold = threshold; - - rc = do_forward (interface, PTL_FAILNID, - &args, sizeof(args), &ret, sizeof (ret)); - - return ((rc != PTL_OK) ? 
rc : ret.rc); -} - -int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t * status_out) -{ - PtlNIStatus_in args; - PtlNIStatus_out ret; - int rc; - - args.interface_in = interface_in; - args.register_in = register_in; - - rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (status_out) - *status_out = ret.status_out; - - return ret.rc; -} - -int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out) -{ - PtlNIDist_in args; - PtlNIDist_out ret; - int rc; - - args.interface_in = interface_in; - args.process_in = process_in; - - rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (distance_out) - *distance_out = ret.distance_out; - - return ret.rc; -} - - - -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in) -{ - PtlNIDebug_in args; - PtlNIDebug_out ret; - int rc; - - args.mask_in = mask_in; - - rc = do_forward(ni, PTL_NIDEBUG, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; -} - -int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, - ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, - ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, - ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out) -{ - PtlMEAttach_in args; - PtlMEAttach_out ret; - int rc; - - args.interface_in = interface_in; - args.index_in = index_in; - args.match_id_in = match_id_in; - args.match_bits_in = match_bits_in; - args.ignore_bits_in = ignore_bits_in; - args.unlink_in = unlink_in; - args.position_in = pos_in; - - rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (handle_out) { - handle_out->nal_idx = interface_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - - return ret.rc; -} 
- -int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, - ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, - ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, - ptl_handle_me_t * handle_out) -{ - PtlMEInsert_in args; - PtlMEInsert_out ret; - int rc; - - args.current_in = current_in; - args.match_id_in = match_id_in; - args.match_bits_in = match_bits_in; - args.ignore_bits_in = ignore_bits_in; - args.unlink_in = unlink_in; - args.position_in = position_in; - - rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc; - - if (handle_out) { - handle_out->nal_idx = current_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; -} - -int PtlMEUnlink(ptl_handle_me_t current_in) -{ - PtlMEUnlink_in args; - PtlMEUnlink_out ret; - int rc; - - args.current_in = current_in; - args.unlink_in = PTL_RETAIN; - - rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc; - - return ret.rc; -} - -int PtlTblDump(ptl_handle_ni_t ni, int index_in) -{ - PtlTblDump_in args; - PtlTblDump_out ret; - int rc; - - args.index_in = index_in; - - rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; -} - -int PtlMEDump(ptl_handle_me_t current_in) -{ - PtlMEDump_in args; - PtlMEDump_out ret; - int rc; - - args.current_in = current_in; - - rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_ME : rc; - - return ret.rc; -} - -static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in) -{ - nal_t *nal; - int rc; - int i; - - if (!ptl_init) { - fprintf(stderr, "PtlMDAttach/Bind/Update: Not initialized\n"); - return PTL_NOINIT; - } - - nal = ptl_hndl2nal(¤t_in); - if (!nal) - return PTL_INV_HANDLE; - - if (nal->validate != NULL) /* nal->validate not a NOOP */ - { - if ((md_in.options & PTL_MD_IOV) == 0) /* contiguous */ - { - rc = nal->validate (nal, md_in.start, md_in.length); - if (rc) - return (PTL_SEGV); - } - else - { - struct iovec *iov = (struct iovec *)md_in.start; - - for (i = 0; i < md_in.niov; i++, iov++) - { - rc = nal->validate (nal, iov->iov_base, iov->iov_len); - if (rc) - return (PTL_SEGV); - } - } - } - - return 0; -} - -static ptl_handle_eq_t md2eq (ptl_md_t *md) -{ - if (PtlHandleEqual (md->eventq, PTL_EQ_NONE)) - return (PTL_EQ_NONE); - - return (ptl_handle2usereq (&md->eventq)->cb_eq_handle); -} - - -int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out) -{ - PtlMDAttach_in args; - PtlMDAttach_out ret; - int rc; - - rc = validate_md(me_in, md_in); - if (rc == PTL_OK) { - args.eq_in = md2eq(&md_in); - args.me_in = me_in; - args.md_in = md_in; - args.unlink_in = unlink_in; - - rc = do_forward(me_in, PTL_MDATTACH, - &args, sizeof(args), &ret, sizeof(ret)); - } - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_ME : rc; - - if (handle_out) { - handle_out->nal_idx = me_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; -} - - - -int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_handle_md_t * handle_out) -{ - PtlMDBind_in args; - PtlMDBind_out ret; - int rc; - - rc = validate_md(ni_in, md_in); - if (rc != PTL_OK) - return rc; - - args.eq_in = md2eq(&md_in); - args.ni_in = ni_in; - args.md_in = md_in; - - rc = do_forward(ni_in, PTL_MDBIND, - &args, sizeof(args), &ret, sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (handle_out) { - handle_out->nal_idx = ni_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; -} - -int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, - ptl_md_t *new_inout, ptl_handle_eq_t testq_in) -{ - PtlMDUpdate_internal_in args; - PtlMDUpdate_internal_out ret; - int rc; - - args.md_in = md_in; - - if (old_inout) { - args.old_inout = *old_inout; - args.old_inout_valid = 1; - } else - args.old_inout_valid = 0; - - if (new_inout) { - rc = validate_md (md_in, *new_inout); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc; - args.new_inout = *new_inout; - args.new_inout_valid = 1; - } else - args.new_inout_valid = 0; - - if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) { - args.testq_in = PTL_EQ_NONE; - args.sequence_in = -1; - } else { - ptl_eq_t *eq = ptl_handle2usereq (&testq_in); - - args.testq_in = eq->cb_eq_handle; - args.sequence_in = eq->sequence; - } - - rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc; - - if (old_inout) - *old_inout = ret.old_inout; - - return ret.rc; -} - -int PtlMDUnlink(ptl_handle_md_t md_in) -{ - PtlMDUnlink_in args; - PtlMDUnlink_out ret; - int rc; - - args.md_in = md_in; - rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_MD : rc; - - return ret.rc; -} - -int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out) -{ - ptl_eq_t *eq = NULL; - ptl_event_t *ev = NULL; - PtlEQAlloc_in args; - PtlEQAlloc_out ret; - int rc, i; - nal_t *nal; - - if (!ptl_init) - return PTL_NOINIT; - - nal = ptl_hndl2nal (&interface); - if (nal == NULL) - return PTL_INV_HANDLE; - - if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */ - do { /* knock off all but the top bit... */ - count &= ~LOWEST_BIT_SET (count); - } while (count != LOWEST_BIT_SET(count)); - - count <<= 1; /* ...and round up */ - } - - if (count == 0) /* catch bad parameter / overflow on roundup */ - return (PTL_VAL_FAILED); - - PORTAL_ALLOC(ev, count * sizeof(ptl_event_t)); - if (!ev) - return PTL_NOSPACE; - - for (i = 0; i < count; i++) - ev[i].sequence = 0; - - if (nal->validate != NULL) { - rc = nal->validate(nal, ev, count * sizeof(ptl_event_t)); - if (rc != PTL_OK) - goto fail; - } - - args.ni_in = interface; - args.count_in = count; - args.base_in = ev; - args.len_in = count * sizeof(*ev); - args.callback_in = callback; - - rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - goto fail; - if (ret.rc) - GOTO(fail, rc = ret.rc); - - PORTAL_ALLOC(eq, sizeof(*eq)); - if (!eq) { - rc = PTL_NOSPACE; - goto fail; - } - - eq->sequence = 1; - eq->size = count; - eq->base = ev; - - /* EQ handles are a little wierd. PtlEQGet() just looks at the - * queued events in shared memory. It doesn't want to do_forward() - * at all, so the cookie in the EQ handle we pass out of here is - * simply a pointer to the event queue we just set up. We stash - * the handle returned by do_forward(), so we can pass it back via - * do_forward() when we need to. 
*/ - - eq->cb_eq_handle.nal_idx = interface.nal_idx; - eq->cb_eq_handle.cookie = ret.handle_out.cookie; - - handle_out->nal_idx = interface.nal_idx; - handle_out->cookie = (__u64)((unsigned long)eq); - return PTL_OK; - -fail: - PORTAL_FREE(ev, count * sizeof(ptl_event_t)); - return rc; -} - -int PtlEQFree(ptl_handle_eq_t eventq) -{ - PtlEQFree_in args; - PtlEQFree_out ret; - ptl_eq_t *eq; - int rc; - - eq = ptl_handle2usereq (&eventq); - args.eventq_in = eq->cb_eq_handle; - - rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args, - sizeof(args), &ret, sizeof(ret)); - - /* XXX we're betting rc == PTL_OK here */ - PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t)); - PORTAL_FREE(eq, sizeof(*eq)); - - return rc; -} - -int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, - ptl_process_id_t match_id_in, ptl_pt_index_t portal_in) -{ - PtlACEntry_in args; - PtlACEntry_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.ni_in = ni_in; - args.index_in = index_in; - args.match_id_in = match_id_in; - args.portal_in = portal_in; - - rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret, - sizeof(ret)); - - return (rc != PTL_OK) ? rc : ret.rc; -} - -int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, - ptl_process_id_t target_in, ptl_pt_index_t portal_in, - ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in, - ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in) -{ - PtlPut_in args; - PtlPut_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.md_in = md_in; - args.ack_req_in = ack_req_in; - args.target_in = target_in; - args.portal_in = portal_in; - args.cookie_in = cookie_in; - args.match_bits_in = match_bits_in; - args.offset_in = offset_in; - args.hdr_data_in = hdr_data_in; - - rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret)); - - return (rc != PTL_OK) ? 
rc : ret.rc; -} - -int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, - ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in, - ptl_match_bits_t match_bits_in, ptl_size_t offset_in) -{ - PtlGet_in args; - PtlGet_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.md_in = md_in; - args.target_in = target_in; - args.portal_in = portal_in; - args.cookie_in = cookie_in; - args.match_bits_in = match_bits_in; - args.offset_in = offset_in; - - rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret)); - - return (rc != PTL_OK) ? rc : ret.rc; -} diff --git a/lnet/lnet/lib-dispatch.c b/lnet/lnet/lib-dispatch.c deleted file mode 100644 index 13036c7..0000000 --- a/lnet/lnet/lib-dispatch.c +++ /dev/null @@ -1,80 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-dispatch.c - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include -#include - -typedef struct { - int (*fun) (nal_cb_t * nal, void *private, void *in, void *out); - char *name; -} dispatch_table_t; - -static dispatch_table_t dispatch_table[] = { - [PTL_GETID] {do_PtlGetId, "PtlGetId"}, - [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"}, - [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"}, - [PTL_NIDEBUG] {do_PtlNIDebug, "PtlNIDebug"}, - [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"}, - [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"}, - [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"}, - [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"}, - [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"}, - [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"}, - [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"}, - [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"}, - [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"}, - [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"}, - [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"}, - [PTL_PUT] {do_PtlPut, "PtlPut"}, - [PTL_GET] {do_PtlGet, "PtlGet"}, - [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"}, - /* */ {0, ""} -}; - -/* - * This really should be elsewhere, but lib-p30/dispatch.c is - * an automatically generated file. 
- */ -void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block, - void *ret_block) -{ - lib_ni_t *ni = &nal->ni; - - if (index < 0 || index > LIB_MAX_DISPATCH || - !dispatch_table[index].fun) { - CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index); - return; - } - - CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid, - dispatch_table[index].name, index); - - dispatch_table[index].fun(nal, private, arg_block, ret_block); -} - -char *dispatch_name(int index) -{ - return dispatch_table[index].name; -} diff --git a/lnet/lnet/lib-eq.c b/lnet/lnet/lib-eq.c deleted file mode 100644 index ce343c1..0000000 --- a/lnet/lnet/lib-eq.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-eq.c - * Library level Event queue management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include -#include - -int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t ni_in - * ptl_size_t count_in - * void * base_in - * - * Outgoing: - * ptl_handle_eq_t * handle_out - */ - - PtlEQAlloc_in *args = v_args; - PtlEQAlloc_out *ret = v_ret; - - lib_eq_t *eq; - unsigned long flags; - - /* api should have rounded up */ - if (args->count_in != LOWEST_BIT_SET (args->count_in)) - return ret->rc = PTL_VAL_FAILED; - - eq = lib_eq_alloc (nal); - if (eq == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - if (nal->cb_map != NULL) { - struct iovec iov = { - .iov_base = args->base_in, - .iov_len = args->count_in * sizeof (ptl_event_t) }; - - ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey); - if (ret->rc != PTL_OK) { - lib_eq_free (nal, eq); - - state_unlock (nal, &flags); - return (ret->rc); - } - } - - eq->sequence = 1; - eq->base = args->base_in; - eq->size = args->count_in; - eq->eq_refcount = 0; - eq->event_callback = args->callback_in; - - lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ); - list_add (&eq->eq_list, &nal->ni.ni_active_eqs); - - state_unlock(nal, &flags); - - ptl_eq2handle(&ret->handle_out, eq); - return (ret->rc = PTL_OK); -} - -int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -{ - /* - * Incoming: - * ptl_handle_eq_t eventq_in - * - * Outgoing: - */ - - PtlEQFree_in *args = v_args; - PtlEQFree_out *ret = v_ret; - lib_eq_t *eq; - long flags; - - state_lock (nal, &flags); - - eq = ptl_handle2eq(&args->eventq_in, nal); - if (eq == NULL) { - ret->rc = PTL_INV_EQ; - } else if (eq->eq_refcount != 0) { - ret->rc = PTL_EQ_INUSE; - } else { - if (nal->cb_unmap != NULL) { - struct iovec iov = { - .iov_base = eq->base, - .iov_len = eq->size * sizeof (ptl_event_t) }; - - nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey); - } - - lib_invalidate_handle (nal, &eq->eq_lh); - 
list_del (&eq->eq_list); - lib_eq_free (nal, eq); - ret->rc = PTL_OK; - } - - state_unlock (nal, &flags); - - return (ret->rc); -} diff --git a/lnet/lnet/lib-init.c b/lnet/lnet/lib-init.c deleted file mode 100644 index 99c4d32..0000000 --- a/lnet/lnet/lib-init.c +++ /dev/null @@ -1,474 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-init.c - * Start up the internal library and clear all structures - * Called by the NAL when it initializes. Safe to call multiple times. - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -# define DEBUG_SUBSYSTEM S_PORTALS -#include - -#ifdef __KERNEL__ -# include /* for memset() */ -# include -# ifdef KERNEL_ADDR_CACHE -# include -# endif -#else -# include -# include -#endif - -#ifdef PTL_USE_SLAB_CACHE -static int ptl_slab_users; - -kmem_cache_t *ptl_md_slab; -kmem_cache_t *ptl_msg_slab; -kmem_cache_t *ptl_me_slab; -kmem_cache_t *ptl_eq_slab; - -atomic_t md_in_use_count; -atomic_t msg_in_use_count; -atomic_t me_in_use_count; -atomic_t eq_in_use_count; - -/* NB zeroing in ctor and on freeing ensures items that - * kmem_cache_validate() OK, but haven't been initialised - * as an MD/ME/EQ can't have valid handles - */ -static void -ptl_md_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags) -{ - memset (obj, 0, sizeof (lib_md_t)); -} - -static void -ptl_me_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags) -{ - memset (obj, 0, sizeof (lib_me_t)); -} - -static void -ptl_eq_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags) -{ - memset (obj, 0, sizeof (lib_eq_t)); -} - -int -kportal_descriptor_setup (nal_cb_t *nal) -{ - /* NB on failure caller must still call kportal_descriptor_cleanup */ - /* ****** */ - - /* We'll have 1 set of slabs for ALL the nals :) */ - - if (ptl_slab_users++) - return 0; - - ptl_md_slab = kmem_cache_create("portals_MD", - sizeof(lib_md_t), 0, - SLAB_HWCACHE_ALIGN, - ptl_md_slab_ctor, NULL); - if (!ptl_md_slab) { - CERROR("couldn't allocate ptl_md_t slab"); - RETURN (PTL_NOSPACE); - } - - /* NB no ctor for msgs; they don't need handle verification */ - ptl_msg_slab = kmem_cache_create("portals_MSG", - sizeof(lib_msg_t), 0, - SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!ptl_msg_slab) { - CERROR("couldn't allocate ptl_msg_t slab"); - RETURN (PTL_NOSPACE); - } - - ptl_me_slab = kmem_cache_create("portals_ME", - sizeof(lib_me_t), 0, - SLAB_HWCACHE_ALIGN, - ptl_me_slab_ctor, NULL); - if (!ptl_me_slab) { - CERROR("couldn't allocate ptl_me_t slab"); - RETURN (PTL_NOSPACE); - } - - ptl_eq_slab = 
kmem_cache_create("portals_EQ", - sizeof(lib_eq_t), 0, - SLAB_HWCACHE_ALIGN, - ptl_eq_slab_ctor, NULL); - if (!ptl_eq_slab) { - CERROR("couldn't allocate ptl_eq_t slab"); - RETURN (PTL_NOSPACE); - } - - RETURN(PTL_OK); -} - -void -kportal_descriptor_cleanup (nal_cb_t *nal) -{ - if (--ptl_slab_users != 0) - return; - - LASSERT (atomic_read (&md_in_use_count) == 0); - LASSERT (atomic_read (&me_in_use_count) == 0); - LASSERT (atomic_read (&eq_in_use_count) == 0); - LASSERT (atomic_read (&msg_in_use_count) == 0); - - if (ptl_md_slab != NULL) - kmem_cache_destroy(ptl_md_slab); - if (ptl_msg_slab != NULL) - kmem_cache_destroy(ptl_msg_slab); - if (ptl_me_slab != NULL) - kmem_cache_destroy(ptl_me_slab); - if (ptl_eq_slab != NULL) - kmem_cache_destroy(ptl_eq_slab); -} -#else - -int -lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size) -{ - char *space; - - LASSERT (n > 0); - - size += offsetof (lib_freeobj_t, fo_contents); - - space = nal->cb_malloc (nal, n * size); - if (space == NULL) - return (PTL_NOSPACE); - - INIT_LIST_HEAD (&fl->fl_list); - fl->fl_objs = space; - fl->fl_nobjs = n; - fl->fl_objsize = size; - - do - { - memset (space, 0, size); - list_add ((struct list_head *)space, &fl->fl_list); - space += size; - } while (--n != 0); - - return (PTL_OK); -} - -void -lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl) -{ - struct list_head *el; - int count; - - if (fl->fl_nobjs == 0) - return; - - count = 0; - for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) - count++; - - LASSERT (count == fl->fl_nobjs); - - nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); - memset (fl, 0, sizeof (fl)); -} - -int -kportal_descriptor_setup (nal_cb_t *nal) -{ - /* NB on failure caller must still call kportal_descriptor_cleanup */ - /* ****** */ - int rc; - - memset (&nal->ni.ni_free_mes, 0, sizeof (nal->ni.ni_free_mes)); - memset (&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs)); - memset (&nal->ni.ni_free_mds, 0, sizeof 
(nal->ni.ni_free_mds)); - memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs)); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_mes, - MAX_MES, sizeof (lib_me_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs, - MAX_MSGS, sizeof (lib_msg_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_mds, - MAX_MDS, sizeof (lib_md_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs, - MAX_EQS, sizeof (lib_eq_t)); - return (rc); -} - -void -kportal_descriptor_cleanup (nal_cb_t *nal) -{ - lib_freelist_fini (nal, &nal->ni.ni_free_mes); - lib_freelist_fini (nal, &nal->ni.ni_free_msgs); - lib_freelist_fini (nal, &nal->ni.ni_free_mds); - lib_freelist_fini (nal, &nal->ni.ni_free_eqs); -} - -#endif - -__u64 -lib_create_interface_cookie (nal_cb_t *nal) -{ - /* NB the interface cookie in wire handles guards against delayed - * replies and ACKs appearing valid in a new instance of the same - * interface. Initialisation time, even if it's only implemented - * to millisecond resolution is probably easily good enough. 
*/ - struct timeval tv; - __u64 cookie; -#ifndef __KERNEL__ - int rc = gettimeofday (&tv, NULL); - LASSERT (rc == 0); -#else - do_gettimeofday(&tv); -#endif - cookie = tv.tv_sec; - cookie *= 1000000; - cookie += tv.tv_usec; - return (cookie); -} - -int -lib_setup_handle_hash (nal_cb_t *nal) -{ - lib_ni_t *ni = &nal->ni; - int i; - - /* Arbitrary choice of hash table size */ -#ifdef __KERNEL__ - ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head); -#else - ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4; -#endif - ni->ni_lh_hash_table = - (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size - * sizeof (struct list_head)); - if (ni->ni_lh_hash_table == NULL) - return (PTL_NOSPACE); - - for (i = 0; i < ni->ni_lh_hash_size; i++) - INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]); - - ni->ni_next_object_cookie = PTL_COOKIE_TYPES; - - return (PTL_OK); -} - -void -lib_cleanup_handle_hash (nal_cb_t *nal) -{ - lib_ni_t *ni = &nal->ni; - - if (ni->ni_lh_hash_table == NULL) - return; - - nal->cb_free (nal, ni->ni_lh_hash_table, - ni->ni_lh_hash_size * sizeof (struct list_head)); -} - -lib_handle_t * -lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type) -{ - /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->ni; - struct list_head *list; - struct list_head *el; - unsigned int hash; - - if ((cookie & (PTL_COOKIE_TYPES - 1)) != type) - return (NULL); - - hash = ((unsigned int)cookie) % ni->ni_lh_hash_size; - list = &ni->ni_lh_hash_table[hash]; - - list_for_each (el, list) { - lib_handle_t *lh = list_entry (el, lib_handle_t, lh_hash_chain); - - if (lh->lh_cookie == cookie) - return (lh); - } - - return (NULL); -} - -void -lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type) -{ - /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->ni; - unsigned int hash; - - LASSERT (type >= 0 && type < PTL_COOKIE_TYPES); - lh->lh_cookie = ni->ni_next_object_cookie | type; - ni->ni_next_object_cookie += PTL_COOKIE_TYPES; - - hash = 
((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size; - list_add (&lh->lh_hash_chain, &ni->ni_lh_hash_table[hash]); -} - -void -lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh) -{ - list_del (&lh->lh_hash_chain); -} - -int -lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size) -{ - int rc = PTL_OK; - lib_ni_t *ni = &nal->ni; - int i; - ENTRY; - - /* NB serialised in PtlNIInit() */ - - if (ni->refcnt != 0) { /* already initialised */ - ni->refcnt++; - goto out; - } - - lib_assert_wire_constants (); - - /* - * Allocate the portal table for this interface - * and all per-interface objects. - */ - memset(&ni->counters, 0, sizeof(lib_counters_t)); - - rc = kportal_descriptor_setup (nal); - if (rc != PTL_OK) - goto out; - - INIT_LIST_HEAD (&ni->ni_active_msgs); - INIT_LIST_HEAD (&ni->ni_active_mds); - INIT_LIST_HEAD (&ni->ni_active_eqs); - - INIT_LIST_HEAD (&ni->ni_test_peers); - - ni->ni_interface_cookie = lib_create_interface_cookie (nal); - ni->ni_next_object_cookie = 0; - rc = lib_setup_handle_hash (nal); - if (rc != PTL_OK) - goto out; - - ni->nid = nid; - ni->pid = pid; - - ni->num_nodes = gsize; - ni->tbl.size = ptl_size; - - ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size); - if (ni->tbl.tbl == NULL) { - rc = PTL_NOSPACE; - goto out; - } - - for (i = 0; i < ptl_size; i++) - INIT_LIST_HEAD(&(ni->tbl.tbl[i])); - - ni->debug = PTL_DEBUG_NONE; - ni->up = 1; - ni->refcnt++; - - out: - if (rc != PTL_OK) { - lib_cleanup_handle_hash (nal); - kportal_descriptor_cleanup (nal); - } - - RETURN (rc); -} - -int -lib_fini(nal_cb_t * nal) -{ - lib_ni_t *ni = &nal->ni; - int idx; - - ni->refcnt--; - - if (ni->refcnt != 0) - goto out; - - /* NB no stat_lock() since this is the last reference. 
The NAL - * should have shut down already, so it should be safe to unlink - * and free all descriptors, even those that appear committed to a - * network op (eg MD with non-zero pending count) - */ - - for (idx = 0; idx < ni->tbl.size; idx++) - while (!list_empty (&ni->tbl.tbl[idx])) { - lib_me_t *me = list_entry (ni->tbl.tbl[idx].next, - lib_me_t, me_list); - - CERROR ("Active me %p on exit\n", me); - list_del (&me->me_list); - lib_me_free (nal, me); - } - - while (!list_empty (&ni->ni_active_mds)) { - lib_md_t *md = list_entry (ni->ni_active_mds.next, - lib_md_t, md_list); - - CERROR ("Active md %p on exit\n", md); - list_del (&md->md_list); - lib_md_free (nal, md); - } - - while (!list_empty (&ni->ni_active_eqs)) { - lib_eq_t *eq = list_entry (ni->ni_active_eqs.next, - lib_eq_t, eq_list); - - CERROR ("Active eq %p on exit\n", eq); - list_del (&eq->eq_list); - lib_eq_free (nal, eq); - } - - while (!list_empty (&ni->ni_active_msgs)) { - lib_msg_t *msg = list_entry (ni->ni_active_msgs.next, - lib_msg_t, msg_list); - - CERROR ("Active msg %p on exit\n", msg); - list_del (&msg->msg_list); - lib_msg_free (nal, msg); - } - - nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size); - ni->up = 0; - - lib_cleanup_handle_hash (nal); - kportal_descriptor_cleanup (nal); - - out: - return (PTL_OK); -} diff --git a/lnet/lnet/lib-md.c b/lnet/lnet/lib-md.c deleted file mode 100644 index a79e2be..0000000 --- a/lnet/lnet/lib-md.c +++ /dev/null @@ -1,412 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-md.c - * Memory Descriptor management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. 
- * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __KERNEL__ -# include -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include -#endif - -#include -#include - -/* - * must be called with state lock held - */ -void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) -{ - lib_me_t *me = md->me; - - if (md->pending != 0) { - CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - md->md_flags |= PTL_MD_FLAG_UNLINK; - return; - } - - CDEBUG(D_NET, "Unlinking md %p\n", md); - - if ((md->options & PTL_MD_KIOV) != 0) { - if (nal->cb_unmap_pages != NULL) - nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, - &md->md_addrkey); - } else if (nal->cb_unmap != NULL) - nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, - &md->md_addrkey); - - if (me) { - me->md = NULL; - if (me->unlink == PTL_UNLINK) - lib_me_unlink(nal, me); - } - - if (md->eq != NULL) - { - md->eq->eq_refcount--; - LASSERT (md->eq->eq_refcount >= 0); - } - - lib_invalidate_handle (nal, &md->md_lh); - list_del (&md->md_list); - lib_md_free(nal, md); -} - -/* must be called with state lock held */ -static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, - ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink) -{ - const int max_size_opts = PTL_MD_AUTO_UNLINK | - PTL_MD_MAX_SIZE; - lib_eq_t *eq = NULL; - 
int rc; - int i; - - /* NB we are passes an allocated, but uninitialised/active md. - * if we return success, caller may lib_md_unlink() it. - * otherwise caller may only lib_md_free() it. - */ - - if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) { - eq = ptl_handle2eq(eqh, nal); - if (eq == NULL) - return PTL_INV_EQ; - } - - if ((md->options & PTL_MD_IOV) != 0 && /* discontiguous MD */ - md->niov > PTL_MD_MAX_IOV) /* too many fragments */ - return PTL_IOV_TOO_MANY; - - if ((md->options & max_size_opts) != 0 && /* max size used */ - (md->max_size < 0 || md->max_size > md->length)) // illegal max_size - return PTL_INV_MD; - - new->me = NULL; - new->start = md->start; - new->length = md->length; - new->offset = 0; - new->max_size = md->max_size; - new->unlink = unlink; - new->options = md->options; - new->user_ptr = md->user_ptr; - new->eq = eq; - new->threshold = md->threshold; - new->pending = 0; - new->md_flags = 0; - - if ((md->options & PTL_MD_IOV) != 0) { - int total_length = 0; - - if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ - return PTL_INV_MD; - - new->md_niov = md->niov; - - if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) - return PTL_SEGV; - - for (i = 0; i < new->md_niov; i++) { - /* We take the base address on trust */ - if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ - return PTL_VAL_FAILED; - - total_length += new->md_iov.iov[i].iov_len; - } - - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - - if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, - &new->md_addrkey); - if (rc != PTL_OK) - return (rc); - } - } else if ((md->options & PTL_MD_KIOV) != 0) { -#ifndef __KERNEL__ - return PTL_INV_MD; -#else - int total_length = 0; - - /* Trap attempt to use paged I/O if unsupported early. 
*/ - if (nal->cb_send_pages == NULL || - nal->cb_recv_pages == NULL) - return PTL_INV_MD; - - new->md_niov = md->niov; - - if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) - return PTL_SEGV; - - for (i = 0; i < new->md_niov; i++) { - /* We take the page pointer on trust */ - if (new->md_iov.kiov[i].kiov_offset + - new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) - return PTL_VAL_FAILED; /* invalid length */ - - total_length += new->md_iov.kiov[i].kiov_len; - } - - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - - if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, - &new->md_addrkey); - if (rc != PTL_OK) - return (rc); - } -#endif - } else { /* contiguous */ - new->md_niov = 1; - new->md_iov.iov[0].iov_base = md->start; - new->md_iov.iov[0].iov_len = md->length; - - if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, - &new->md_addrkey); - if (rc != PTL_OK) - return (rc); - } - } - - if (eq != NULL) - eq->eq_refcount++; - - /* It's good; let handle2md succeed and add to active mds */ - lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD); - list_add (&new->md_list, &nal->ni.ni_active_mds); - - return PTL_OK; -} - -/* must be called with state lock held */ -void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) -{ - /* NB this doesn't copy out all the iov entries so when a - * discontiguous MD is copied out, the target gets to know the - * original iov pointer (in start) and the number of entries it had - * and that's all. - */ - new->start = md->start; - new->length = md->length; - new->threshold = md->threshold; - new->max_size = md->max_size; - new->options = md->options; - new->user_ptr = md->user_ptr; - ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 
0 : md->md_niov; -} - -int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_me_t current_in - * ptl_md_t md_in - * ptl_unlink_t unlink_in - * - * Outgoing: - * ptl_handle_md_t * handle_out - */ - - PtlMDAttach_in *args = v_args; - PtlMDAttach_out *ret = v_ret; - lib_me_t *me; - lib_md_t *md; - unsigned long flags; - - md = lib_md_alloc (nal); - if (md == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - me = ptl_handle2me(&args->me_in, nal); - if (me == NULL) { - ret->rc = PTL_INV_ME; - } else if (me->md != NULL) { - ret->rc = PTL_INUSE; - } else { - ret->rc = lib_md_build(nal, md, private, &args->md_in, - &args->eq_in, args->unlink_in); - - if (ret->rc == PTL_OK) { - me->md = md; - md->me = me; - - ptl_md2handle(&ret->handle_out, md); - - state_unlock (nal, &flags); - return (PTL_OK); - } - } - - lib_md_free (nal, md); - - state_unlock (nal, &flags); - return (ret->rc); -} - -int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t ni_in - * ptl_md_t md_in - * - * Outgoing: - * ptl_handle_md_t * handle_out - */ - - PtlMDBind_in *args = v_args; - PtlMDBind_out *ret = v_ret; - lib_md_t *md; - unsigned long flags; - - md = lib_md_alloc (nal); - if (md == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - ret->rc = lib_md_build(nal, md, private, - &args->md_in, &args->eq_in, PTL_UNLINK); - - if (ret->rc == PTL_OK) { - ptl_md2handle(&ret->handle_out, md); - - state_unlock(nal, &flags); - return (PTL_OK); - } - - lib_md_free (nal, md); - - state_unlock(nal, &flags); - return (ret->rc); -} - -int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMDUnlink_in *args = v_args; - PtlMDUnlink_out *ret = v_ret; - - lib_md_t *md; - unsigned long flags; - - state_lock(nal, &flags); - - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL) { - ret->rc = PTL_INV_MD; - } else if 
(md->pending != 0) { /* being filled/spilled */ - ret->rc = PTL_MD_INUSE; - } else { - /* Callers attempting to unlink a busy MD which will get - * unlinked once the net op completes should see INUSE, - * before completion and INV_MD thereafter. LASSERT we've - * got that right... */ - LASSERT ((md->md_flags & PTL_MD_FLAG_UNLINK) == 0); - - lib_md_deconstruct(nal, md, &ret->status_out); - lib_md_unlink(nal, md); - ret->rc = PTL_OK; - } - - state_unlock(nal, &flags); - - return (ret->rc); -} - -int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_md_t * old_inout - * ptl_md_t * new_inout - * ptl_handle_eq_t testq_in - * ptl_seq_t sequence_in - * - * Outgoing: - * ptl_md_t * old_inout - * ptl_md_t * new_inout - */ - PtlMDUpdate_internal_in *args = v_args; - PtlMDUpdate_internal_out *ret = v_ret; - lib_md_t *md; - lib_eq_t *test_eq = NULL; - ptl_md_t *new = &args->new_inout; - unsigned long flags; - - state_lock(nal, &flags); - - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL) { - ret->rc = PTL_INV_MD; - goto out; - } - - if (args->old_inout_valid) - lib_md_deconstruct(nal, md, &ret->old_inout); - - if (!args->new_inout_valid) { - ret->rc = PTL_OK; - goto out; - } - - if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) { - test_eq = ptl_handle2eq(&args->testq_in, nal); - if (test_eq == NULL) { - ret->rc = PTL_INV_EQ; - goto out; - } - } - - if (md->pending != 0) { - ret->rc = PTL_NOUPDATE; - goto out; - } - - if (test_eq == NULL || - test_eq->sequence == args->sequence_in) { - lib_me_t *me = md->me; - -#warning this does not track eq refcounts properly - - ret->rc = lib_md_build(nal, md, private, - new, &new->eventq, md->unlink); - - md->me = me; - } else { - ret->rc = PTL_NOUPDATE; - } - - out: - state_unlock(nal, &flags); - return (ret->rc); -} diff --git a/lnet/lnet/lib-me.c b/lnet/lnet/lib-me.c deleted file mode 100644 index bd1af5b..0000000 --- a/lnet/lnet/lib-me.c 
+++ /dev/null @@ -1,227 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-me.c - * Match Entry management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef __KERNEL__ -# include -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include -#endif - -#include -#include - -static void lib_me_dump(nal_cb_t * nal, lib_me_t * me); - -int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEAttach_in *args = v_args; - PtlMEAttach_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_ptl_t *tbl = &ni->tbl; - unsigned long flags; - lib_me_t *me; - - if (args->index_in < 0 || args->index_in >= tbl->size) - return ret->rc = PTL_INV_PTINDEX; - - /* Should check for valid matchid, but not yet */ - if (0) - return ret->rc = PTL_INV_PROC; - - me = lib_me_alloc (nal); - if (me == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - me->match_id = args->match_id_in; - me->match_bits = args->match_bits_in; - me->ignore_bits = args->ignore_bits_in; - me->unlink = args->unlink_in; - me->md = NULL; - - lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME); - - if (args->position_in == PTL_INS_AFTER) - list_add_tail(&me->me_list, &(tbl->tbl[args->index_in])); - else - list_add(&me->me_list, &(tbl->tbl[args->index_in])); - - ptl_me2handle(&ret->handle_out, me); - - state_unlock(nal, &flags); - - return ret->rc = PTL_OK; -} - -int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEInsert_in *args = v_args; - PtlMEInsert_out *ret = v_ret; - unsigned long flags; - lib_me_t *me; - lib_me_t *new; - - new = lib_me_alloc (nal); - if (new == NULL) - return (ret->rc = PTL_NOSPACE); - - /* Should check for valid matchid, but not yet */ - - state_lock(nal, &flags); - - me = ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - lib_me_free (nal, new); - - state_unlock (nal, &flags); - return (ret->rc = PTL_INV_ME); - } - - new->match_id = args->match_id_in; - new->match_bits = args->match_bits_in; - new->ignore_bits = args->ignore_bits_in; - new->unlink = args->unlink_in; - new->md = NULL; - - lib_initialise_handle (nal, &new->me_lh, 
PTL_COOKIE_TYPE_ME); - - if (args->position_in == PTL_INS_AFTER) - list_add_tail(&new->me_list, &me->me_list); - else - list_add(&new->me_list, &me->me_list); - - ptl_me2handle(&ret->handle_out, new); - - state_unlock(nal, &flags); - - return ret->rc = PTL_OK; -} - -int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEUnlink_in *args = v_args; - PtlMEUnlink_out *ret = v_ret; - unsigned long flags; - lib_me_t *me; - - state_lock(nal, &flags); - - me = ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - ret->rc = PTL_INV_ME; - } else { - lib_me_unlink(nal, me); - ret->rc = PTL_OK; - } - - state_unlock(nal, &flags); - - return (ret->rc); -} - -/* call with state_lock please */ -void lib_me_unlink(nal_cb_t *nal, lib_me_t *me) -{ - lib_ni_t *ni = &nal->ni; - - if (ni->debug & PTL_DEBUG_UNLINK) { - ptl_handle_any_t handle; - ptl_me2handle(&handle, me); - } - - list_del (&me->me_list); - - if (me->md) { - me->md->me = NULL; - lib_md_unlink(nal, me->md); - } - - lib_invalidate_handle (nal, &me->me_lh); - lib_me_free(nal, me); -} - -int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlTblDump_in *args = v_args; - PtlTblDump_out *ret = v_ret; - lib_ptl_t *tbl = &nal->ni.tbl; - ptl_handle_any_t handle; - struct list_head *tmp; - unsigned long flags; - - if (args->index_in < 0 || args->index_in >= tbl->size) - return ret->rc = PTL_INV_PTINDEX; - - nal->cb_printf(nal, "Portal table index %d\n", args->index_in); - - state_lock(nal, &flags); - list_for_each(tmp, &(tbl->tbl[args->index_in])) { - lib_me_t *me = list_entry(tmp, lib_me_t, me_list); - ptl_me2handle(&handle, me); - lib_me_dump(nal, me); - } - state_unlock(nal, &flags); - - return ret->rc = PTL_OK; -} - -int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEDump_in *args = v_args; - PtlMEDump_out *ret = v_ret; - lib_me_t *me; - unsigned long flags; - - state_lock(nal, &flags); - - me = 
ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - ret->rc = PTL_INV_ME; - } else { - lib_me_dump(nal, me); - ret->rc = PTL_OK; - } - - state_unlock(nal, &flags); - - return ret->rc; -} - -static void lib_me_dump(nal_cb_t * nal, lib_me_t * me) -{ - nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me, - me->me_lh.lh_cookie); - - nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n", - me->match_bits, me->ignore_bits); - - nal->cb_printf(nal, "\tMD\t= %p\n", me->md); - nal->cb_printf(nal, "\tprev\t= %p\n", - list_entry(me->me_list.prev, lib_me_t, me_list)); - nal->cb_printf(nal, "\tnext\t= %p\n", - list_entry(me->me_list.next, lib_me_t, me_list)); -} diff --git a/lnet/lnet/lib-move.c b/lnet/lnet/lib-move.c deleted file mode 100644 index fde4f16..0000000 --- a/lnet/lnet/lib-move.c +++ /dev/null @@ -1,1379 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-move.c - * Data movement routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef __KERNEL__ -# include -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include -#endif -#include -#include -#include - -/* - * Right now it does not check access control lists. - * - * We only support one MD per ME, which is how the Portals 3.1 spec is written. - * All previous complication is removed. - */ - -static lib_me_t * -lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid, - ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset, - ptl_match_bits_t match_bits, ptl_size_t *mlength_out, - ptl_size_t *offset_out, int *unlink_out) -{ - lib_ni_t *ni = &nal->ni; - struct list_head *match_list = &ni->tbl.tbl[index]; - struct list_head *tmp; - lib_me_t *me; - lib_md_t *md; - ptl_size_t mlength; - ptl_size_t offset; - - ENTRY; - - CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d " - "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits); - - if (index < 0 || index >= ni->tbl.size) { - CERROR("Invalid portal %d not in [0-%d]\n", - index, ni->tbl.size); - goto failed; - } - - list_for_each (tmp, match_list) { - me = list_entry(tmp, lib_me_t, me_list); - md = me->md; - - /* ME attached but MD not attached yet */ - if (md == NULL) - continue; - - LASSERT (me == md->me); - - /* MD deactivated */ - if (md->threshold == 0) - continue; - - /* mismatched MD op */ - if ((md->options & op_mask) == 0) - continue; - - /* mismatched ME nid/pid? */ - if (me->match_id.nid != PTL_NID_ANY && - me->match_id.nid != src_nid) - continue; - - if (me->match_id.pid != PTL_PID_ANY && - me->match_id.pid != src_pid) - continue; - - /* mismatched ME matchbits? */ - if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0) - continue; - - /* Hurrah! This _is_ a match; check it out... 
*/ - - if ((md->options & PTL_MD_MANAGE_REMOTE) == 0) - offset = md->offset; - else - offset = roffset; - - mlength = md->length - offset; - if ((md->options & PTL_MD_MAX_SIZE) != 0 && - mlength > md->max_size) - mlength = md->max_size; - - if (rlength <= mlength) { /* fits in allowed space */ - mlength = rlength; - } else if ((md->options & PTL_MD_TRUNCATE) == 0) { - /* this packet _really_ is too big */ - CERROR("Matching packet %d too big: %d left, " - "%d allowed\n", rlength, md->length - offset, - mlength); - goto failed; - } - - md->offset = offset + mlength; - - *offset_out = offset; - *mlength_out = mlength; - *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 && - md->offset >= (md->length - md->max_size)); - RETURN (me); - } - - failed: - CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64 - " offset %d length %d: no match\n", - ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT", - src_nid, src_pid, index, match_bits, roffset, rlength); - RETURN(NULL); -} - -int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret) -{ - PtlFailNid_in *args = v_args; - PtlFailNid_out *ret = v_ret; - lib_test_peer_t *tp; - unsigned long flags; - struct list_head *el; - struct list_head *next; - struct list_head cull; - - if (args->threshold != 0) { - /* Adding a new entry */ - tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp)); - if (tp == NULL) - return (ret->rc = PTL_FAIL); - - tp->tp_nid = args->nid; - tp->tp_threshold = args->threshold; - - state_lock (nal, &flags); - list_add (&tp->tp_list, &nal->ni.ni_test_peers); - state_unlock (nal, &flags); - return (ret->rc = PTL_OK); - } - - /* removing entries */ - INIT_LIST_HEAD (&cull); - - state_lock (nal, &flags); - - list_for_each_safe (el, next, &nal->ni.ni_test_peers) { - tp = list_entry (el, lib_test_peer_t, tp_list); - - if (tp->tp_threshold == 0 || /* needs culling anyway */ - args->nid == PTL_NID_ANY || /* removing all entries */ - tp->tp_nid == args->nid) /* matched this 
one */ - { - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - - state_unlock (nal, &flags); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lib_test_peer_t, tp_list); - - list_del (&tp->tp_list); - nal->cb_free (nal, tp, sizeof (*tp)); - } - return (ret->rc = PTL_OK); -} - -static int -fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing) -{ - lib_test_peer_t *tp; - struct list_head *el; - struct list_head *next; - unsigned long flags; - struct list_head cull; - int fail = 0; - - INIT_LIST_HEAD (&cull); - - state_lock (nal, &flags); - - list_for_each_safe (el, next, &nal->ni.ni_test_peers) { - tp = list_entry (el, lib_test_peer_t, tp_list); - - if (tp->tp_threshold == 0) { - /* zombie entry */ - if (outgoing) { - /* only cull zombies on outgoing tests, - * since we may be at interrupt priority on - * incoming messages. */ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - continue; - } - - if (tp->tp_nid == PTL_NID_ANY || /* fail every peer */ - nid == tp->tp_nid) { /* fail this peer */ - fail = 1; - - if (tp->tp_threshold != PTL_MD_THRESH_INF) { - tp->tp_threshold--; - if (outgoing && - tp->tp_threshold == 0) { - /* see above */ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - break; - } - } - - state_unlock (nal, &flags); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lib_test_peer_t, tp_list); - list_del (&tp->tp_list); - - nal->cb_free (nal, tp, sizeof (*tp)); - } - - return (fail); -} - -ptl_size_t -lib_iov_nob (int niov, struct iovec *iov) -{ - ptl_size_t nob = 0; - - while (niov-- > 0) - nob += (iov++)->iov_len; - - return (nob); -} - -void -lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len) -{ - ptl_size_t nob; - - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (iov->iov_len, len); - memcpy (dest, iov->iov_base, nob); - - len -= nob; - dest += nob; - niov--; - iov++; - } -} - -void -lib_copy_buf2iov (int niov, struct iovec *iov, char 
*src, ptl_size_t len) -{ - ptl_size_t nob; - - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (iov->iov_len, len); - memcpy (iov->iov_base, src, nob); - - len -= nob; - src += nob; - niov--; - iov++; - } -} - -static int -lib_extract_iov (struct iovec *dst, lib_md_t *md, - ptl_size_t offset, ptl_size_t len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - int src_niov = md->md_niov; - struct iovec *src = md->md_iov.iov; - ptl_size_t frag_len; - int dst_niov; - - LASSERT (len >= 0); - LASSERT (offset >= 0); - LASSERT (offset + len <= md->length); - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - dst_niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (dst_niov <= PTL_MD_MAX_IOV); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return (dst_niov); - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - dst_niov++; - src_niov--; - offset = 0; - } -} - -#ifndef __KERNEL__ -ptl_size_t -lib_kiov_nob (int niov, ptl_kiov_t *kiov) -{ - LASSERT (0); - return (0); -} - -void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len) -{ - LASSERT (0); -} - -void -lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *dest, ptl_size_t len) -{ - LASSERT (0); -} - -static int -lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md, - ptl_size_t offset, ptl_size_t len) -{ - LASSERT (0); -} - -#else - -ptl_size_t -lib_kiov_nob (int niov, ptl_kiov_t *kiov) -{ - ptl_size_t nob = 0; - - while (niov-- > 0) - nob += (kiov++)->kiov_len; - - return (nob); -} - -void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len) -{ - ptl_size_t 
nob; - char *addr; - - LASSERT (!in_interrupt ()); - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (kiov->kiov_len, len); - - addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - memcpy (dest, addr, nob); - kunmap (kiov->kiov_page); - - len -= nob; - dest += nob; - niov--; - kiov++; - } -} - -void -lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *src, ptl_size_t len) -{ - ptl_size_t nob; - char *addr; - - LASSERT (!in_interrupt ()); - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (kiov->kiov_len, len); - - addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - memcpy (addr, src, nob); - kunmap (kiov->kiov_page); - - len -= nob; - src += nob; - niov--; - kiov++; - } -} - -static int -lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md, - ptl_size_t offset, ptl_size_t len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - int src_niov = md->md_niov; - ptl_kiov_t *src = md->md_iov.kiov; - ptl_size_t frag_len; - int dst_niov; - - LASSERT (len >= 0); - LASSERT (offset >= 0); - LASSERT (offset + len <= md->length); - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - dst_niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (dst_niov <= PTL_MD_MAX_IOV); - - frag_len = src->kiov_len - offset; - dst->kiov_page = src->kiov_page; - dst->kiov_offset = src->kiov_offset + offset; - - if (len <= frag_len) { - dst->kiov_len = len; - LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); - return (dst_niov); - } - - dst->kiov_len = frag_len; - LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); - - len -= frag_len; - dst++; - src++; - dst_niov++; - src_niov--; - offset = 0; - } -} -#endif - -void -lib_recv (nal_cb_t *nal, void *private, 
lib_msg_t *msg, lib_md_t *md, - ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen) -{ - int niov; - - if (mlen == 0) - nal->cb_recv (nal, private, msg, 0, NULL, 0, rlen); - else if ((md->options & PTL_MD_KIOV) == 0) { - niov = lib_extract_iov (msg->msg_iov.iov, md, offset, mlen); - nal->cb_recv (nal, private, msg, - niov, msg->msg_iov.iov, mlen, rlen); - } else { - niov = lib_extract_kiov (msg->msg_iov.kiov, md, offset, mlen); - nal->cb_recv_pages (nal, private, msg, - niov, msg->msg_iov.kiov, mlen, rlen); - } -} - -int -lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len) -{ - int niov; - - if (len == 0) - return (nal->cb_send (nal, private, msg, - hdr, type, nid, pid, - 0, NULL, 0)); - - if ((md->options & PTL_MD_KIOV) == 0) { - niov = lib_extract_iov (msg->msg_iov.iov, md, offset, len); - return (nal->cb_send (nal, private, msg, - hdr, type, nid, pid, - niov, msg->msg_iov.iov, len)); - } - - niov = lib_extract_kiov (msg->msg_iov.kiov, md, offset, len); - return (nal->cb_send_pages (nal, private, msg, - hdr, type, nid, pid, - niov, msg->msg_iov.kiov, len)); -} - -static lib_msg_t * -get_new_msg (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS called holding the state_lock */ - lib_counters_t *counters = &nal->ni.counters; - lib_msg_t *msg = lib_msg_alloc (nal); - - if (msg == NULL) - return (NULL); - - memset (msg, 0, sizeof (*msg)); - - msg->send_ack = 0; - - msg->md = md; - msg->ev.arrival_time = get_cycles(); - md->pending++; - if (md->threshold != PTL_MD_THRESH_INF) { - LASSERT (md->threshold > 0); - md->threshold--; - } - - counters->msgs_alloc++; - if (counters->msgs_alloc > counters->msgs_max) - counters->msgs_max = counters->msgs_alloc; - - list_add (&msg->msg_list, &nal->ni.ni_active_msgs); - - return (msg); -} - - -/* - * Incoming messages have a ptl_msg_t object associated with them - * by the library. 
This object encapsulates the state of the - * message and allows the NAL to do non-blocking receives or sends - * of long messages. - * - */ -static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - ptl_size_t mlength = 0; - ptl_size_t offset = 0; - int unlink = 0; - lib_me_t *me; - lib_md_t *md; - lib_msg_t *msg; - unsigned long flags; - - /* Convert put fields to host byte order */ - hdr->msg.put.match_bits = NTOH__u64 (hdr->msg.put.match_bits); - hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index); - hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset); - - state_lock(nal, &flags); - - me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT, - hdr->src_nid, hdr->src_pid, - PTL_HDR_LENGTH (hdr), hdr->msg.put.offset, - hdr->msg.put.match_bits, - &mlength, &offset, &unlink); - if (me == NULL) - goto drop; - - md = me->md; - CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d " - "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), - md->md_lh.lh_cookie, md->md_niov, offset); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping PUT from "LPU64": can't allocate msg\n", - ni->nid, hdr->src_nid); - goto drop; - } - - if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - !(md->options & PTL_MD_ACK_DISABLE)) { - msg->send_ack = 1; - msg->ack_wmd = hdr->msg.put.ack_wmd; - msg->nid = hdr->src_nid; - msg->pid = hdr->src_pid; - msg->ev.match_bits = hdr->msg.put.match_bits; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_PUT; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.portal = hdr->msg.put.ptl_index; - msg->ev.match_bits = hdr->msg.put.match_bits; - msg->ev.rlength = PTL_HDR_LENGTH(hdr); - msg->ev.mlength = mlength; - msg->ev.offset = offset; - msg->ev.hdr_data = hdr->msg.put.hdr_data; - - /* NB if this match has exhausted the MD, we can't be sure - * that this 
event will the the last one associated with - * this MD in the event queue (another message already - * matching this ME/MD could end up being last). So we - * remember the ME handle anyway and check again when we're - * allocating our slot in the event queue. - */ - ptl_me2handle (&msg->ev.unlinked_me, me); - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.recv_count++; - ni->counters.recv_length += mlength; - - /* only unlink after MD's pending count has been bumped - * in get_new_msg() otherwise lib_me_unlink() will nuke it */ - if (unlink) { - md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED; - lib_me_unlink (nal, me); - } - - state_unlock(nal, &flags); - - lib_recv (nal, private, msg, md, offset, mlength, PTL_HDR_LENGTH (hdr)); - return 0; - - drop: - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); - state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - ptl_size_t mlength = 0; - ptl_size_t offset = 0; - int unlink = 0; - lib_me_t *me; - lib_md_t *md; - lib_msg_t *msg; - ptl_hdr_t reply; - unsigned long flags; - int rc; - - /* Convert get fields to host byte order */ - hdr->msg.get.match_bits = NTOH__u64 (hdr->msg.get.match_bits); - hdr->msg.get.ptl_index = NTOH__u32 (hdr->msg.get.ptl_index); - hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length); - hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset); - - /* compatibility check until field is deleted */ - if (hdr->msg.get.return_offset != 0) - CERROR("Unexpected non-zero get.return_offset %x from " - LPU64"\n", hdr->msg.get.return_offset, hdr->src_nid); - - state_lock(nal, &flags); - - me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET, - hdr->src_nid, hdr->src_pid, - hdr->msg.get.sink_length, hdr->msg.get.src_offset, - hdr->msg.get.match_bits, - &mlength, &offset, 
&unlink); - if (me == NULL) - goto drop; - - md = me->md; - CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d " - "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), - md->md_lh.lh_cookie, md->md_niov, offset); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping GET from "LPU64": can't allocate msg\n", - ni->nid, hdr->src_nid); - goto drop; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_GET; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.portal = hdr->msg.get.ptl_index; - msg->ev.match_bits = hdr->msg.get.match_bits; - msg->ev.rlength = PTL_HDR_LENGTH(hdr); - msg->ev.mlength = mlength; - msg->ev.offset = offset; - msg->ev.hdr_data = 0; - - /* NB if this match has exhausted the MD, we can't be sure - * that this event will the the last one associated with - * this MD in the event queue (another message already - * matching this ME/MD could end up being last). So we - * remember the ME handle anyway and check again when we're - * allocating our slot in the event queue. 
- */ - ptl_me2handle (&msg->ev.unlinked_me, me); - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.send_count++; - ni->counters.send_length += mlength; - - /* only unlink after MD's refcount has been bumped - * in get_new_msg() otherwise lib_me_unlink() will nuke it */ - if (unlink) { - md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED; - lib_me_unlink (nal, me); - } - - state_unlock(nal, &flags); - - memset (&reply, 0, sizeof (reply)); - reply.type = HTON__u32 (PTL_MSG_REPLY); - reply.dest_nid = HTON__u64 (hdr->src_nid); - reply.src_nid = HTON__u64 (ni->nid); - reply.dest_pid = HTON__u32 (hdr->src_pid); - reply.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&reply) = HTON__u32 (mlength); - - reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd; - - rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY, - hdr->src_nid, hdr->src_pid, md, offset, mlength); - if (rc != 0) { - CERROR(LPU64": Dropping GET from "LPU64": send REPLY failed\n", - ni->nid, hdr->src_nid); - state_lock (nal, &flags); - goto drop; - } - - /* Complete the incoming message */ - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (rc); - drop: - ni->counters.drop_count++; - ni->counters.drop_length += hdr->msg.get.sink_length; - state_unlock(nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - lib_md_t *md; - int rlength; - int length; - lib_msg_t *msg; - unsigned long flags; - - /* compatibility check until field is deleted */ - if (hdr->msg.reply.dst_offset != 0) - CERROR("Unexpected non-zero reply.dst_offset %x from "LPU64"\n", - hdr->msg.reply.dst_offset, hdr->src_nid); - - state_lock(nal, &flags); - - /* NB handles only looked up by creator (no flips) */ - md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal); - if (md == NULL || md->threshold == 0) { - CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD 
"LPX64"."LPX64"\n", - ni->nid, hdr->src_nid, - md == NULL ? "invalid" : "inactive", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - goto drop; - } - - LASSERT (md->offset == 0); - - length = rlength = PTL_HDR_LENGTH(hdr); - - if (length > md->length) { - if ((md->options & PTL_MD_TRUNCATE) == 0) { - CERROR (LPU64": Dropping REPLY from "LPU64 - " length %d for MD "LPX64" would overflow (%d)\n", - ni->nid, hdr->src_nid, length, - hdr->msg.reply.dst_wmd.wh_object_cookie, - md->length); - goto drop; - } - length = md->length; - } - - CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n", - hdr->src_nid, length, rlength, - hdr->msg.reply.dst_wmd.wh_object_cookie); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping REPLY from "LPU64": can't " - "allocate msg\n", ni->nid, hdr->src_nid); - goto drop; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_REPLY; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.rlength = rlength; - msg->ev.mlength = length; - msg->ev.offset = 0; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.recv_count++; - ni->counters.recv_length += length; - - state_unlock(nal, &flags); - - lib_recv (nal, private, msg, md, 0, length, rlength); - return 0; - - drop: - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); - state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static int parse_ack(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - lib_md_t *md; - lib_msg_t *msg = NULL; - unsigned long flags; - - /* Convert ack fields to host byte order */ - hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits); - hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength); - - state_lock(nal, &flags); - - /* NB handles only looked up by creator (no flips) */ - md = 
ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal); - if (md == NULL || md->threshold == 0) { - CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD " - LPX64"."LPX64"\n", ni->nid, hdr->src_nid, - (md == NULL) ? "invalid" : "inactive", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie); - goto drop; - } - - CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n", - ni->nid, hdr->src_nid, - hdr->msg.ack.dst_wmd.wh_object_cookie); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping ACK from "LPU64": can't allocate msg\n", - ni->nid, hdr->src_nid); - goto drop; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_ACK; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.mlength = hdr->msg.ack.mlength; - msg->ev.match_bits = hdr->msg.ack.match_bits; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.recv_count++; - state_unlock(nal, &flags); - lib_recv (nal, private, msg, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return 0; - - drop: - nal->ni.counters.drop_count++; - state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static char * -hdr_type_string (ptl_hdr_t *hdr) -{ - switch (hdr->type) { - case PTL_MSG_ACK: - return ("ACK"); - case PTL_MSG_PUT: - return ("PUT"); - case PTL_MSG_GET: - return ("GET"); - case PTL_MSG_REPLY: - return ("REPLY"); - case PTL_MSG_HELLO: - return ("HELLO"); - default: - return (""); - } -} - -void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr) -{ - char *type_str = hdr_type_string (hdr); - - nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str); - nal->cb_printf(nal, " From nid/pid %Lu/%Lu", hdr->src_nid, - hdr->src_pid); - nal->cb_printf(nal, " To nid/pid %Lu/%Lu\n", hdr->dest_nid, - hdr->dest_pid); - - switch (hdr->type) { - default: - break; - - case PTL_MSG_PUT: - nal->cb_printf(nal, - " Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPX64"\n", - 
hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - nal->cb_printf(nal, - " Length %d, offset %d, hdr data "LPX64"\n", - PTL_HDR_LENGTH(hdr), hdr->msg.put.offset, - hdr->msg.put.hdr_data); - break; - - case PTL_MSG_GET: - nal->cb_printf(nal, - " Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPX64"\n", hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - nal->cb_printf(nal, - " Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); - break; - - case PTL_MSG_ACK: - nal->cb_printf(nal, " dst md "LPX64"."LPX64", " - "manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); - break; - - case PTL_MSG_REPLY: - nal->cb_printf(nal, " dst md "LPX64"."LPX64", " - "length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - PTL_HDR_LENGTH(hdr)); - } - -} /* end of print_hdr() */ - - -int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - unsigned long flags; - - /* NB static check; optimizer will elide this if it's right */ - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.put.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.get.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.reply.length)); - - /* convert common fields to host byte order */ - hdr->dest_nid = NTOH__u64 (hdr->dest_nid); - hdr->src_nid = NTOH__u64 (hdr->src_nid); - hdr->dest_pid = NTOH__u32 (hdr->dest_pid); - hdr->src_pid = NTOH__u32 (hdr->src_pid); - hdr->type = NTOH__u32 (hdr->type); - PTL_HDR_LENGTH(hdr) = NTOH__u32 (PTL_HDR_LENGTH(hdr)); -#if 0 - nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n", - nal->ni.nid, nal, hdr, hdr->type); - 
print_hdr(nal, hdr); -#endif - if (hdr->type == PTL_MSG_HELLO) { - /* dest_nid is really ptl_magicversion_t */ - ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid; - - CERROR (LPU64": Dropping unexpected HELLO message: " - "magic %d, version %d.%d from "LPD64"\n", - nal->ni.nid, mv->magic, - mv->version_major, mv->version_minor, - hdr->src_nid); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (-1); - } - - if (hdr->dest_nid != nal->ni.nid) { - CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64 - " (not me)\n", nal->ni.nid, hdr_type_string (hdr), - hdr->src_nid, hdr->dest_nid); - - state_lock (nal, &flags); - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); - state_unlock (nal, &flags); - - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (-1); - } - - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - fail_peer (nal, hdr->src_nid, 0)) /* shall we now? */ - { - CERROR(LPU64": Dropping incoming %s from "LPU64 - ": simulated failure\n", - nal->ni.nid, hdr_type_string (hdr), - hdr->src_nid); - return (-1); - } - - switch (hdr->type) { - case PTL_MSG_ACK: - return (parse_ack(nal, hdr, private)); - case PTL_MSG_PUT: - return (parse_put(nal, hdr, private)); - break; - case PTL_MSG_GET: - return (parse_get(nal, hdr, private)); - break; - case PTL_MSG_REPLY: - return (parse_reply(nal, hdr, private)); - break; - default: - CERROR(LPU64": Dropping message from "LPU64 - ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid, - hdr->type); - - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (-1); - } -} - - -int do_PtlPut(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_ack_req_t ack_req_in - * ptl_process_id_t target_in - * ptl_pt_index_t portal_in - * ptl_ac_index_t cookie_in - * ptl_match_bits_t match_bits_in - * ptl_size_t offset_in - * - * Outgoing: - */ - - 
PtlPut_in *args = v_args; - PtlPut_out *ret = v_ret; - ptl_hdr_t hdr; - - lib_ni_t *ni = &nal->ni; - lib_md_t *md; - lib_msg_t *msg = NULL; - ptl_process_id_t *id = &args->target_in; - unsigned long flags; - - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - fail_peer (nal, id->nid, 1)) /* shall we now? */ - { - CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n", - nal->ni.nid, id->nid); - return (ret->rc = PTL_INV_PROC); - } - - ret->rc = PTL_OK; - state_lock(nal, &flags); - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL || !md->threshold) { - state_unlock(nal, &flags); - return ret->rc = PTL_INV_MD; - } - - CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid, - (unsigned long)id->pid); - - memset (&hdr, 0, sizeof (hdr)); - hdr.type = HTON__u32 (PTL_MSG_PUT); - hdr.dest_nid = HTON__u64 (id->nid); - hdr.src_nid = HTON__u64 (ni->nid); - hdr.dest_pid = HTON__u32 (id->pid); - hdr.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&hdr) = HTON__u32 (md->length); - - /* NB handles only looked up by creator (no flips) */ - if (args->ack_req_in == PTL_ACK_REQ) { - hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie; - hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie; - } else { - hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE; - } - - hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in); - hdr.msg.put.ptl_index = HTON__u32 (args->portal_in); - hdr.msg.put.offset = HTON__u32 (args->offset_in); - hdr.msg.put.hdr_data = args->hdr_data_in; - - ni->counters.send_count++; - ni->counters.send_length += md->length; - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR("BAD: could not allocate msg!\n"); - state_unlock(nal, &flags); - return ret->rc = PTL_NOSPACE; - } - - /* - * If this memory descriptor has an event queue associated with - * it we need to allocate a message state object and record the - * information about this operation that will be recorded into - * event queue once the message 
has been completed. - * - * NB. We're now committed to the GET, since we just marked the MD - * busy. Callers who observe this (by getting PTL_MD_INUSE from - * PtlMDUnlink()) expect a completion event to tell them when the - * MD becomes idle. - */ - if (md->eq) { - msg->ev.type = PTL_EVENT_SENT; - msg->ev.initiator.nid = ni->nid; - msg->ev.initiator.pid = ni->pid; - msg->ev.portal = args->portal_in; - msg->ev.match_bits = args->match_bits_in; - msg->ev.rlength = md->length; - msg->ev.mlength = md->length; - msg->ev.offset = args->offset_in; - msg->ev.hdr_data = args->hdr_data_in; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - state_unlock(nal, &flags); - - lib_send (nal, private, msg, &hdr, PTL_MSG_PUT, - id->nid, id->pid, md, 0, md->length); - - return ret->rc = PTL_OK; -} - - -int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_process_id_t target_in - * ptl_pt_index_t portal_in - * ptl_ac_index_t cookie_in - * ptl_match_bits_t match_bits_in - * ptl_size_t offset_in - * - * Outgoing: - */ - - PtlGet_in *args = v_args; - PtlGet_out *ret = v_ret; - ptl_hdr_t hdr; - lib_msg_t *msg = NULL; - lib_ni_t *ni = &nal->ni; - ptl_process_id_t *id = &args->target_in; - lib_md_t *md; - unsigned long flags; - - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - fail_peer (nal, id->nid, 1)) /* shall we now? 
*/ - { - CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n", - nal->ni.nid, id->nid); - return (ret->rc = PTL_INV_PROC); - } - - state_lock(nal, &flags); - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL || !md->threshold) { - state_unlock(nal, &flags); - return ret->rc = PTL_INV_MD; - } - - LASSERT (md->offset == 0); - - CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid, - (unsigned long)id->pid); - - memset (&hdr, 0, sizeof (hdr)); - hdr.type = HTON__u32 (PTL_MSG_GET); - hdr.dest_nid = HTON__u64 (id->nid); - hdr.src_nid = HTON__u64 (ni->nid); - hdr.dest_pid = HTON__u32 (id->pid); - hdr.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&hdr) = 0; - - /* NB handles only looked up by creator (no flips) */ - hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie; - hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie; - - hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in); - hdr.msg.get.ptl_index = HTON__u32 (args->portal_in); - hdr.msg.get.src_offset = HTON__u32 (args->offset_in); - hdr.msg.get.sink_length = HTON__u32 (md->length); - - ni->counters.send_count++; - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR("do_PtlGet: BAD - could not allocate cookie!\n"); - state_unlock(nal, &flags); - return ret->rc = PTL_NOSPACE; - } - - /* - * If this memory descriptor has an event queue associated with - * it we must allocate a message state object that will record - * the information to be filled in once the message has been - * completed. More information is in the do_PtlPut() comments. - * - * NB. We're now committed to the GET, since we just marked the MD - * busy. Callers who observe this (by getting PTL_MD_INUSE from - * PtlMDUnlink()) expect a completion event to tell them when the - * MD becomes idle. 
- */ - if (md->eq) { - msg->ev.type = PTL_EVENT_SENT; - msg->ev.initiator.nid = ni->nid; - msg->ev.initiator.pid = ni->pid; - msg->ev.portal = args->portal_in; - msg->ev.match_bits = args->match_bits_in; - msg->ev.rlength = md->length; - msg->ev.mlength = md->length; - msg->ev.offset = args->offset_in; - msg->ev.hdr_data = 0; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - state_unlock(nal, &flags); - - lib_send (nal, private, msg, &hdr, PTL_MSG_GET, - id->nid, id->pid, NULL, 0, 0); - - return ret->rc = PTL_OK; -} - -void lib_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' */ - - /* Constants... */ - LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 1); - LASSERT (PTL_MSG_ACK == 0); - LASSERT (PTL_MSG_PUT == 1); - LASSERT (PTL_MSG_GET == 2); - LASSERT (PTL_MSG_REPLY == 3); - LASSERT (PTL_MSG_HELLO == 4); - - /* Checks for struct ptl_handle_wire_t */ - LASSERT (sizeof (ptl_handle_wire_t) == 16); - LASSERT (offsetof (ptl_handle_wire_t, wh_interface_cookie) == 0); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); - LASSERT (offsetof (ptl_handle_wire_t, wh_object_cookie) == 8); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); - - /* Checks for struct ptl_magicversion_t */ - LASSERT (sizeof (ptl_magicversion_t) == 8); - LASSERT (offsetof (ptl_magicversion_t, magic) == 0); - LASSERT (sizeof (((ptl_magicversion_t *)0)->magic) == 4); - LASSERT (offsetof (ptl_magicversion_t, version_major) == 4); - LASSERT (sizeof (((ptl_magicversion_t *)0)->version_major) == 2); - LASSERT (offsetof (ptl_magicversion_t, version_minor) == 6); - LASSERT (sizeof (((ptl_magicversion_t *)0)->version_minor) == 2); - - /* Checks for struct ptl_hdr_t */ - LASSERT (sizeof (ptl_hdr_t) == 72); - LASSERT (offsetof (ptl_hdr_t, dest_nid) == 0); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, 
src_nid) == 8); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, dest_pid) == 16); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, src_pid) == 20); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, type) == 24); - LASSERT (sizeof (((ptl_hdr_t *)0)->type) == 4); - - /* Ack */ - LASSERT (offsetof (ptl_hdr_t, msg.ack.mlength) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.mlength) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.ack.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.ack.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.length) == 4); - - /* Put */ - LASSERT (offsetof (ptl_hdr_t, msg.put.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.ack_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.put.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.put.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.hdr_data) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); - - /* Get */ - LASSERT (offsetof (ptl_hdr_t, msg.get.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.get.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.match_bits) == 8); - LASSERT (offsetof 
(ptl_hdr_t, msg.get.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.src_offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.src_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_offset) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.sink_length) == 68); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.sink_length) == 4); - - /* Reply */ - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_offset) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.dst_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.reply.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.length) == 4); -} diff --git a/lnet/lnet/lib-msg.c b/lnet/lnet/lib-msg.c deleted file mode 100644 index f10892c..0000000 --- a/lnet/lnet/lib-msg.c +++ /dev/null @@ -1,163 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-msg.c - * Message decoding, parsing and finalizing routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __KERNEL__ -# include -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include -#endif - -#include - -int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t *msg) -{ - lib_md_t *md; - lib_eq_t *eq; - int rc; - unsigned long flags; - - /* ni went down while processing this message */ - if (nal->ni.up == 0) { - return -1; - } - - if (msg == NULL) - return 0; - - rc = 0; - if (msg->send_ack) { - ptl_hdr_t ack; - - LASSERT (!ptl_is_wire_handle_none (&msg->ack_wmd)); - - memset (&ack, 0, sizeof (ack)); - ack.type = HTON__u32 (PTL_MSG_ACK); - ack.dest_nid = HTON__u64 (msg->nid); - ack.src_nid = HTON__u64 (nal->ni.nid); - ack.dest_pid = HTON__u32 (msg->pid); - ack.src_pid = HTON__u32 (nal->ni.pid); - PTL_HDR_LENGTH(&ack) = 0; - - ack.msg.ack.dst_wmd = msg->ack_wmd; - ack.msg.ack.match_bits = msg->ev.match_bits; - ack.msg.ack.mlength = HTON__u32 (msg->ev.mlength); - - rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK, - msg->nid, msg->pid, NULL, 0, 0); - } - - md = msg->md; - LASSERT (md->pending > 0); /* I've not dropped my ref yet */ - eq = md->eq; - - state_lock(nal, &flags); - - if (eq != NULL) { - ptl_event_t *ev = &msg->ev; - ptl_event_t *eq_slot; - - /* I have to hold the lock while I bump the sequence number - * and copy the event into the queue. If not, and I was - * interrupted after bumping the sequence number, other - * events could fill the queue, including the slot I just - * allocated to this event. On resuming, I would overwrite - * a more 'recent' event with old event state, and - * processes taking events off the queue would not detect - * overflow correctly. 
- */ - - ev->sequence = eq->sequence++;/* Allocate the next queue slot */ - - /* size must be a power of 2 to handle a wrapped sequence # */ - LASSERT (eq->size != 0 && - eq->size == LOWEST_BIT_SET (eq->size)); - eq_slot = eq->base + (ev->sequence & (eq->size - 1)); - - /* Invalidate unlinked_me unless this is the last - * event for an auto-unlinked MD. Note that if md was - * auto-unlinked, md->pending can only decrease - */ - if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 || /* not auto-unlinked */ - md->pending != 1) /* not last ref */ - ev->unlinked_me = PTL_HANDLE_NONE; - - /* Copy the event into the allocated slot, ensuring all the - * rest of the event's contents have been copied _before_ - * the sequence number gets updated. A processes 'getting' - * an event waits on the next queue slot's sequence to be - * 'new'. When it is, _all_ other event fields had better - * be consistent. I assert 'sequence' is the last member, - * so I only need a 2 stage copy. - */ - LASSERT(sizeof (ptl_event_t) == - offsetof(ptl_event_t, sequence) + sizeof(ev->sequence)); - - rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev, - offsetof (ptl_event_t, sequence)); - LASSERT (rc == 0); - -#ifdef __KERNEL__ - barrier(); -#endif - /* Updating the sequence number is what makes the event 'new' */ - - /* cb_write is not necessarily atomic, so this could - cause a race with PtlEQGet */ - rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence, - (void *)&ev->sequence,sizeof (ev->sequence)); - LASSERT (rc == 0); - -#ifdef __KERNEL__ - barrier(); -#endif - - /* I must also ensure that (a) callbacks are made in the - * same order as the events land in the queue, and (b) the - * callback occurs before the event can be removed from the - * queue, so I can't drop the lock during the callback. 
*/ - if (nal->cb_callback != NULL) - nal->cb_callback(nal, private, eq, ev); - else if (eq->event_callback != NULL) - (void)((eq->event_callback) (ev)); - } - - LASSERT ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 || - (md->md_flags & PTL_MD_FLAG_UNLINK) != 0); - - md->pending--; - if (md->pending == 0 && /* no more outstanding operations on this md */ - (md->threshold == 0 || /* done its business */ - (md->md_flags & PTL_MD_FLAG_UNLINK) != 0)) /* marked for death */ - lib_md_unlink(nal, md); - - list_del (&msg->msg_list); - nal->ni.counters.msgs_alloc--; - lib_msg_free(nal, msg); - - state_unlock(nal, &flags); - - return rc; -} diff --git a/lnet/lnet/lib-ni.c b/lnet/lnet/lib-ni.c deleted file mode 100644 index aa30329..0000000 --- a/lnet/lnet/lib-ni.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-ni.c - * Network status registers and distance functions. - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include -#include - -#define MAX_DIST 18446744073709551615UL - -int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlNIDebug_in *args = v_args; - PtlNIDebug_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->rc = ni->debug; - ni->debug = args->mask_in; - - return 0; -} - -int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t interface_in - * ptl_sr_index_t register_in - * - * Outgoing: - * ptl_sr_value_t * status_out - */ - - PtlNIStatus_in *args = v_args; - PtlNIStatus_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_counters_t *count = &ni->counters; - - if (!args) - return ret->rc = PTL_SEGV; - - ret->rc = PTL_OK; - ret->status_out = 0; - - /* - * I hate this sort of code.... Hash tables, offset lists? - * Treat the counters as an array of ints? - */ - if (args->register_in == PTL_SR_DROP_COUNT) - ret->status_out = count->drop_count; - - else if (args->register_in == PTL_SR_DROP_LENGTH) - ret->status_out = count->drop_length; - - else if (args->register_in == PTL_SR_RECV_COUNT) - ret->status_out = count->recv_count; - - else if (args->register_in == PTL_SR_RECV_LENGTH) - ret->status_out = count->recv_length; - - else if (args->register_in == PTL_SR_SEND_COUNT) - ret->status_out = count->send_count; - - else if (args->register_in == PTL_SR_SEND_LENGTH) - ret->status_out = count->send_length; - - else if (args->register_in == PTL_SR_MSGS_MAX) - ret->status_out = count->msgs_max; - else - ret->rc = PTL_INV_SR_INDX; - - return ret->rc; -} - - -int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t interface_in - * ptl_process_id_t process_in - - * - * Outgoing: - * unsigned long * distance_out - - */ - - PtlNIDist_in *args = v_args; - PtlNIDist_out *ret = v_ret; - - unsigned long dist; - ptl_process_id_t id_in = args->process_in; - ptl_nid_t nid; - int rc; - - 
nid = id_in.nid; - - if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) { - ret->distance_out = (unsigned long) MAX_DIST; - return PTL_INV_PROC; - } - - ret->distance_out = dist; - - return ret->rc = PTL_OK; -} diff --git a/lnet/lnet/lib-pid.c b/lnet/lnet/lib-pid.c deleted file mode 100644 index 12eebb5..0000000 --- a/lnet/lnet/lib-pid.c +++ /dev/null @@ -1,58 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-pid.c - * - * Process identification routines - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* This should be removed. 
The NAL should have the PID information */ -#define DEBUG_SUBSYSTEM S_PORTALS - -#if defined (__KERNEL__) -# include -extern int getpid(void); -#else -# include -# include -#endif -#include -#include - -int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t handle_in - * - * Outgoing: - * ptl_process_id_t * id_out - * ptl_id_t * gsize_out - */ - - PtlGetId_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->id_out.nid = ni->nid; - ret->id_out.pid = ni->pid; - - return ret->rc = PTL_OK; -} diff --git a/lnet/packaging/.cvsignore b/lnet/packaging/.cvsignore deleted file mode 100644 index fd1d56a..0000000 --- a/lnet/packaging/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -Makefile -Makefile.in -aclocal.m4 -config.log -config.status -config.cache -configure -portals.spec diff --git a/lnet/packaging/Makefile.am b/lnet/packaging/Makefile.am deleted file mode 100644 index 126bc69..0000000 --- a/lnet/packaging/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -EXTRA_DIST = portals.spec \ No newline at end of file diff --git a/lnet/packaging/portals.spec.in b/lnet/packaging/portals.spec.in deleted file mode 100644 index e196b3f..0000000 --- a/lnet/packaging/portals.spec.in +++ /dev/null @@ -1,116 +0,0 @@ -%define kversion @RELEASE@ -%define linuxdir @LINUX@ -%define version HEAD - -Summary: Sandia Portals Message Passing - utilities -Name: portals -Version: %{version} -Release: 0210101748uml -Copyright: LGPL -Group: Utilities/System -BuildRoot: /var/tmp/portals-%{version}-root -Source: http://sandiaportals.org/portals-%{version}.tar.gz - -%description -Sandia Portals message passing package. Contains kernel modules, libraries and utilities. 
- -%package -n portals-modules -Summary: Kernel modules and NAL's for portals -Group: Development/Kernel - -%description -n portals-modules -Object-Based Disk storage drivers for Linux %{kversion}. - -%package -n portals-source -Summary: Portals kernel source for rebuilding with other kernels -Group: Development/Kernel - -%description -n portals-source -Portals kernel source for rebuilding with other kernels - -%prep -%setup -n portals-%{version} - -%build -rm -rf $RPM_BUILD_ROOT - -# Create the pristine source directory. -srcdir=$RPM_BUILD_ROOT/usr/src/portals-%{version} -mkdir -p $srcdir -find . -name CVS -prune -o -print | cpio -ap $srcdir - -# Set an explicit path to our Linux tree, if we can. -conf_flag= -linuxdir=%{linuxdir} -test -d $linuxdir && conf_flag=--with-linux=$linuxdir -./configure $conf_flag -make - -%install -make install prefix=$RPM_BUILD_ROOT - -%ifarch alpha -# this hurts me - conf_flag= - linuxdir=%{linuxdir} - test -d $linuxdir && conf_flag=--with-linux=$linuxdir - make clean - ./configure --enable-rtscts-myrinet $conf_flag - make - cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/portals/rtscts_myrinet.o - cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload -%endif - - -%files -%attr(-, root, root) %doc COPYING -%attr(-, root, root) /usr/sbin/acceptor -%attr(-, root, root) /usr/sbin/ptlctl -%attr(-, root, root) /usr/sbin/debugctl -%ifarch alpha -%attr(-, root, root) /usr/sbin/mcpload -%endif -%attr(-, root, root) /lib/libmyrnal.a -%attr(-, root, root) /lib/libptlapi.a -%attr(-, root, root) /lib/libptlctl.a -%attr(-, root, root) /lib/libprocbridge.a -%attr(-, root, root) /lib/libptllib.a -%attr(-, root, root) /lib/libtcpnal.a -%attr(-, root, root) /lib/libtcpnalutil.a -%attr(-, root, root) /usr/include/portals/*.h -%attr(-, root, root) /usr/include/portals/base/*.h -%attr(-, root, root) /usr/include/linux/*.h - -%files -n portals-modules -%attr(-, root, root) %doc COPYING -%attr(-, root, root) 
/lib/modules/%{kversion}/kernel/net/portals/portals.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptlrouter.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptrxtx.o -%ifarch alpha -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/p3mod.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/rtscts.o -%endif -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/*nal.o - -%files -n portals-source -%attr(-, root, root) /usr/src/portals-%{version} - -%post -if [ ! -e /dev/portals ]; then - mknod /dev/portals c 10 240 -fi -depmod -ae || exit 0 - -grep -q portals /etc/modules.conf || \ - echo 'alias char-major-10-240 portals' >> /etc/modules.conf - -grep -q '/dev/portals' /etc/modules.conf || \ - echo 'alias /dev/portals portals' >> /etc/modules.conf - -%postun -depmod -ae || exit 0 - -%clean -#rm -rf $RPM_BUILD_ROOT - -# end of file diff --git a/lnet/router/.cvsignore b/lnet/router/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/router/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/router/Makefile.am b/lnet/router/Makefile.am deleted file mode 100644 index 1c8087b..0000000 --- a/lnet/router/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../Rules.linux - -MODULE = kptlrouter -modulenet_DATA = kptlrouter.o -EXTRA_PROGRAMS = kptlrouter - - -#CFLAGS:= @KCFLAGS@ -#CPPFLAGS:=@KCPPFLAGS@ -DEFS = -kptlrouter_SOURCES = router.c proc.c router.h diff --git a/lnet/router/Makefile.mk b/lnet/router/Makefile.mk deleted file mode 100644 index 64bd09b..0000000 --- a/lnet/router/Makefile.mk +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../Kernelenv - -obj-y += kptlrouter.o -kptlrouter-objs := router.o proc.o diff --git a/lnet/router/proc.c b/lnet/router/proc.c deleted file mode 100644 index dd65b34..0000000 --- a/lnet/router/proc.c +++ /dev/null @@ -1,78 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "router.h" - -#define KPR_PROC_ROUTER "sys/portals/router" - -int -kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data) -{ - unsigned long long bytes = kpr_fwd_bytes; - unsigned long packets = kpr_fwd_packets; - unsigned long errors = kpr_fwd_errors; - unsigned int qdepth = atomic_read (&kpr_queue_depth); - int len; - - *eof = 1; - if (off != 0) - return (0); - - len = sprintf (page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth); - - *start = page; - return (len); -} - -int -kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data) -{ - /* Ignore what we've been asked to write, and just zero the stats counters */ - kpr_fwd_bytes = 0; - kpr_fwd_packets = 0; - kpr_fwd_errors = 0; - - return (count); -} - -void -kpr_proc_init(void) -{ - struct proc_dir_entry *entry = create_proc_entry (KPR_PROC_ROUTER, S_IFREG | S_IRUGO | S_IWUSR, NULL); - - if (entry == NULL) - { - CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER); - return; - } - - entry->data = NULL; - entry->read_proc = kpr_proc_read; - entry->write_proc = kpr_proc_write; -} - -void -kpr_proc_fini(void) -{ - remove_proc_entry(KPR_PROC_ROUTER, 0); -} diff --git a/lnet/router/router.c b/lnet/router/router.c deleted file mode 100644 index 6074c3c..0000000 --- a/lnet/router/router.c +++ /dev/null @@ -1,449 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "router.h" - -struct list_head kpr_routes; -struct list_head kpr_nals; - -unsigned long long kpr_fwd_bytes; -unsigned long kpr_fwd_packets; -unsigned long kpr_fwd_errors; -atomic_t kpr_queue_depth; - -/* Mostly the tables are read-only (thread and interrupt context) - * - * Once in a blue moon we register/deregister NALs and add/remove routing - * entries (thread context only)... */ -rwlock_t kpr_rwlock; - -kpr_router_interface_t kpr_router_interface = { - kprri_register: kpr_register_nal, - kprri_lookup: kpr_lookup_target, - kprri_fwd_start: kpr_forward_packet, - kprri_fwd_done: kpr_complete_packet, - kprri_shutdown: kpr_shutdown_nal, - kprri_deregister: kpr_deregister_nal, -}; - -kpr_control_interface_t kpr_control_interface = { - kprci_add_route: kpr_add_route, - kprci_del_route: kpr_del_route, - kprci_get_route: kpr_get_route, -}; - -int -kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) -{ - long flags; - struct list_head *e; - kpr_nal_entry_t *ne; - - CDEBUG (D_OTHER, "Registering NAL %d\n", nalif->kprni_nalid); - - PORTAL_ALLOC (ne, sizeof (*ne)); - if (ne == NULL) - return (-ENOMEM); - - memset (ne, 0, sizeof (*ne)); - memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif)); - - LASSERT (!in_interrupt()); - write_lock_irqsave (&kpr_rwlock, flags); - - for (e = kpr_nals.next; e != &kpr_nals; e = e->next) - { - kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list); - - if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid) - { - write_unlock_irqrestore 
(&kpr_rwlock, flags); - - CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid); - - PORTAL_FREE (ne, sizeof (*ne)); - return (-EEXIST); - } - } - - list_add (&ne->kpne_list, &kpr_nals); - - write_unlock_irqrestore (&kpr_rwlock, flags); - - *argp = ne; - PORTAL_MODULE_USE; - return (0); -} - -void -kpr_shutdown_nal (void *arg) -{ - long flags; - kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - - CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid); - - LASSERT (!ne->kpne_shutdown); - LASSERT (!in_interrupt()); - - write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */ - ne->kpne_shutdown = 1; - write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */ - - while (atomic_read (&ne->kpne_refcount) != 0) - { - CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n", - ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); - - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } -} - -void -kpr_deregister_nal (void *arg) -{ - long flags; - kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - - CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); - - LASSERT (ne->kpne_shutdown); /* caller must have issued shutdown already */ - LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */ - LASSERT (!in_interrupt()); - - write_lock_irqsave (&kpr_rwlock, flags); - - list_del (&ne->kpne_list); - - write_unlock_irqrestore (&kpr_rwlock, flags); - - PORTAL_FREE (ne, sizeof (*ne)); - PORTAL_MODULE_UNUSE; -} - - -int -kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp) -{ - kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - struct list_head *e; - int rc = -ENOENT; - - CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d\n", target_nid, ne->kpne_interface.kprni_nalid); - - if (ne->kpne_shutdown) /* caller is shutting down */ - return (-ENOENT); - - read_lock (&kpr_rwlock); - - /* Search routes for one that has a 
gateway to target_nid on the callers network */ - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) - { - kpr_route_entry_t *re = list_entry (e, kpr_route_entry_t, kpre_list); - - if (re->kpre_lo_nid > target_nid || - re->kpre_hi_nid < target_nid) - continue; - - /* found table entry */ - - if (re->kpre_gateway_nalid != ne->kpne_interface.kprni_nalid) /* different NAL */ - rc = -EHOSTUNREACH; - else - { - rc = 0; - *gateway_nidp = re->kpre_gateway_nid; - } - break; - } - - read_unlock (&kpr_rwlock); - - CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d: %d ("LPX64")\n", - target_nid, ne->kpne_interface.kprni_nalid, rc, - (rc == 0) ? *gateway_nidp : (ptl_nid_t)0); - return (rc); -} - -void -kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)arg; - ptl_nid_t target_nid = fwd->kprfd_target_nid; - int nob = fwd->kprfd_nob; - struct list_head *e; - - CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid); - - LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */ - LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov)); - - atomic_inc (&kpr_queue_depth); - atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */ - - kpr_fwd_packets++; /* (loose) stats accounting */ - kpr_fwd_bytes += nob; - - if (src_ne->kpne_shutdown) /* caller is shutting down */ - goto out; - - fwd->kprfd_router_arg = src_ne; /* stash caller's nal entry */ - - read_lock (&kpr_rwlock); - - /* Search routes for one that has a gateway to target_nid NOT on the caller's network */ - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) - { - kpr_route_entry_t *re = list_entry (e, kpr_route_entry_t, kpre_list); - - if (re->kpre_lo_nid > target_nid || /* no match */ - re->kpre_hi_nid < target_nid) - continue; - - CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: match "LPX64" on NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid, 
- re->kpre_gateway_nid, re->kpre_gateway_nalid); - - if (re->kpre_gateway_nalid == src_ne->kpne_interface.kprni_nalid) - break; /* don't route to same NAL */ - - /* Search for gateway's NAL's entry */ - - for (e = kpr_nals.next; e != &kpr_nals; e = e->next) - { - kpr_nal_entry_t *dst_ne = list_entry (e, kpr_nal_entry_t, kpne_list); - - if (re->kpre_gateway_nalid != dst_ne->kpne_interface.kprni_nalid) /* no match */ - continue; - - if (dst_ne->kpne_shutdown) /* don't route if NAL is shutting down */ - break; - - fwd->kprfd_gateway_nid = re->kpre_gateway_nid; - atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */ - - read_unlock (&kpr_rwlock); - - CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: "LPX64" on NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid, - fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid); - - dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd); - return; - } - break; - } - - read_unlock (&kpr_rwlock); - out: - kpr_fwd_errors++; - - CDEBUG (D_OTHER, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid); - - /* Can't find anywhere to forward to */ - (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH); - - atomic_dec (&kpr_queue_depth); - atomic_dec (&src_ne->kpne_refcount); -} - -void -kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error) -{ - kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg; - kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg; - - CDEBUG (D_OTHER, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd, - src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error); - - atomic_dec (&dst_ne->kpne_refcount); /* CAVEAT EMPTOR dst_ne can disappear now!!! 
*/ - - (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error); - - CDEBUG (D_OTHER, "complete(2) [%p] from NAL %d: %d\n", fwd, - src_ne->kpne_interface.kprni_nalid, error); - - atomic_dec (&kpr_queue_depth); - atomic_dec (&src_ne->kpne_refcount); /* CAVEAT EMPTOR src_ne can disappear now!!! */ -} - -int -kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, - ptl_nid_t hi_nid) -{ - long flags; - struct list_head *e; - kpr_route_entry_t *re; - - CDEBUG(D_OTHER, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n", - gateway_nalid, gateway_nid, lo_nid, hi_nid); - - LASSERT(lo_nid <= hi_nid); - - PORTAL_ALLOC (re, sizeof (*re)); - if (re == NULL) - return (-ENOMEM); - - re->kpre_gateway_nalid = gateway_nalid; - re->kpre_gateway_nid = gateway_nid; - re->kpre_lo_nid = lo_nid; - re->kpre_hi_nid = hi_nid; - - LASSERT(!in_interrupt()); - write_lock_irqsave (&kpr_rwlock, flags); - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { - kpr_route_entry_t *re2 = list_entry(e, kpr_route_entry_t, - kpre_list); - - if (re->kpre_lo_nid > re2->kpre_hi_nid || - re->kpre_hi_nid < re2->kpre_lo_nid) - continue; - - CERROR ("Attempt to add duplicate routes ["LPX64" - "LPX64"]" - "to ["LPX64" - "LPX64"]\n", - re->kpre_lo_nid, re->kpre_hi_nid, - re2->kpre_lo_nid, re2->kpre_hi_nid); - - write_unlock_irqrestore (&kpr_rwlock, flags); - - PORTAL_FREE (re, sizeof (*re)); - return (-EINVAL); - } - - list_add (&re->kpre_list, &kpr_routes); - - write_unlock_irqrestore (&kpr_rwlock, flags); - return (0); -} - -int -kpr_del_route (ptl_nid_t nid) -{ - long flags; - struct list_head *e; - - CDEBUG(D_OTHER, "Del route "LPX64"\n", nid); - - LASSERT(!in_interrupt()); - write_lock_irqsave(&kpr_rwlock, flags); - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { - kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, - kpre_list); - - if (re->kpre_lo_nid > nid || re->kpre_hi_nid < nid) - continue; - - list_del (&re->kpre_list); - write_unlock_irqrestore(&kpr_rwlock, 
flags); - - PORTAL_FREE(re, sizeof (*re)); - return (0); - } - - write_unlock_irqrestore(&kpr_rwlock, flags); - return (-ENOENT); -} - -int -kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid) -{ - struct list_head *e; - - read_lock(&kpr_rwlock); - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { - kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, - kpre_list); - - if (idx-- == 0) { - *gateway_nalid = re->kpre_gateway_nalid; - *gateway_nid = re->kpre_gateway_nid; - *lo_nid = re->kpre_lo_nid; - *hi_nid = re->kpre_hi_nid; - - read_unlock(&kpr_rwlock); - return (0); - } - } - - read_unlock (&kpr_rwlock); - return (-ENOENT); -} - -static void __exit -kpr_finalise (void) -{ - LASSERT (list_empty (&kpr_nals)); - - while (!list_empty (&kpr_routes)) { - kpr_route_entry_t *re = list_entry(kpr_routes.next, - kpr_route_entry_t, - kpre_list); - - list_del(&re->kpre_list); - PORTAL_FREE(re, sizeof (*re)); - } - - kpr_proc_fini(); - - PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); - PORTAL_SYMBOL_UNREGISTER(kpr_control_interface); - - CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n", - atomic_read(&portal_kmemory)); -} - -static int __init -kpr_initialise (void) -{ - CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", - atomic_read(&portal_kmemory)); - - rwlock_init(&kpr_rwlock); - INIT_LIST_HEAD(&kpr_routes); - INIT_LIST_HEAD(&kpr_nals); - - kpr_proc_init(); - - PORTAL_SYMBOL_REGISTER(kpr_router_interface); - PORTAL_SYMBOL_REGISTER(kpr_control_interface); - return (0); -} - -MODULE_AUTHOR("Eric Barton"); -MODULE_DESCRIPTION("Kernel Portals Router v0.01"); -MODULE_LICENSE("GPL"); - -module_init (kpr_initialise); -module_exit (kpr_finalise); - -EXPORT_SYMBOL (kpr_control_interface); -EXPORT_SYMBOL (kpr_router_interface); diff --git a/lnet/router/router.h b/lnet/router/router.h deleted file mode 100644 index b8c3bec..0000000 --- a/lnet/router/router.h +++ /dev/null @@ -1,81 +0,0 @@ -/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef _KPTLROUTER_H -#define _KPTLROUTER_H -#define EXPORT_SYMTAB - -#include -#include -#include -#include -#include -#include -#include -#include - -#define DEBUG_SUBSYSTEM S_PTLROUTER - -#include -#include -#include - -typedef struct -{ - struct list_head kpne_list; - kpr_nal_interface_t kpne_interface; - atomic_t kpne_refcount; - int kpne_shutdown; -} kpr_nal_entry_t; - -typedef struct -{ - struct list_head kpre_list; - int kpre_gateway_nalid; - ptl_nid_t kpre_gateway_nid; - ptl_nid_t kpre_lo_nid; - ptl_nid_t kpre_hi_nid; -} kpr_route_entry_t; - -extern int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp); -extern int kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp); -extern void kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd); -extern void kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error); -extern void kpr_shutdown_nal (void *arg); -extern void kpr_deregister_nal (void *arg); - -extern void kpr_proc_init (void); -extern void kpr_proc_fini (void); - -extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t 
hi_nid); -extern int kpr_del_route (ptl_nid_t nid); -extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid); - -extern unsigned long long kpr_fwd_bytes; -extern unsigned long kpr_fwd_packets; -extern unsigned long kpr_fwd_errors; -extern atomic_t kpr_queue_depth; - -#endif /* _KPLROUTER_H */ diff --git a/lnet/tests/.cvsignore b/lnet/tests/.cvsignore deleted file mode 100644 index 051d1bd..0000000 --- a/lnet/tests/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -Makefile -Makefile.in -.deps diff --git a/lnet/tests/Makefile.am b/lnet/tests/Makefile.am deleted file mode 100644 index 7b47ae0..0000000 --- a/lnet/tests/Makefile.am +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../Rules.linux - -LDFLAGS = -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -LINK = $(LD) $(LDFLAGS) -o $@ -DEFS = -LIBS = -MODULE = $(basename) -EXTRA_DIST = startserver.sh startclient.sh stopserver.sh stopclient.sh - -noinst_PROGRAMS = pingsrv.o pingcli.o spingsrv.o spingcli.o - -pingsrv_o_SOURCES = ping_srv.c ping.h - -pingcli_o_SOURCES = ping_cli.c ping.h - -spingsrv_o_SOURCES = sping_srv.c ping.h - -spingcli_o_SOURCES = sping_cli.c ping.h diff --git a/lnet/tests/ping.h b/lnet/tests/ping.h deleted file mode 100644 index f07444b..0000000 --- a/lnet/tests/ping.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _KPING_INCLUDED -#define _KPING_INCLUDED - -#include - - -#define PTL_PING_IN_SIZE 256 // n packets per buffer -#define PTL_PING_IN_BUFFERS 2 // n fallback buffers - -#define PTL_PING_CLIENT 4 -#define PTL_PING_SERVER 5 - -#define PING_HEADER_MAGIC 0xDEADBEEF -#define PING_BULK_MAGIC 0xCAFEBABE - -#define PING_HEAD_BITS 0x00000001 -#define PING_BULK_BITS 0x00000002 -#define PING_IGNORE_BITS 0xFFFFFFFC - -#define PTL_PING_ACK 0x01 -#define PTL_PING_VERBOSE 0x02 -#define 
PTL_PING_VERIFY 0x04 -#define PTL_PING_PREALLOC 0x08 - - -#define NEXT_PRIMARY_BUFFER(index) \ - (((index + 1) >= PTL_PING_IN_BUFFERS) ? 0 : (index + 1)) - -#define PDEBUG(str, err) \ - CERROR ("%s: error=%s (%d)\n", str, ptl_err_str[err], err) - - -/* Ping data to be passed via the ioctl to kernel space */ - -#if __KERNEL__ - - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include -#else -#include -#endif -struct pingsrv_data { - - ptl_handle_ni_t ni; - ptl_handle_me_t me; - ptl_handle_eq_t eq; - void *in_buf; - ptl_process_id_t my_id; - ptl_process_id_t id_local; - ptl_md_t mdin; - ptl_md_t mdout; - ptl_handle_md_t mdin_h; - ptl_handle_md_t mdout_h; - ptl_event_t evnt; - struct task_struct *tsk; -}; /* struct pingsrv_data */ - -struct pingcli_data { - - struct portal_ioctl_data *args; - ptl_handle_me_t me; - ptl_handle_eq_t eq; - char *inbuf; - char *outbuf; - ptl_process_id_t myid; - ptl_process_id_t id_local; - ptl_process_id_t id_remote; - ptl_md_t md_in_head; - ptl_md_t md_out_head; - ptl_handle_md_t md_in_head_h; - ptl_handle_md_t md_out_head_h; - ptl_event_t ev; - struct task_struct *tsk; -}; /* struct pingcli_data */ - - -#endif /* __KERNEL__ */ - -#endif /* _KPING_INCLUDED */ diff --git a/lnet/tests/ping_cli.c b/lnet/tests/ping_cli.c deleted file mode 100644 index 389ffbb..0000000 --- a/lnet/tests/ping_cli.c +++ /dev/null @@ -1,300 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf - * Kedar Sovani (kedar@calsoftinc.com) - * Amey Inamdar (amey@calsoftinc.com) - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include -#include -#include -#include -#include -#include -#include "ping.h" -/* int portal_debug = D_PING_CLI; */ - - -#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) - -#define MAX_TIME 100000 - -/* This should be enclosed in a structure */ - -static struct pingcli_data *client = NULL; - -static int count = 0; - -static void -pingcli_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (client->md_out_head_h))) - PDEBUG ("PtlMDUnlink", rc); - case 2: - if ((rc = PtlMDUnlink (client->md_in_head_h))) - PDEBUG ("PtlMDUnlink", rc); - - /* Free the event queue */ - if ((rc = PtlEQFree (client->eq))) - PDEBUG ("PtlEQFree", rc); - - if ((rc = PtlMEUnlink (client->me))) - PDEBUG ("PtlMEUnlink", rc); - case 3: - kportal_put_ni (client->args->ioc_nal); - - case 4: - /* Free our buffers */ - - if (client != NULL) - PORTAL_FREE (client, - sizeof(struct pingcli_data)); - } - - - CDEBUG (D_OTHER, "ping client released resources\n"); -} /* pingcli_shutdown() */ - -static int pingcli_callback(ptl_event_t *ev) -{ - int i, magic; - i = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned)); - magic = *(int *)(ev->mem_desc.start + ev->offset); - - if(magic != 0xcafebabe) { - printk ("Unexpected response \n"); - return 1; - } - - if((i == count) || !count) - wake_up_process (client->tsk); - else - printk ("Received response after timeout for %d\n",i); - return 1; -} - - -static struct pingcli_data * -pingcli_start(struct portal_ioctl_data *args) -{ - ptl_handle_ni_t *nip; - unsigned ping_head_magic = PING_HEADER_MAGIC; - unsigned ping_bulk_magic = PING_BULK_MAGIC; - int rc; - struct timeval tv1, tv2; - client->tsk = current; - client->args = args; - CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", \ - nal %d, size %u, count: %u, timeout: %u\n", - args->ioc_nid, args->ioc_nal, args->ioc_size, - args->ioc_count, args->ioc_timeout); - - - PORTAL_ALLOC (client->outbuf, STDSIZE + args->ioc_size) ; - if (client->outbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return (NULL); - } - - PORTAL_ALLOC (client->inbuf, - (args->ioc_size + STDSIZE) * args->ioc_count); - if (client->inbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return 
(NULL); - } - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) - { - CERROR ("NAL %d not loaded\n", args->ioc_nal); - pingcli_shutdown (4); - return (NULL); - } - - /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) - { - CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Setup the local match entries */ - client->id_local.nid = PTL_NID_ANY; - client->id_local.pid = PTL_PID_ANY; - - /* Setup the remote match entries */ - client->id_remote.nid = args->ioc_nid; - client->id_remote.pid = 0; - - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, - client->id_local, 0, ~0, PTL_RETAIN, - PTL_INS_AFTER, &client->me))) - { - CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) - { - CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - count = args->ioc_count; - - client->md_in_head.start = client->inbuf; - client->md_in_head.length = (args->ioc_size + STDSIZE) - * count; - client->md_in_head.threshold = PTL_MD_THRESH_INF; - client->md_in_head.options = PTL_MD_OP_PUT; - client->md_in_head.user_ptr = NULL; - client->md_in_head.eventq = client->eq; - memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count); - - /* Attach the incoming buffer */ - if ((rc = PtlMDAttach (client->me, client->md_in_head, - PTL_UNLINK, &client->md_in_head_h))) { - CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); - return (NULL); - } - /* Setup the outgoing ping header */ - client->md_out_head.start = client->outbuf; - client->md_out_head.length = STDSIZE + args->ioc_size; - client->md_out_head.threshold = args->ioc_count; - client->md_out_head.options = PTL_MD_OP_PUT; - client->md_out_head.user_ptr = NULL; - client->md_out_head.eventq = 
PTL_EQ_NONE; - - memcpy (client->outbuf, &ping_head_magic, sizeof(ping_bulk_magic)); - - count = 0; - - /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, - &client->md_out_head_h))) { - CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); - return NULL; - } - while ((args->ioc_count - count)) { - memcpy (client->outbuf + sizeof(unsigned), - &(count), sizeof(unsigned)); - /* Put the ping packet */ - do_gettimeofday (&tv1); - - memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1, - sizeof(struct timeval)); - - if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, - client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { - PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); - return NULL; - } - printk ("sent msg no %d", count); - - set_current_state (TASK_INTERRUPTIBLE); - rc = schedule_timeout (20 * args->ioc_timeout); - if (rc == 0) { - printk (" :: timeout .....\n"); - } else { - do_gettimeofday (&tv2); - printk(" :: Reply in %u usec\n", - (unsigned)((tv2.tv_sec - tv1.tv_sec) - * 1000000 + (tv2.tv_usec - tv1.tv_usec))); - } - count++; - } - - if (client->outbuf != NULL) - PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size); - - if (client->inbuf != NULL) - PORTAL_FREE (client->inbuf, - (args->ioc_size + STDSIZE) * args->ioc_count); - - pingcli_shutdown (2); - - /* Success! 
*/ - return NULL; -} /* pingcli_setup() */ - - - -/* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) -{ - PORTAL_ALLOC (client, sizeof(struct pingcli_data)); - if (client == NULL) - { - CERROR ("Unable to allocate client structure\n"); - return (0); - } - memset (client, 0, sizeof(struct pingcli_data)); - pingcli_start (args); - - return 0; -} /* kping_client() */ - - -static int __init pingcli_init(void) -{ - PORTAL_SYMBOL_REGISTER(kping_client); - return 0; -} /* pingcli_init() */ - - -static void __exit pingcli_cleanup(void) -{ - PORTAL_SYMBOL_UNREGISTER (kping_client); -} /* pingcli_cleanup() */ - - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingcli_init); -module_exit(pingcli_cleanup); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -EXPORT_SYMBOL (kping_client); -#endif diff --git a/lnet/tests/ping_srv.c b/lnet/tests/ping_srv.c deleted file mode 100644 index 1037d09..0000000 --- a/lnet/tests/ping_srv.c +++ /dev/null @@ -1,308 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf - * Amey Inamdar - * Kedar Sovani - * - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include -#include -#include "ping.h" - -#include -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#else -#include -#endif -#include -#include - -#include -#include - -#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) -#define MAXSIZE (16*1024*1024) - -static unsigned ping_head_magic; -static unsigned ping_bulk_magic; -static int nal = 0; // Your NAL, -static unsigned long packets_valid = 0; // Valid packets -static int running = 1; -atomic_t pkt; - -static struct pingsrv_data *server=NULL; // Our ping server - -static void *pingsrv_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (server->mdin_h))) - PDEBUG ("PtlMDUnlink (out head buffer)", rc); - case 2: - /* Free the event queue */ - if ((rc = PtlEQFree (server->eq))) - PDEBUG ("PtlEQFree", rc); - - /* Unlink the client portal from the ME list */ - if ((rc = PtlMEUnlink (server->me))) - PDEBUG ("PtlMEUnlink", rc); - - case 3: - kportal_put_ni (nal); - - case 4: - - case 5: - if (server->in_buf != NULL) - PORTAL_FREE (server->in_buf, MAXSIZE); - - if (server != NULL) - PORTAL_FREE (server, - sizeof (struct pingsrv_data)); - - } - - CDEBUG (D_OTHER, "ping sever resources released\n"); - return NULL; -} /* pingsrv_shutdown() */ - - -int pingsrv_thread(void *arg) -{ - int rc; - unsigned long magic; - unsigned long ping_bulk_magic = 0xcafebabe; - - kportal_daemonize ("pingsrv"); - server->tsk = current; - - while (running) { - set_current_state (TASK_INTERRUPTIBLE); - if (atomic_read (&pkt) == 0) { - schedule_timeout (MAX_SCHEDULE_TIMEOUT); - continue; - } - - magic = *((int *)(server->evnt.mem_desc.start - + server->evnt.offset)); - - - if(magic != 0xdeadbeef) { - printk("Unexpected Packet to the server\n"); - - } - memcpy (server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic)); - - server->mdout.length = server->evnt.rlength; - server->mdout.start = server->in_buf; - server->mdout.threshold = 1; - server->mdout.options = PTL_MD_OP_PUT; - server->mdout.user_ptr = NULL; - server->mdout.eventq = PTL_EQ_NONE; - - /* Bind the outgoing buffer */ - if ((rc = PtlMDBind (server->ni, server->mdout, - &server->mdout_h))) { - PDEBUG ("PtlMDBind", rc); - pingsrv_shutdown (1); - return 1; - } - - - server->mdin.start = server->in_buf; - server->mdin.length = MAXSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - - if ((rc = PtlMDAttach (server->me, server->mdin, - PTL_UNLINK, &server->mdin_h))) { - 
PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ, - server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0))) - PDEBUG ("PtlPut", rc); - - atomic_dec (&pkt); - - } - pingsrv_shutdown (1); - running = 1; - return 0; -} - -static int pingsrv_packet(ptl_event_t *ev) -{ - atomic_inc (&pkt); - wake_up_process (server->tsk); - return 1; -} /* pingsrv_head() */ - -static int pingsrv_callback(ptl_event_t *ev) -{ - - if (ev == NULL) { - CERROR ("null in callback, ev=%p\n", ev); - return 0; - } - server->evnt = *ev; - - printk ("received ping from nid "LPX64" " - "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n", - ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->mem_desc.start + ev->offset)), - *((int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned))), - *((int *)(ev->mem_desc.start + ev->offset + 2 * - sizeof(unsigned)))); - - packets_valid++; - - return pingsrv_packet(ev); - -} /* pingsrv_callback() */ - - -static struct pingsrv_data *pingsrv_setup(void) -{ - ptl_handle_ni_t *nip; - int rc; - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { - CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); - return pingsrv_shutdown (4); - } - - server->ni= *nip; - - /* Based on the initialization aquire our unique portal ID. 
*/ - if ((rc = PtlGetId (server->ni, &server->my_id))) { - PDEBUG ("PtlGetId", rc); - return pingsrv_shutdown (2); - } - - server->id_local.nid = PTL_NID_ANY; - server->id_local.pid = PTL_PID_ANY; - - /* Attach a match entries for header packets */ - if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER, - server->id_local,0, ~0, - PTL_RETAIN, PTL_INS_AFTER, &server->me))) { - PDEBUG ("PtlMEAttach", rc); - return pingsrv_shutdown (2); - } - - - if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback, - &server->eq))) { - PDEBUG ("PtlEQAlloc (callback)", rc); - return pingsrv_shutdown (2); - } - - PORTAL_ALLOC (server->in_buf, MAXSIZE); - if(!server->in_buf){ - CDEBUG (D_OTHER,"Allocation error\n"); - return pingsrv_shutdown(2); - } - - /* Setup the incoming buffer */ - server->mdin.start = server->in_buf; - server->mdin.length = MAXSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - memset (server->in_buf, 0, STDSIZE); - - if ((rc = PtlMDAttach (server->me, server->mdin, - PTL_UNLINK, &server->mdin_h))) { - PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - /* Success! 
*/ - return server; -} /* pingsrv_setup() */ - -static int pingsrv_start(void) -{ - /* Setup our server */ - if (!pingsrv_setup()) { - CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); - return -ENOMEM; - } - kernel_thread (pingsrv_thread,NULL,0); - return 0; -} /* pingsrv_start() */ - - - -static int __init pingsrv_init(void) -{ - ping_head_magic = PING_HEADER_MAGIC; - ping_bulk_magic = PING_BULK_MAGIC; - PORTAL_ALLOC (server, sizeof(struct pingsrv_data)); - return pingsrv_start (); -} /* pingsrv_init() */ - - -static void __exit pingsrv_cleanup(void) -{ - remove_proc_entry ("net/pingsrv", NULL); - - running = 0; - wake_up_process (server->tsk); - while (running != 1) { - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - -} /* pingsrv_cleanup() */ - - -MODULE_PARM(nal, "i"); -MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)"); - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A kernel space ping server for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingsrv_init); -module_exit(pingsrv_cleanup); diff --git a/lnet/tests/sping_cli.c b/lnet/tests/sping_cli.c deleted file mode 100644 index 4cef08b..0000000 --- a/lnet/tests/sping_cli.c +++ /dev/null @@ -1,276 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf - * Kedar Sovani (kedar@calsoftinc.com) - * Amey Inamdar (amey@calsoftinc.com) - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -/* This is a striped down version of pinger. It follows a single - * request-response protocol. Doesn't do Bulk data pinging. Also doesn't - * send multiple packets in a single ioctl. - */ - - -#define DEBUG_SUBSYSTEM S_PINGER - -#include -#include -#include -#include -#include -#include -#include "ping.h" -/* int portal_debug = D_PING_CLI; */ - - -#define STDSIZE (sizeof(int) + sizeof(int) + 4) /* The data is 4 bytes - assumed */ - -/* This should be enclosed in a structure */ - -static struct pingcli_data *client = NULL; - -static int count = 0; - -static void -pingcli_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (client->md_out_head_h))) - PDEBUG ("PtlMDUnlink", rc); - case 2: - /* Free the event queue */ - if ((rc = PtlEQFree (client->eq))) - PDEBUG ("PtlEQFree", rc); - - if ((rc = PtlMEUnlink (client->me))) - PDEBUG ("PtlMEUnlink", rc); - case 3: - kportal_put_ni (client->args->ioc_nal); - - case 4: - /* Free our buffers */ - if (client->outbuf != NULL) - PORTAL_FREE (client->outbuf, STDSIZE); - - if (client->inbuf != NULL) - PORTAL_FREE (client->inbuf, STDSIZE); - - - if (client != NULL) - PORTAL_FREE (client, - sizeof(struct pingcli_data)); - } - - - CDEBUG (D_OTHER, "ping client released resources\n"); -} /* pingcli_shutdown() */ - -static int pingcli_callback(ptl_event_t *ev) -{ - wake_up_process (client->tsk); - return 1; -} - - -static struct pingcli_data * -pingcli_start(struct portal_ioctl_data *args) -{ - const ptl_handle_ni_t *nip; - unsigned ping_head_magic = PING_HEADER_MAGIC; - int rc; - - client->tsk = current; - client->args = args; - - CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", \ - nal %d, size %u, count: %u, timeout: %u\n", - args->ioc_nid, args->ioc_nal, args->ioc_size, - args->ioc_count, args->ioc_timeout); - - - PORTAL_ALLOC (client->outbuf, STDSIZE) ; - if (client->outbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return (NULL); - } - - PORTAL_ALLOC (client->inbuf, STDSIZE); - - if (client->inbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return (NULL); - } - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) - { - CERROR ("NAL %d not loaded.\n", args->ioc_nal); - pingcli_shutdown (4); - return (NULL); - } - - /* Based on the initialization aquire our unique portal ID. 
*/ - if ((rc = PtlGetId (*nip, &client->myid))) - { - CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Setup the local match entries */ - client->id_local.nid = PTL_NID_ANY; - client->id_local.pid = PTL_PID_ANY; - - /* Setup the remote match entries */ - client->id_remote.nid = args->ioc_nid; - client->id_remote.pid = 0; - - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, - client->id_local, 0, ~0, PTL_RETAIN, - PTL_INS_AFTER, &client->me))) - { - CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) - { - CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - - client->md_in_head.start = client->inbuf; - client->md_in_head.length = STDSIZE; - client->md_in_head.threshold = 1; - client->md_in_head.options = PTL_MD_OP_PUT; - client->md_in_head.user_ptr = NULL; - client->md_in_head.eventq = client->eq; - memset (client->inbuf, 0, STDSIZE); - - /* Attach the incoming buffer */ - if ((rc = PtlMDAttach (client->me, client->md_in_head, - PTL_UNLINK, &client->md_in_head_h))) { - CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); - return (NULL); - } - - /* Setup the outgoing ping header */ - client->md_out_head.start = client->outbuf; - client->md_out_head.length = STDSIZE; - client->md_out_head.threshold = 1; - client->md_out_head.options = PTL_MD_OP_PUT; - client->md_out_head.user_ptr = NULL; - client->md_out_head.eventq = PTL_EQ_NONE; - - memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); - - /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, - &client->md_out_head_h))) { - CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); - return (NULL); - } - /* Put the ping packet */ - if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, - client->id_remote, PTL_PING_SERVER, 0, 0, 0, 
0))) { - PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); - return NULL; - } - - count = 0; - set_current_state (TASK_INTERRUPTIBLE); - rc = schedule_timeout (20 * args->ioc_timeout); - if (rc == 0) { - printk (" Time out on the server\n"); - pingcli_shutdown (2); - return NULL; - } else - printk("Received respose from the server \n"); - - - pingcli_shutdown (2); - - /* Success! */ - return NULL; -} /* pingcli_setup() */ - - - -/* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) -{ - - PORTAL_ALLOC (client, sizeof(struct pingcli_data)); - memset (client, 0, sizeof(struct pingcli_data)); - if (client == NULL) - { - CERROR ("Unable to allocate client structure\n"); - return (0); - } - pingcli_start (args); - - return 0; -} /* kping_client() */ - - -static int __init pingcli_init(void) -{ - PORTAL_SYMBOL_REGISTER(kping_client); - return 0; -} /* pingcli_init() */ - - -static void __exit pingcli_cleanup(void) -{ - PORTAL_SYMBOL_UNREGISTER (kping_client); -} /* pingcli_cleanup() */ - - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingcli_init); -module_exit(pingcli_cleanup); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -EXPORT_SYMBOL (kping_client); -#endif diff --git a/lnet/tests/sping_srv.c b/lnet/tests/sping_srv.c deleted file mode 100644 index a18ea35..0000000 --- a/lnet/tests/sping_srv.c +++ /dev/null @@ -1,295 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf - * Amey Inamdar - * Kedar Sovani - * - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published 
by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* This is a striped down version of pinger. It follows a single - * request-response protocol. Doesn't do Bulk data pinging. Also doesn't - * send multiple packets in a single ioctl. - */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include -#include -#include "ping.h" - -#include -#include -#include -#include -#include -#include -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include -#else -#include -#endif -#include -#include - -#include -#include - -#define STDSIZE (sizeof(int) + sizeof(int) + 4) - -static int nal = 0; // Your NAL, -static unsigned long packets_valid = 0; // Valid packets -static int running = 1; -atomic_t pkt; - -static struct pingsrv_data *server=NULL; // Our ping server - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#endif - -static void *pingsrv_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (server->mdin_h))) - PDEBUG ("PtlMDUnlink (out head buffer)", rc); - case 2: - /* Free the event queue */ - if ((rc = PtlEQFree (server->eq))) - PDEBUG ("PtlEQFree", rc); - - /* Unlink the client portal from the ME list */ - if ((rc = PtlMEUnlink (server->me))) - PDEBUG ("PtlMEUnlink", rc); - - case 3: - kportal_put_ni (nal); - - case 4: - - if (server->in_buf != NULL) - PORTAL_FREE (server->in_buf, STDSIZE); - - if (server != NULL) - PORTAL_FREE (server, - sizeof (struct pingsrv_data)); - - } - - CDEBUG (D_OTHER, "ping sever resources released\n"); - return NULL; -} /* pingsrv_shutdown() */ - - -int pingsrv_thread(void *arg) -{ - int rc; - - kportal_daemonize ("pingsrv"); - server->tsk = current; - - while (running) { - set_current_state (TASK_INTERRUPTIBLE); - if (atomic_read (&pkt) == 0) { - schedule_timeout (MAX_SCHEDULE_TIMEOUT); - continue; - } - - server->mdout.start = server->in_buf; - server->mdout.length = STDSIZE; - server->mdout.threshold = 1; - server->mdout.options = PTL_MD_OP_PUT; - server->mdout.user_ptr = NULL; - server->mdout.eventq = PTL_EQ_NONE; - - /* Bind the outgoing buffer */ - if ((rc = PtlMDBind (server->ni, server->mdout, - &server->mdout_h))) { - PDEBUG ("PtlMDBind", rc); - pingsrv_shutdown (1); - return 1; - } - - - server->mdin.start = server->in_buf; - server->mdin.length = STDSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - - if ((rc = PtlMDAttach (server->me, server->mdin, - PTL_UNLINK, &server->mdin_h))) { - PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ, - server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0))) - PDEBUG ("PtlPut", rc); - - atomic_dec (&pkt); - - } - pingsrv_shutdown (1); - running = 1; - return 0; -} - -static 
int pingsrv_packet(ptl_event_t *ev) -{ - atomic_inc (&pkt); - wake_up_process (server->tsk); - return 1; -} /* pingsrv_head() */ - -static int pingsrv_callback(ptl_event_t *ev) -{ - - if (ev == NULL) { - CERROR ("null in callback, ev=%p\n", ev); - return 0; - } - server->evnt = *ev; - - printk ("received ping from nid "LPX64" " - "(off=%u rlen=%u mlen=%u head=%x)\n", - ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->mem_desc.start + ev->offset))); - - packets_valid++; - - return pingsrv_packet(ev); - -} /* pingsrv_callback() */ - - -static struct pingsrv_data *pingsrv_setup(void) -{ - ptl_handle_ni_t *nip; - int rc; - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { - CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); - return pingsrv_shutdown (4); - } - - server->ni= *nip; - - /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (server->ni, &server->my_id))) { - PDEBUG ("PtlGetId", rc); - return pingsrv_shutdown (2); - } - - server->id_local.nid = PTL_NID_ANY; - server->id_local.pid = PTL_PID_ANY; - - /* Attach a match entries for header packets */ - if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER, - server->id_local,0, ~0, - PTL_RETAIN, PTL_INS_AFTER, &server->me))) { - PDEBUG ("PtlMEAttach", rc); - return pingsrv_shutdown (2); - } - - - if ((rc = PtlEQAlloc (server->ni, 64, pingsrv_callback, - &server->eq))) { - PDEBUG ("PtlEQAlloc (callback)", rc); - return pingsrv_shutdown (2); - } - - PORTAL_ALLOC (server->in_buf, STDSIZE); - if(!server->in_buf){ - CDEBUG (D_OTHER,"Allocation error\n"); - return pingsrv_shutdown(2); - } - - /* Setup the incoming buffer */ - server->mdin.start = server->in_buf; - server->mdin.length = STDSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - memset (server->in_buf, 0, STDSIZE); - - if ((rc = PtlMDAttach (server->me, 
server->mdin, - PTL_UNLINK, &server->mdin_h))) { - PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - /* Success! */ - return server; -} /* pingsrv_setup() */ - -static int pingsrv_start(void) -{ - /* Setup our server */ - if (!pingsrv_setup()) { - CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); - return -ENOMEM; - } - kernel_thread (pingsrv_thread,NULL,0); - return 0; -} /* pingsrv_start() */ - - - -static int __init pingsrv_init(void) -{ - PORTAL_ALLOC (server, sizeof(struct pingsrv_data)); - return pingsrv_start (); -} /* pingsrv_init() */ - - -static void __exit pingsrv_cleanup(void) -{ - remove_proc_entry ("net/pingsrv", NULL); - - running = 0; - wake_up_process (server->tsk); - while (running != 1) { - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - -} /* pingsrv_cleanup() */ - - -MODULE_PARM(nal, "i"); -MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)"); - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A kernel space ping server for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingsrv_init); -module_exit(pingsrv_cleanup); diff --git a/lnet/tests/startclient.sh b/lnet/tests/startclient.sh deleted file mode 100644 index c9b7c16..0000000 --- a/lnet/tests/startclient.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-0} - -if [ $SIMPLE -eq 0 ]; then - PING=pingcli.o -else - PING=spingcli.o -fi - -case "$1" in - toe) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../toenal/ktoenal.o - /sbin/insmod ./$PING - echo ktoenal > /tmp/nal - ;; - - tcp) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../socknal/ksocknal.o - /sbin/insmod ./$PING - echo ksocknal > /tmp/nal - ;; - - elan) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../qswnal/kqswnal.o - /sbin/insmod ./$PING - echo kqswnal > /tmp/nal - ;; - - *) - echo "Usage : ${0} < tcp | toe | elan >" - exit 1; -esac -exit 0; diff --git 
a/lnet/tests/startserver.sh b/lnet/tests/startserver.sh deleted file mode 100644 index 942300e..0000000 --- a/lnet/tests/startserver.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-0} - -if [ $SIMPLE -eq 0 ]; then - PING=pingsrv.o -else - PING=spingsrv.o -fi - -case "$1" in - toe) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../toenal/ktoenal.o - /sbin/insmod ./$PING nal=4 - echo ktoenal > /tmp/nal - ;; - - tcp) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../socknal/ksocknal.o - /sbin/insmod ./$PING nal=2 - echo ksocknal > /tmp/nal - ;; - - elan) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../qswnal/kqswnal.o - /sbin/insmod ./$PING nal=4 - echo kqswnal > /tmp/nal - ;; - - *) - echo "Usage : ${0} < tcp | toe | elan >" - exit 1; -esac -../utils/acceptor 9999& -exit 0; diff --git a/lnet/tests/stopclient.sh b/lnet/tests/stopclient.sh deleted file mode 100644 index f7e3aa1..0000000 --- a/lnet/tests/stopclient.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-1} - -if [ $SIMPLE -eq 0 ]; then - PING=spingcli -else - PING=pingcli -fi - -rmmod $PING -NAL=`cat /tmp/nal`; -rmmod $NAL -rmmod portals diff --git a/lnet/tests/stopserver.sh b/lnet/tests/stopserver.sh deleted file mode 100644 index 3e81831..0000000 --- a/lnet/tests/stopserver.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-1} - -if [ $SIMPLE -eq 0 ]; then - PING=spingsrv -else - PING=pingsrv -fi - -rmmod $PING -NAL=`cat /tmp/nal`; -rmmod $NAL -killall -9 acceptor -rm -f /var/run/acceptor-9999.pid -rmmod portals diff --git a/lnet/ulnds/.cvsignore b/lnet/ulnds/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lnet/ulnds/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lnet/ulnds/Makefile.am b/lnet/ulnds/Makefile.am deleted file mode 100644 index dc427b0..0000000 --- a/lnet/ulnds/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -CPPFLAGS= -INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include 
-I$(srcdir) -lib_LIBRARIES = libtcpnal.a -pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h -libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h diff --git a/lnet/ulnds/README b/lnet/ulnds/README deleted file mode 100644 index 6cb93d9..0000000 --- a/lnet/ulnds/README +++ /dev/null @@ -1,53 +0,0 @@ -This library implements two NAL interfaces, both running over IP. -The first, tcpnal, creates TCP connections between participating -processes in order to transport the portals requests. The second, -ernal, provides a simple transport protocol which runs over -UDP datagrams. - -The interface functions return both of these values in host order for -convenience and readability. However this means that addresses -exchanged in messages between hosts of different orderings will not -function properly. - -Both NALs use the same support functions in order to schedule events -and communicate with the generic portals implementation. - - ------------------------- - | api | - |_______________________| - | lib | - |_______________________| - | ernal | |tcpnal | - |--------| |----------| - | udpsock| |connection| - |-----------------------| - | timer/select | - ------------------------- - - - These NALs uses the framework from fdnal of a pipe between the api -and library sides. This is wrapped up in the select on the library -side, and blocks on the api side. Performance could be severely -enhanced by collapsing this aritificial barrier, by using shared -memory queues, or by wiring the api layer directly to the library. - - -nid is defined as the low order 24-bits of the IP address of the -physical node left shifted by 8 plus a virtual node number of 0 -through 255 (really only 239). The virtual node number of a tcpnal -application should be specified using the environment variable -PTL_VIRTNODE. 
pid is now a completely arbitrary number in the -range of 0 to 255. The IP interface used can be overridden by -specifying the appropriate hostid by setting the PTL_HOSTID -environment variable. The value can be either dotted decimal -(n.n.n.n) or hex starting with "0x". -TCPNAL: - As the NAL needs to try to send to a particular nid/pid pair, it - will open up connections on demand. Because the port associated with - the connecting socket is different from the bound port, two - connections will normally be established between a pair of peers, with - data flowing from the anonymous connect (active) port to the advertised - or well-known bound (passive) port of each peer. - - Should the connection fail to open, an error is reported to the - library component, which causes the api request to fail. diff --git a/lnet/ulnds/address.c b/lnet/ulnds/address.c deleted file mode 100644 index b422c3f..0000000 --- a/lnet/ulnds/address.c +++ /dev/null @@ -1,146 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* address.c: - * this file provides functions to aquire the IP address of the node - * and translate them into a NID/PID pair which supports a static - * mapping of virtual nodes into the port range of an IP socket. -*/ - -#include -#include -#include -#include -#include -#include -#include - - -/* Function: get_node_id - * Returns: a 32 bit id for this node, actually a big-endian IP address - * - * get_node_id() determines the host name and uses the resolver to - * find out its ip address. This is fairly fragile and inflexible, but - * explicitly asking about interfaces and their addresses is very - * complicated and nonportable. - */ -static unsigned int get_node_id(void) -{ - char buffer[255]; - unsigned int x; - struct hostent *he; - char * host_envp; - - if (!(host_envp = getenv("PTL_HOSTID"))) - { - gethostname(buffer,sizeof(buffer)); - he=gethostbyname(buffer); - if (he) - x=*(unsigned int *)he->h_addr_list[0]; - else - x = 0; - return(ntohl(x)); - } - else - { - if (host_envp[1] != 'x') - { - int a, b, c, d; - sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d); - return ((a<<24) | (b<<16) | (c<<8) | d); - } - else - { - long long hostid = strtoll(host_envp, 0, 0); - return((unsigned int) hostid); - } - } -} - - -/* Function: set_address - * Arugments: t: a procnal structure to populate with the request - * - * set_address performs the bit manipulations to set the nid, pid, and - * iptop8 fields of the procnal structures. 
- * - * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY - */ - -#ifdef DIRECT_IP_MODE -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int port; - if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; - else port=pidrequest; - t->nal_cb->ni.nid=get_node_id(); - t->nal_cb->ni.pid=port; -} -#else - -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int virtnode, in_addr, port; - ptl_pid_t pid; - - /* get and remember my node id*/ - if (!getenv("PTL_VIRTNODE")) - virtnode = 0; - else - { - int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT - >> PNAL_VNODE_SHIFT); - virtnode = atoi(getenv("PTL_VIRTNODE")); - if (virtnode > maxvnode) - { - fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n", - virtnode, maxvnode); - return; - } - } - - in_addr = get_node_id(); - - t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; - - pid=pidrequest; - /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */ -#ifdef notyet - if (pid==(unsigned short)PTL_PID_ANY) port = 0; -#endif - if (pid==(unsigned short)PTL_PID_ANY) - { - fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); - return; - } - else if (pid > PNAL_PID_MASK) - { - fprintf(stderr, "portal pid of %d is too large - max %d\n", - pid, PNAL_PID_MASK); - return; - } - else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - t->nal_cb->ni.pid=pid; -} -#endif diff --git a/lnet/ulnds/bridge.h b/lnet/ulnds/bridge.h deleted file mode 100644 index 0b4940f..0000000 --- a/lnet/ulnds/bridge.h +++ /dev/null @@ -1,29 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#include - -typedef struct bridge { - int alive; - nal_cb_t *nal_cb; - void *lower; - void *local; - void (*shutdown)(struct bridge *); - /* this doesn't really belong here */ - unsigned char iptop8; -} *bridge; - - -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - -typedef int (*nal_initialize)(bridge); -extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lnet/ulnds/connection.c b/lnet/ulnds/connection.c deleted file mode 100644 index 310e899..0000000 --- a/lnet/ulnds/connection.c +++ /dev/null @@ -1,294 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* connection.c: - This file provides a simple stateful connection manager which - builds tcp connections on demand and leaves them open for - future use. 
It also provides the machinery to allow peers - to connect to it -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* global variable: acceptor port */ -unsigned short tcpnal_acceptor_port = 988; - - -/* Function: compare_connection - * Arguments: connection c: a connection in the hash table - * ptl_process_id_t: an id to verify agains - * Returns: 1 if the connection is the one requested, 0 otherwise - * - * compare_connection() tests for collisions in the hash table - */ -static int compare_connection(void *arg1, void *arg2) -{ - connection c = arg1; - unsigned int * id = arg2; - return((c->ip==id[0]) && (c->port==id[1])); -} - - -/* Function: connection_key - * Arguments: ptl_process_id_t id: an id to hash - * Returns: a not-particularily-well-distributed hash - * of the id - */ -static unsigned int connection_key(unsigned int *id) -{ - return(id[0]^id[1]); -} - - -/* Function: remove_connection - * Arguments: c: the connection to remove - */ -void remove_connection(void *arg) -{ - connection c = arg; - unsigned int id[2]; - - id[0]=c->ip; - id[1]=c->port; - hash_table_remove(c->m->connections,id); - close(c->fd); - free(c); -} - - -/* Function: read_connection: - * Arguments: c: the connection to read from - * dest: the buffer to read into - * len: the number of bytes to read - * Returns: success as 1, or failure as 0 - * - * read_connection() reads data from the connection, continuing - * to read partial results until the request is satisfied or - * it errors. TODO: this read should be covered by signal protection. 
- */ -int read_connection(connection c, - unsigned char *dest, - int len) -{ - int offset=0,rc; - - if (len){ - do { - if((rc=syscall(SYS_read, c->fd, dest+offset, len-offset))<=0){ - if (errno==EINTR) { - rc=0; - } else { - remove_connection(c); - return(0); - } - } - offset+=rc; - } while (offsetm->handler)(c->m->handler_arg,c)); -} - - -/* Function: allocate_connection - * Arguments: t: tcpnal the allocation is occuring in the context of - * dest: portal endpoint address for this connection - * fd: open file descriptor for the socket - * Returns: an allocated connection structure - * - * just encompasses the action common to active and passive - * connections of allocation and placement in the global table - */ -static connection allocate_connection(manager m, - unsigned int ip, - unsigned short port, - int fd) -{ - connection c=malloc(sizeof(struct connection)); - unsigned int id[2]; - c->m=m; - c->fd=fd; - c->ip=ip; - c->port=port; - id[0]=ip; - id[1]=port; - register_io_handler(fd,READ_HANDLER,connection_input,c); - hash_table_insert(m->connections,c,id); - return(c); -} - - -/* Function: new_connection - * Arguments: t: opaque argument holding the tcpname - * Returns: 1 in order to reregister for new connection requests - * - * called when the bound service socket recieves - * a new connection request, it always accepts and - * installs a new connection - */ -static int new_connection(void *z) -{ - manager m=z; - struct sockaddr_in s; - int len=sizeof(struct sockaddr_in); - int fd=accept(m->bound,(struct sockaddr *)&s,&len); - unsigned int nid=*((unsigned int *)&s.sin_addr); - /* cfs specific hack */ - //unsigned short pid=s.sin_port; - allocate_connection(m,htonl(nid),0/*pid*/,fd); - return(1); -} - - -/* Function: force_tcp_connection - * Arguments: t: tcpnal - * dest: portals endpoint for the connection - * Returns: an allocated connection structure, either - * a pre-existing one, or a new connection - */ -connection force_tcp_connection(manager m, - 
unsigned int ip, - unsigned short port) -{ - connection c; - struct sockaddr_in addr; - unsigned int id[2]; - - port = tcpnal_acceptor_port; - - id[0]=ip; - id[1]=port; - - if (!(c=hash_table_find(m->connections,id))){ - int fd; - - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); - - if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("tcpnal socket failed"); - exit(-1); - } - if (connect(fd, - (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) - { - perror("tcpnal connect"); - return(0); - } - return(allocate_connection(m,ip,port,fd)); - } - return(c); -} - - -/* Function: bind_socket - * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to - * Returns: 1 on success, or 0 on error - * - * bind_socket() attempts to allocate and bind a socket to the requested - * port, or dynamically assign one from the kernel should the port be - * zero. Sets the bound and bound_handler elements of m. - * - * TODO: The port should be an explicitly sized type. 
- */ -static int bind_socket(manager m,unsigned short port) -{ - struct sockaddr_in addr; - int alen=sizeof(struct sockaddr_in); - - if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0) - return(0); - - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = 0; - addr.sin_port = port; - - if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ - perror ("tcpnal bind"); - return(0); - } - - getsockname(m->bound,(struct sockaddr *)&addr, &alen); - - m->bound_handler=register_io_handler(m->bound,READ_HANDLER, - new_connection,m); - listen(m->bound,5); - m->port=addr.sin_port; - return(1); -} - - -/* Function: shutdown_connections - * Arguments: m: the manager structure - * - * close all connections and reclaim resources - */ -void shutdown_connections(manager m) -{ - close(m->bound); - remove_io_handler(m->bound_handler); - hash_destroy_table(m->connections,remove_connection); - free(m); -} - - -/* Function: init_connections - * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to - * Returns: a newly allocated manager structure, or - * zero if the fixed port could not be bound - */ -manager init_connections(unsigned short pid, - int (*input)(void *, void *), - void *a) -{ - manager m=(manager)malloc(sizeof(struct manager)); - m->connections=hash_create_table(compare_connection,connection_key); - m->handler=input; - m->handler_arg=a; - if (bind_socket(m,pid)) return(m); - free(m); - return(0); -} diff --git a/lnet/ulnds/connection.h b/lnet/ulnds/connection.h deleted file mode 100644 index 6f57287..0000000 --- a/lnet/ulnds/connection.h +++ /dev/null @@ -1,32 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#include - -typedef struct manager { - table connections; - int bound; - io_handler bound_handler; - int (*handler)(void *, void *); - void *handler_arg; - unsigned short port; -} *manager; - - -typedef struct connection { - unsigned int ip; - unsigned short port; - int fd; - manager m; -} *connection; - -connection force_tcp_connection(manager m, unsigned int ip, unsigned int short); -manager init_connections(unsigned short, int (*f)(void *, void *), void *); -void remove_connection(void *arg); -void shutdown_connections(manager m); -int read_connection(connection c, unsigned char *dest, int len); diff --git a/lnet/ulnds/debug.c b/lnet/ulnds/debug.c deleted file mode 100644 index 529bb2d..0000000 --- a/lnet/ulnds/debug.c +++ /dev/null @@ -1,119 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include -#include -#include -#include -#include - -int smp_processor_id = 1; -char debug_file_path[1024] = "/tmp/lustre-log"; -char debug_file_name[1024]; -FILE *debug_file_fd; - -int portals_do_debug_dumplog(void *arg) -{ - printf("Look in %s\n", debug_file_name); - return 0; -} - - -void portals_debug_print(void) -{ - return; -} - - -void portals_debug_dumplog(void) -{ - printf("Look in %s\n", debug_file_name); - return; -} - - -int portals_debug_init(unsigned long bufsize) -{ - debug_file_fd = stdout; - return 0; -} - -int portals_debug_cleanup(void) -{ - return 0; //close(portals_debug_fd); -} - -int portals_debug_clear_buffer(void) -{ - return 0; -} - -int portals_debug_mark_buffer(char *text) -{ - - fprintf(debug_file_fd, "*******************************************************************************\n"); - fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); - fprintf(debug_file_fd, "*******************************************************************************\n"); - - return 0; -} - -int portals_debug_copy_to_user(char *buf, unsigned long len) -{ - return 0; -} - -/* FIXME: I'm not very smart; someone smarter should make this better. */ -void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - const char *format, ...) -{ - va_list ap; - unsigned long flags; - struct timeval tv; - int nob; - - - /* NB since we pass a non-zero sized buffer (at least) on the first - * print, we can be assured that by the end of all the snprinting, - * we _do_ have a terminated buffer, even if our message got truncated. 
- */ - - gettimeofday(&tv, NULL); - - nob += fprintf(debug_file_fd, - "%02x:%06x:%d:%lu.%06lu ", - subsys >> 24, mask, smp_processor_id, - tv.tv_sec, tv.tv_usec); - - nob += fprintf(debug_file_fd, - "(%s:%d:%s() %d+%ld): ", - file, line, fn, 0, - 8192 - ((unsigned long)&flags & 8191UL)); - - va_start (ap, format); - nob += fprintf(debug_file_fd, format, ap); - va_end (ap); - - -} - diff --git a/lnet/ulnds/dispatch.h b/lnet/ulnds/dispatch.h deleted file mode 100644 index 34dd070..0000000 --- a/lnet/ulnds/dispatch.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* this file is only called dispatch.h to prevent it - from colliding with /usr/include/sys/select.h */ - -typedef struct io_handler *io_handler; - -struct io_handler{ - io_handler *last; - io_handler next; - int fd; - int type; - int (*function)(void *); - void *argument; - int disabled; -}; - - -#define READ_HANDLER 1 -#define WRITE_HANDLER 2 -#define EXCEPTION_HANDLER 4 -#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER) - -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg); - -void remove_io_handler (io_handler i); -void init_unix_timer(void); -void select_timer_block(when until); -when now(void); diff --git a/lnet/ulnds/ipmap.h b/lnet/ulnds/ipmap.h deleted file mode 100644 index 85b1e18..0000000 --- a/lnet/ulnds/ipmap.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#define DIRECT_IP_MODE -#ifdef DIRECT_IP_MODE -#define PNAL_NID(in_addr, port) (in_addr) -#define PNAL_PID(pid) (pid) -#define PNAL_IP(in_addr, port) (in_addr) -#define PNAL_PORT(nid, pid) (pid) -#else - -#define PNAL_BASE_PORT 4096 -#define PNAL_HOSTID_SHIFT 24 -#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1) -#define PNAL_VNODE_SHIFT 8 -#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1) -#define PNAL_PID_SHIFT 8 -#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1) - -#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \ - << PNAL_VNODE_SHIFT) \ - | (((ntohs(port)-PNAL_BASE_PORT) >>\ - PNAL_PID_SHIFT))) -#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK) - -#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\ - >> PNAL_VNODE_SHIFT)\ - | (t->iptop8 << PNAL_HOSTID_SHIFT))) -#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \ - << PNAL_VNODE_SHIFT) \ - | ((pid) & PNAL_PID_MASK)) \ - + PNAL_BASE_PORT)) -#endif diff --git a/lnet/ulnds/pqtimer.c b/lnet/ulnds/pqtimer.c deleted file mode 100644 index fa2fb4f..0000000 --- a/lnet/ulnds/pqtimer.c +++ /dev/null @@ -1,226 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* timer.c: - * this file implements a simple priority-queue based timer system. when - * combined with a file which implements now() and block(), it can - * be used to provide course-grained time-based callbacks. - */ - -#include -#include -#include - -struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -}; - -typedef struct thunk *thunk; -struct thunk { - void (*f)(void *); - void *a; - thunk next; -}; - -extern when now(void); - -static thunk thunks; -static int internal; -static void (*block_function)(when); -static int number_of_timers; -static int size_of_pqueue; -static timer *timers; - - -static void heal(int where) -{ - int left=(where<<1); - int right=(where<<1)+1; - int min=where; - timer temp; - - if (left <= number_of_timers) - if (timers[left]->w < timers[min]->w) min=left; - if (right <= number_of_timers) - if (timers[right]->w < timers[min]->w) min=right; - if (min != where){ - temp=timers[where]; - timers[where]=timers[min]; - timers[min]=temp; - heal(min); - } -} - -static void add_pqueue(int i) -{ - timer temp; - int parent=(i>>1); - if ((i>1) && (timers[i]->w< timers[parent]->w)){ - temp=timers[i]; - timers[i]=timers[parent]; - timers[parent]=temp; - add_pqueue(parent); - } -} - -static void add_timer(timer t) -{ - if (size_of_pqueue<(number_of_timers+2)){ - int oldsize=size_of_pqueue; - timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10)); - memcpy(new,timers,sizeof(timer)*oldsize); - timers=new; - } - timers[++number_of_timers]=t; - add_pqueue(number_of_timers); -} - -/* Function: register_timer - * Arguments: interval: the time interval from the current time when - * the timer function should be called - * function: the function to call when the time has expired - * argument: 
the argument to call it with. - * Returns: a pointer to a timer structure - */ -timer register_timer(when interval, - void (*function)(void *), - void *argument) -{ - timer t=(timer)malloc(sizeof(struct timer)); - - t->arg=argument; - t->function=function; - t->interval=interval; - t->disable=0; - t->w=now()+interval; - add_timer(t); - if (!internal && (number_of_timers==1)) - block_function(t->w); - return(t); -} - -/* Function: remove_timer - * Arguments: t: - * Returns: nothing - * - * remove_timer removes a timer from the system, insuring - * that it will never be called. It does not actually - * free the timer due to reentrancy issues. - */ - -void remove_timer(timer t) -{ - t->disable=1; -} - - - -void timer_fire() -{ - timer current; - - current=timers[1]; - timers[1]=timers[number_of_timers--]; - heal(1); - if (!current->disable) { - (*current->function)(current->arg); - } - free(current); -} - -when next_timer(void) -{ - when here=now(); - - while (number_of_timers && (timers[1]->w <= here)) timer_fire(); - if (number_of_timers) return(timers[1]->w); - return(0); -} - -/* Function: timer_loop - * Arguments: none - * Returns: never - * - * timer_loop() is the blocking dispatch function for the timer. - * Is calls the block() function registered with init_timer, - * and handles associated with timers that have been registered. - */ -void timer_loop() -{ - when here; - - while (1){ - thunk z; - here=now(); - - for (z=thunks;z;z=z->next) (*z->f)(z->a); - - if (number_of_timers){ - if (timers[1]->w > here){ - (*block_function)(timers[1]->w); - } else { - timer_fire(); - } - } else { - thunk z; - for (z=thunks;z;z=z->next) (*z->f)(z->a); - (*block_function)(0); - } - } -} - - -/* Function: register_thunk - * Arguments: f: the function to call - * a: the single argument to call it with - * - * Thunk functions get called at irregular intervals, they - * should not assume when, or take a particularily long - * amount of time. 
Thunks are for background cleanup tasks. - */ -void register_thunk(void (*f)(void *),void *a) -{ - thunk t=(void *)malloc(sizeof(struct thunk)); - t->f=f; - t->a=a; - t->next=thunks; - thunks=t; -} - -/* Function: initialize_timer - * Arguments: block: the function to call to block for the specified interval - * - * initialize_timer() must be called before any other timer function, - * including timer_loop. - */ -void initialize_timer(void (*block)(when)) -{ - block_function=block; - number_of_timers=0; - size_of_pqueue=10; - timers=(timer *)malloc(sizeof(timer)*size_of_pqueue); - thunks=0; -} diff --git a/lnet/ulnds/pqtimer.h b/lnet/ulnds/pqtimer.h deleted file mode 100644 index 11efb0e..0000000 --- a/lnet/ulnds/pqtimer.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -typedef unsigned long long when; -when now(void); -typedef struct timer *timer; -timer register_timer(when interval, - void (*function)(void *), - void *argument); -timer register_timer_wait(void); -void remove_timer(timer); -void timer_loop(void); -void initialize_timer(void (*block)(when)); -void timer_fire(void); - - -#define HZ 0x100000000ull - - diff --git a/lnet/ulnds/procapi.c b/lnet/ulnds/procapi.c deleted file mode 100644 index 6da3210..0000000 --- a/lnet/ulnds/procapi.c +++ /dev/null @@ -1,283 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* api.c: - * This file provides the 'api' side for the process-based nals. - * it is responsible for creating the 'library' side thread, - * and passing wrapped portals transactions to it. - * - * Along with initialization, shutdown, and transport to the library - * side, this file contains some stubs to satisfy the nal definition. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Function: forward - * Arguments: nal_t *nal: pointer to my top-side nal structure - * id: the command to pass to the lower layer - * args, args_len:pointer to and length of the request - * ret, ret_len: pointer to and size of the result - * Returns: a portals status code - * - * forwards a packaged api call from the 'api' side to the 'library' - * side, and collects the result - */ -#define forward_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(PTL_SEGV);\ - } -static int procbridge_forward(nal_t *n, int id, void *args, ptl_size_t args_len, - void *ret, ptl_size_t ret_len) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int lib=p->to_lib[1]; - int k; - - forward_failure(write,lib, &id, sizeof(id)); - forward_failure(write,lib,&args_len, sizeof(args_len)); - forward_failure(write,lib,&ret_len, sizeof(ret_len)); - forward_failure(write,lib,args, args_len); - - do { - k=syscall(SYS_read, p->from_lib[0], ret, ret_len); - } while ((k!=ret_len) && (errno += EINTR)); - - 
if(k!=ret_len){ - perror("nal: read return block"); - return PTL_SEGV; - } - return (PTL_OK); -} -#undef forward_failure - - -/* Function: shutdown - * Arguments: nal: a pointer to my top side nal structure - * ni: my network interface index - * - * cleanup nal state, reclaim the lower side thread and - * its state using PTL_FINI codepoint - */ -static int procbridge_shutdown(nal_t *n, int ni) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int code=PTL_FINI; - - syscall(SYS_write, p->to_lib[1],&code,sizeof(code)); - syscall(SYS_read, p->from_lib[0],&code,sizeof(code)); - - syscall(SYS_close, p->to_lib[0]); - syscall(SYS_close, p->to_lib[1]); - syscall(SYS_close, p->from_lib[0]); - syscall(SYS_close, p->from_lib[1]); - - free(p); - return(0); -} - - -/* Function: validate - * useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, ptl_size_t extent) -{ - return(0); -} - - -/* Function: yield - * Arguments: pid: - * - * this function was originally intended to allow the - * lower half thread to be scheduled to allow progress. we - * overload it to explicitly block until signalled by the - * lower half. - */ -static void procbridge_yield(nal_t *n) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - - pthread_mutex_lock(&p->mutex); - pthread_cond_wait(&p->cond,&p->mutex); - pthread_mutex_unlock(&p->mutex); -} - - -static void procbridge_lock(nal_t * nal, unsigned long *flags){} -static void procbridge_unlock(nal_t * nal, unsigned long *flags){} -/* api_nal - * the interface vector to allow the generic code to access - * this nal. this is seperate from the library side nal_cb. 
- * TODO: should be dyanmically allocated - */ -static nal_t api_nal = { - ni: {0}, - nal_data: NULL, - forward: procbridge_forward, - shutdown: procbridge_shutdown, - validate: procbridge_validate, - yield: procbridge_yield, - lock: procbridge_lock, - unlock: procbridge_unlock -}; - -/* Function: bridge_init - * - * Arguments: pid: requested process id (port offset) - * PTL_ID_ANY not supported. - * desired: limits passed from the application - * and effectively ignored - * actual: limits actually allocated and returned - * - * Returns: a pointer to my statically allocated top side NAL - * structure - * - * initializes the tcp nal. we define unix_failure as an - * error wrapper to cut down clutter. - */ -#define unix_failure(operand,fd,buffer,length,text)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - perror(text);\ - return(NULL);\ - } -#if 0 -static nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - - if (desired) limits = *desired; - unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t), - "tcp_init: read"); - unix_failure(read,p->from_lib[0], rc, sizeof(rc), - "nal_init: read"); - - if(*rc) 
return(NULL); - - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#endif - -ptl_nid_t tcpnal_mynid; - -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - - unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], &rc, sizeof(rc), - "nal_init: read"); - - if(rc) return(NULL); - - b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#undef unix_failure diff --git a/lnet/ulnds/procbridge.h b/lnet/ulnds/procbridge.h deleted file mode 100644 index 060ae7b..0000000 --- a/lnet/ulnds/procbridge.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef _PROCBRIDGE_H_ -#define _PROCBRIDGE_H_ - -#include -#include -#include - - -typedef struct procbridge { - pthread_t t; - pthread_cond_t cond; - pthread_mutex_t mutex; - int to_lib[2]; - int from_lib[2]; -} *procbridge; - -extern void *nal_thread(void *); - - -#define PTL_INIT (LIB_MAX_DISPATCH+1) -#define PTL_FINI (LIB_MAX_DISPATCH+2) - -#define MAX_ACLS 1 -#define MAX_PTLS 128 - -extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); - -#endif diff --git a/lnet/ulnds/proclib.c b/lnet/ulnds/proclib.c deleted file mode 100644 index c3ee103..0000000 --- a/lnet/ulnds/proclib.c +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* lib.c: - * This file provides the 'library' side for the process-based nals. - * it is responsible for communication with the 'api' side and - * providing service to the generic portals 'library' - * implementation. 
'library' might be better termed 'communication' - * or 'kernel'. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include -#include - -/* the following functions are stubs to satisfy the nal definition - without doing anything particularily useful*/ - -static int nal_write(nal_cb_t *nal, - void *private, - user_ptr dst_addr, - void *src_addr, - ptl_size_t len) -{ - memcpy(dst_addr, src_addr, len); - return 0; -} - -static int nal_read(nal_cb_t * nal, - void *private, - void *dst_addr, - user_ptr src_addr, - size_t len) -{ - memcpy(dst_addr, src_addr, len); - return 0; -} - -static void *nal_malloc(nal_cb_t *nal, - ptl_size_t len) -{ - void *buf = malloc(len); - return buf; -} - -static void nal_free(nal_cb_t *nal, - void *buf, - ptl_size_t len) -{ - free(buf); -} - -static void nal_printf(nal_cb_t *nal, - const char *fmt, - ...) -{ - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - va_end(ap); -} - - -static void nal_cli(nal_cb_t *nal, - unsigned long *flags) -{ -} - - -static void nal_sti(nal_cb_t *nal, - unsigned long *flags) -{ -} - - -static int nal_dist(nal_cb_t *nal, - ptl_nid_t nid, - unsigned long *dist) -{ - return 0; -} - - - -/* Function: data_from_api - * Arguments: t: the nal state for this interface - * Returns: whether to continue reading from the pipe - * - * data_from_api() reads data from the api side in response - * to a select. - * - * We define data_failure() for syntactic convenience - * of unix error reporting. 
- */ - -#define data_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(0);\ - } -static int data_from_api(void *arg) -{ - bridge b = arg; - procbridge p=(procbridge)b->local; - /* where are these two sizes derived from ??*/ - char arg_block[ 256 ]; - char ret_block[ 128 ]; - ptl_size_t arg_len,ret_len; - int fd=p->to_lib[0]; - int index; - - data_failure(read,fd, &index, sizeof(index)); - - if (index==PTL_FINI) { - lib_fini(b->nal_cb); - if (b->shutdown) (*b->shutdown)(b); - syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive)); - - /* a heavy-handed but convenient way of shutting down - the lower side thread */ - pthread_exit(0); - } - - data_failure(read,fd, &arg_len, sizeof(arg_len)); - data_failure(read,fd, &ret_len, sizeof(ret_len)); - data_failure(read,fd, arg_block, arg_len); - - lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block); - - data_failure(write,p->from_lib[1],ret_block, ret_len); - return(1); -} -#undef data_failure - - - -static void wakeup_topside(void *z) -{ - bridge b=z; - procbridge p=b->local; - - pthread_mutex_lock(&p->mutex); - pthread_cond_broadcast(&p->cond); - pthread_mutex_unlock(&p->mutex); -} - - -/* Function: nal_thread - * Arguments: z: an opaque reference to a nal control structure - * allocated and partially populated by the api level code - * Returns: nothing, and only on error or explicit shutdown - * - * This function is the entry point of the pthread initiated on - * the api side of the interface. This thread is used to handle - * asynchronous delivery to the application. 
- * - * We define a limit macro to place a ceiling on limits - * for syntactic convenience - */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - -extern int tcpnal_init(bridge); - -nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; - -void *nal_thread(void *z) -{ - bridge b=z; - procbridge p=b->local; - int rc; - ptl_pid_t pid_request; - int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; - - b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); - b->nal_cb->nal_data=b; - b->nal_cb->cb_read=nal_read; - b->nal_cb->cb_write=nal_write; - b->nal_cb->cb_malloc=nal_malloc; - b->nal_cb->cb_free=nal_free; - b->nal_cb->cb_map=NULL; - b->nal_cb->cb_unmap=NULL; - b->nal_cb->cb_printf=nal_printf; - b->nal_cb->cb_cli=nal_cli; - b->nal_cb->cb_sti=nal_sti; - b->nal_cb->cb_dist=nal_dist; - - - register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b); - - if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type)))) - perror("procbridge read from api"); - - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); - - if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); - /* initialize the generic 'library' level code */ - - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); - - /* - * Whatever the initialization returned is passed back to the - * user level code for further 
interpretation. We just exit if - * it is non-zero since something went wrong. - */ - /* this should perform error checking */ -#if 0 - write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t)); -#endif - syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc)); - - if(!rc) { - /* the thunk function is called each time the timer loop - performs an operation and returns to blocking mode. we - overload this function to inform the api side that - it may be interested in looking at the event queue */ - register_thunk(wakeup_topside,b); - timer_loop(); - } - return(0); -} -#undef LIMIT - diff --git a/lnet/ulnds/select.c b/lnet/ulnds/select.c deleted file mode 100644 index c4f84f4..0000000 --- a/lnet/ulnds/select.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* select.c: - * Provides a general mechanism for registering and dispatching - * io events through the select system call. 
- */ - -#ifdef sun -#include -#else -#include -#endif - -#include -#include -#include -#include -#include - - -static struct timeval beginning_of_epoch; -static io_handler io_handlers; - -/* Function: now - * - * Return: the current time in canonical units: a 64 bit number - * where the most significant 32 bits contains the number - * of seconds, and the least signficant a count of (1/(2^32))ths - * of a second. - */ -when now() -{ - struct timeval result; - - gettimeofday(&result,0); - return((((unsigned long long)result.tv_sec)<<32)| - (((unsigned long long)result.tv_usec)<<32)/1000000); -} - - -/* Function: register_io_handler - * Arguments: fd: the file descriptor of interest - * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER - * function: a function to call when io is available on fd - * arg: an opaque correlator to return to the handler - * Returns: a pointer to the io_handler structure - */ -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg) -{ - io_handler i=(io_handler)malloc(sizeof(struct io_handler)); - if ((i->fd=fd)>=0){ - i->type=type; - i->function=function; - i->argument=arg; - i->disabled=0; - i->last=&io_handlers; - if ((i->next=io_handlers)) i->next->last=&i->next; - io_handlers=i; - } - return(i); -} - -/* Function: remove_io_handler - * Arguments: i: a pointer to the handler to stop servicing - * - * remove_io_handler() doesn't actually free the handler, due - * to reentrancy problems. it just marks the handler for - * later cleanup by the blocking function. 
- */ -void remove_io_handler (io_handler i) -{ - i->disabled=1; -} - -static void set_flag(io_handler n,fd_set *fds) -{ - if (n->type & READ_HANDLER) FD_SET(n->fd,fds); - if (n->type & WRITE_HANDLER) FD_SET(n->fd,fds+1); - if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd,fds+2); -} - - -/* Function: select_timer_block - * Arguments: until: an absolute time when the select should return - * - * This function dispatches the various file descriptors' handler - * functions, if the kernel indicates there is io available. - */ -void select_timer_block(when until) -{ - fd_set fds[3]; - struct timeval timeout; - struct timeval *timeout_pointer; - int result; - io_handler j; - io_handler *k; - - /* TODO: loop until the entire interval is expired*/ - if (until){ - when interval=until-now(); - timeout.tv_sec=(interval>>32); - timeout.tv_usec=((interval<<32)/1000000)>>32; - timeout_pointer=&timeout; - } else timeout_pointer=0; - - FD_ZERO(fds); - FD_ZERO(fds+1); - FD_ZERO(fds+2); - for (k=&io_handlers;*k;){ - if ((*k)->disabled){ - j=*k; - *k=(*k)->next; - free(j); - } - if (*k) { - set_flag(*k,fds); - k=&(*k)->next; - } - } - result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer); - - if (result > 0) - for (j=io_handlers;j;j=j->next){ - if (!(j->disabled) && - ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) || - (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) || - (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){ - if (!(*j->function)(j->argument)) - j->disabled=1; - } - } -} - -/* Function: init_unix_timer() - * is called to initialize the library - */ -void init_unix_timer() -{ - io_handlers=0; - gettimeofday(&beginning_of_epoch, 0); - initialize_timer(select_timer_block); -} diff --git a/lnet/ulnds/socklnd/Makefile.am b/lnet/ulnds/socklnd/Makefile.am deleted file mode 100644 index dc427b0..0000000 --- a/lnet/ulnds/socklnd/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -CPPFLAGS= -INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -I$(srcdir) 
-lib_LIBRARIES = libtcpnal.a -pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h -libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h diff --git a/lnet/ulnds/socklnd/README b/lnet/ulnds/socklnd/README deleted file mode 100644 index 6cb93d9..0000000 --- a/lnet/ulnds/socklnd/README +++ /dev/null @@ -1,53 +0,0 @@ -This library implements two NAL interfaces, both running over IP. -The first, tcpnal, creates TCP connections between participating -processes in order to transport the portals requests. The second, -ernal, provides a simple transport protocol which runs over -UDP datagrams. - -The interface functions return both of these values in host order for -convenience and readability. However this means that addresses -exchanged in messages between hosts of different orderings will not -function properly. - -Both NALs use the same support functions in order to schedule events -and communicate with the generic portals implementation. - - ------------------------- - | api | - |_______________________| - | lib | - |_______________________| - | ernal | |tcpnal | - |--------| |----------| - | udpsock| |connection| - |-----------------------| - | timer/select | - ------------------------- - - - These NALs uses the framework from fdnal of a pipe between the api -and library sides. This is wrapped up in the select on the library -side, and blocks on the api side. Performance could be severely -enhanced by collapsing this aritificial barrier, by using shared -memory queues, or by wiring the api layer directly to the library. - - -nid is defined as the low order 24-bits of the IP address of the -physical node left shifted by 8 plus a virtual node number of 0 -through 255 (really only 239). The virtual node number of a tcpnal -application should be specified using the environment variable -PTL_VIRTNODE. 
pid is now a completely arbitrary number in the -range of 0 to 255. The IP interface used can be overridden by -specifying the appropriate hostid by setting the PTL_HOSTID -environment variable. The value can be either dotted decimal -(n.n.n.n) or hex starting with "0x". -TCPNAL: - As the NAL needs to try to send to a particular nid/pid pair, it - will open up connections on demand. Because the port associated with - the connecting socket is different from the bound port, two - connections will normally be established between a pair of peers, with - data flowing from the anonymous connect (active) port to the advertised - or well-known bound (passive) port of each peer. - - Should the connection fail to open, an error is reported to the - library component, which causes the api request to fail. diff --git a/lnet/ulnds/socklnd/address.c b/lnet/ulnds/socklnd/address.c deleted file mode 100644 index b422c3f..0000000 --- a/lnet/ulnds/socklnd/address.c +++ /dev/null @@ -1,146 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -/* address.c: - * this file provides functions to aquire the IP address of the node - * and translate them into a NID/PID pair which supports a static - * mapping of virtual nodes into the port range of an IP socket. -*/ - -#include -#include -#include -#include -#include -#include -#include - - -/* Function: get_node_id - * Returns: a 32 bit id for this node, actually a big-endian IP address - * - * get_node_id() determines the host name and uses the resolver to - * find out its ip address. This is fairly fragile and inflexible, but - * explicitly asking about interfaces and their addresses is very - * complicated and nonportable. - */ -static unsigned int get_node_id(void) -{ - char buffer[255]; - unsigned int x; - struct hostent *he; - char * host_envp; - - if (!(host_envp = getenv("PTL_HOSTID"))) - { - gethostname(buffer,sizeof(buffer)); - he=gethostbyname(buffer); - if (he) - x=*(unsigned int *)he->h_addr_list[0]; - else - x = 0; - return(ntohl(x)); - } - else - { - if (host_envp[1] != 'x') - { - int a, b, c, d; - sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d); - return ((a<<24) | (b<<16) | (c<<8) | d); - } - else - { - long long hostid = strtoll(host_envp, 0, 0); - return((unsigned int) hostid); - } - } -} - - -/* Function: set_address - * Arugments: t: a procnal structure to populate with the request - * - * set_address performs the bit manipulations to set the nid, pid, and - * iptop8 fields of the procnal structures. 
- * - * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY - */ - -#ifdef DIRECT_IP_MODE -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int port; - if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; - else port=pidrequest; - t->nal_cb->ni.nid=get_node_id(); - t->nal_cb->ni.pid=port; -} -#else - -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int virtnode, in_addr, port; - ptl_pid_t pid; - - /* get and remember my node id*/ - if (!getenv("PTL_VIRTNODE")) - virtnode = 0; - else - { - int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT - >> PNAL_VNODE_SHIFT); - virtnode = atoi(getenv("PTL_VIRTNODE")); - if (virtnode > maxvnode) - { - fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n", - virtnode, maxvnode); - return; - } - } - - in_addr = get_node_id(); - - t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; - - pid=pidrequest; - /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */ -#ifdef notyet - if (pid==(unsigned short)PTL_PID_ANY) port = 0; -#endif - if (pid==(unsigned short)PTL_PID_ANY) - { - fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); - return; - } - else if (pid > PNAL_PID_MASK) - { - fprintf(stderr, "portal pid of %d is too large - max %d\n", - pid, PNAL_PID_MASK); - return; - } - else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - t->nal_cb->ni.pid=pid; -} -#endif diff --git a/lnet/ulnds/socklnd/bridge.h b/lnet/ulnds/socklnd/bridge.h deleted file mode 100644 index 0b4940f..0000000 --- a/lnet/ulnds/socklnd/bridge.h +++ /dev/null @@ -1,29 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#include - -typedef struct bridge { - int alive; - nal_cb_t *nal_cb; - void *lower; - void *local; - void (*shutdown)(struct bridge *); - /* this doesn't really belong here */ - unsigned char iptop8; -} *bridge; - - -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - -typedef int (*nal_initialize)(bridge); -extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lnet/ulnds/socklnd/connection.c b/lnet/ulnds/socklnd/connection.c deleted file mode 100644 index 310e899..0000000 --- a/lnet/ulnds/socklnd/connection.c +++ /dev/null @@ -1,294 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* connection.c: - This file provides a simple stateful connection manager which - builds tcp connections on demand and leaves them open for - future use. 
It also provides the machinery to allow peers - to connect to it -*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* global variable: acceptor port */ -unsigned short tcpnal_acceptor_port = 988; - - -/* Function: compare_connection - * Arguments: connection c: a connection in the hash table - * ptl_process_id_t: an id to verify agains - * Returns: 1 if the connection is the one requested, 0 otherwise - * - * compare_connection() tests for collisions in the hash table - */ -static int compare_connection(void *arg1, void *arg2) -{ - connection c = arg1; - unsigned int * id = arg2; - return((c->ip==id[0]) && (c->port==id[1])); -} - - -/* Function: connection_key - * Arguments: ptl_process_id_t id: an id to hash - * Returns: a not-particularily-well-distributed hash - * of the id - */ -static unsigned int connection_key(unsigned int *id) -{ - return(id[0]^id[1]); -} - - -/* Function: remove_connection - * Arguments: c: the connection to remove - */ -void remove_connection(void *arg) -{ - connection c = arg; - unsigned int id[2]; - - id[0]=c->ip; - id[1]=c->port; - hash_table_remove(c->m->connections,id); - close(c->fd); - free(c); -} - - -/* Function: read_connection: - * Arguments: c: the connection to read from - * dest: the buffer to read into - * len: the number of bytes to read - * Returns: success as 1, or failure as 0 - * - * read_connection() reads data from the connection, continuing - * to read partial results until the request is satisfied or - * it errors. TODO: this read should be covered by signal protection. 
- */ -int read_connection(connection c, - unsigned char *dest, - int len) -{ - int offset=0,rc; - - if (len){ - do { - if((rc=syscall(SYS_read, c->fd, dest+offset, len-offset))<=0){ - if (errno==EINTR) { - rc=0; - } else { - remove_connection(c); - return(0); - } - } - offset+=rc; - } while (offsetm->handler)(c->m->handler_arg,c)); -} - - -/* Function: allocate_connection - * Arguments: t: tcpnal the allocation is occuring in the context of - * dest: portal endpoint address for this connection - * fd: open file descriptor for the socket - * Returns: an allocated connection structure - * - * just encompasses the action common to active and passive - * connections of allocation and placement in the global table - */ -static connection allocate_connection(manager m, - unsigned int ip, - unsigned short port, - int fd) -{ - connection c=malloc(sizeof(struct connection)); - unsigned int id[2]; - c->m=m; - c->fd=fd; - c->ip=ip; - c->port=port; - id[0]=ip; - id[1]=port; - register_io_handler(fd,READ_HANDLER,connection_input,c); - hash_table_insert(m->connections,c,id); - return(c); -} - - -/* Function: new_connection - * Arguments: t: opaque argument holding the tcpname - * Returns: 1 in order to reregister for new connection requests - * - * called when the bound service socket recieves - * a new connection request, it always accepts and - * installs a new connection - */ -static int new_connection(void *z) -{ - manager m=z; - struct sockaddr_in s; - int len=sizeof(struct sockaddr_in); - int fd=accept(m->bound,(struct sockaddr *)&s,&len); - unsigned int nid=*((unsigned int *)&s.sin_addr); - /* cfs specific hack */ - //unsigned short pid=s.sin_port; - allocate_connection(m,htonl(nid),0/*pid*/,fd); - return(1); -} - - -/* Function: force_tcp_connection - * Arguments: t: tcpnal - * dest: portals endpoint for the connection - * Returns: an allocated connection structure, either - * a pre-existing one, or a new connection - */ -connection force_tcp_connection(manager m, - 
unsigned int ip, - unsigned short port) -{ - connection c; - struct sockaddr_in addr; - unsigned int id[2]; - - port = tcpnal_acceptor_port; - - id[0]=ip; - id[1]=port; - - if (!(c=hash_table_find(m->connections,id))){ - int fd; - - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); - - if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("tcpnal socket failed"); - exit(-1); - } - if (connect(fd, - (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) - { - perror("tcpnal connect"); - return(0); - } - return(allocate_connection(m,ip,port,fd)); - } - return(c); -} - - -/* Function: bind_socket - * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to - * Returns: 1 on success, or 0 on error - * - * bind_socket() attempts to allocate and bind a socket to the requested - * port, or dynamically assign one from the kernel should the port be - * zero. Sets the bound and bound_handler elements of m. - * - * TODO: The port should be an explicitly sized type. 
- */ -static int bind_socket(manager m,unsigned short port) -{ - struct sockaddr_in addr; - int alen=sizeof(struct sockaddr_in); - - if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0) - return(0); - - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = 0; - addr.sin_port = port; - - if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ - perror ("tcpnal bind"); - return(0); - } - - getsockname(m->bound,(struct sockaddr *)&addr, &alen); - - m->bound_handler=register_io_handler(m->bound,READ_HANDLER, - new_connection,m); - listen(m->bound,5); - m->port=addr.sin_port; - return(1); -} - - -/* Function: shutdown_connections - * Arguments: m: the manager structure - * - * close all connections and reclaim resources - */ -void shutdown_connections(manager m) -{ - close(m->bound); - remove_io_handler(m->bound_handler); - hash_destroy_table(m->connections,remove_connection); - free(m); -} - - -/* Function: init_connections - * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to - * Returns: a newly allocated manager structure, or - * zero if the fixed port could not be bound - */ -manager init_connections(unsigned short pid, - int (*input)(void *, void *), - void *a) -{ - manager m=(manager)malloc(sizeof(struct manager)); - m->connections=hash_create_table(compare_connection,connection_key); - m->handler=input; - m->handler_arg=a; - if (bind_socket(m,pid)) return(m); - free(m); - return(0); -} diff --git a/lnet/ulnds/socklnd/connection.h b/lnet/ulnds/socklnd/connection.h deleted file mode 100644 index 6f57287..0000000 --- a/lnet/ulnds/socklnd/connection.h +++ /dev/null @@ -1,32 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#include - -typedef struct manager { - table connections; - int bound; - io_handler bound_handler; - int (*handler)(void *, void *); - void *handler_arg; - unsigned short port; -} *manager; - - -typedef struct connection { - unsigned int ip; - unsigned short port; - int fd; - manager m; -} *connection; - -connection force_tcp_connection(manager m, unsigned int ip, unsigned int short); -manager init_connections(unsigned short, int (*f)(void *, void *), void *); -void remove_connection(void *arg); -void shutdown_connections(manager m); -int read_connection(connection c, unsigned char *dest, int len); diff --git a/lnet/ulnds/socklnd/debug.c b/lnet/ulnds/socklnd/debug.c deleted file mode 100644 index 529bb2d..0000000 --- a/lnet/ulnds/socklnd/debug.c +++ /dev/null @@ -1,119 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include -#include -#include -#include -#include - -int smp_processor_id = 1; -char debug_file_path[1024] = "/tmp/lustre-log"; -char debug_file_name[1024]; -FILE *debug_file_fd; - -int portals_do_debug_dumplog(void *arg) -{ - printf("Look in %s\n", debug_file_name); - return 0; -} - - -void portals_debug_print(void) -{ - return; -} - - -void portals_debug_dumplog(void) -{ - printf("Look in %s\n", debug_file_name); - return; -} - - -int portals_debug_init(unsigned long bufsize) -{ - debug_file_fd = stdout; - return 0; -} - -int portals_debug_cleanup(void) -{ - return 0; //close(portals_debug_fd); -} - -int portals_debug_clear_buffer(void) -{ - return 0; -} - -int portals_debug_mark_buffer(char *text) -{ - - fprintf(debug_file_fd, "*******************************************************************************\n"); - fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); - fprintf(debug_file_fd, "*******************************************************************************\n"); - - return 0; -} - -int portals_debug_copy_to_user(char *buf, unsigned long len) -{ - return 0; -} - -/* FIXME: I'm not very smart; someone smarter should make this better. */ -void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - const char *format, ...) -{ - va_list ap; - unsigned long flags; - struct timeval tv; - int nob; - - - /* NB since we pass a non-zero sized buffer (at least) on the first - * print, we can be assured that by the end of all the snprinting, - * we _do_ have a terminated buffer, even if our message got truncated. 
- */ - - gettimeofday(&tv, NULL); - - nob += fprintf(debug_file_fd, - "%02x:%06x:%d:%lu.%06lu ", - subsys >> 24, mask, smp_processor_id, - tv.tv_sec, tv.tv_usec); - - nob += fprintf(debug_file_fd, - "(%s:%d:%s() %d+%ld): ", - file, line, fn, 0, - 8192 - ((unsigned long)&flags & 8191UL)); - - va_start (ap, format); - nob += fprintf(debug_file_fd, format, ap); - va_end (ap); - - -} - diff --git a/lnet/ulnds/socklnd/dispatch.h b/lnet/ulnds/socklnd/dispatch.h deleted file mode 100644 index 34dd070..0000000 --- a/lnet/ulnds/socklnd/dispatch.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* this file is only called dispatch.h to prevent it - from colliding with /usr/include/sys/select.h */ - -typedef struct io_handler *io_handler; - -struct io_handler{ - io_handler *last; - io_handler next; - int fd; - int type; - int (*function)(void *); - void *argument; - int disabled; -}; - - -#define READ_HANDLER 1 -#define WRITE_HANDLER 2 -#define EXCEPTION_HANDLER 4 -#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER) - -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg); - -void remove_io_handler (io_handler i); -void init_unix_timer(void); -void select_timer_block(when until); -when now(void); diff --git a/lnet/ulnds/socklnd/ipmap.h b/lnet/ulnds/socklnd/ipmap.h deleted file mode 100644 index 85b1e18..0000000 --- a/lnet/ulnds/socklnd/ipmap.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#define DIRECT_IP_MODE -#ifdef DIRECT_IP_MODE -#define PNAL_NID(in_addr, port) (in_addr) -#define PNAL_PID(pid) (pid) -#define PNAL_IP(in_addr, port) (in_addr) -#define PNAL_PORT(nid, pid) (pid) -#else - -#define PNAL_BASE_PORT 4096 -#define PNAL_HOSTID_SHIFT 24 -#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1) -#define PNAL_VNODE_SHIFT 8 -#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1) -#define PNAL_PID_SHIFT 8 -#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1) - -#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \ - << PNAL_VNODE_SHIFT) \ - | (((ntohs(port)-PNAL_BASE_PORT) >>\ - PNAL_PID_SHIFT))) -#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK) - -#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\ - >> PNAL_VNODE_SHIFT)\ - | (t->iptop8 << PNAL_HOSTID_SHIFT))) -#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \ - << PNAL_VNODE_SHIFT) \ - | ((pid) & PNAL_PID_MASK)) \ - + PNAL_BASE_PORT)) -#endif diff --git a/lnet/ulnds/socklnd/pqtimer.c b/lnet/ulnds/socklnd/pqtimer.c deleted file mode 100644 index fa2fb4f..0000000 --- a/lnet/ulnds/socklnd/pqtimer.c +++ /dev/null @@ -1,226 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* timer.c: - * this file implements a simple priority-queue based timer system. when - * combined with a file which implements now() and block(), it can - * be used to provide course-grained time-based callbacks. - */ - -#include -#include -#include - -struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -}; - -typedef struct thunk *thunk; -struct thunk { - void (*f)(void *); - void *a; - thunk next; -}; - -extern when now(void); - -static thunk thunks; -static int internal; -static void (*block_function)(when); -static int number_of_timers; -static int size_of_pqueue; -static timer *timers; - - -static void heal(int where) -{ - int left=(where<<1); - int right=(where<<1)+1; - int min=where; - timer temp; - - if (left <= number_of_timers) - if (timers[left]->w < timers[min]->w) min=left; - if (right <= number_of_timers) - if (timers[right]->w < timers[min]->w) min=right; - if (min != where){ - temp=timers[where]; - timers[where]=timers[min]; - timers[min]=temp; - heal(min); - } -} - -static void add_pqueue(int i) -{ - timer temp; - int parent=(i>>1); - if ((i>1) && (timers[i]->w< timers[parent]->w)){ - temp=timers[i]; - timers[i]=timers[parent]; - timers[parent]=temp; - add_pqueue(parent); - } -} - -static void add_timer(timer t) -{ - if (size_of_pqueue<(number_of_timers+2)){ - int oldsize=size_of_pqueue; - timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10)); - memcpy(new,timers,sizeof(timer)*oldsize); - timers=new; - } - timers[++number_of_timers]=t; - add_pqueue(number_of_timers); -} - -/* Function: register_timer - * Arguments: interval: the time interval from the current time when - * the timer function should be called - * function: the function to call when the time has expired - * argument: 
the argument to call it with. - * Returns: a pointer to a timer structure - */ -timer register_timer(when interval, - void (*function)(void *), - void *argument) -{ - timer t=(timer)malloc(sizeof(struct timer)); - - t->arg=argument; - t->function=function; - t->interval=interval; - t->disable=0; - t->w=now()+interval; - add_timer(t); - if (!internal && (number_of_timers==1)) - block_function(t->w); - return(t); -} - -/* Function: remove_timer - * Arguments: t: - * Returns: nothing - * - * remove_timer removes a timer from the system, insuring - * that it will never be called. It does not actually - * free the timer due to reentrancy issues. - */ - -void remove_timer(timer t) -{ - t->disable=1; -} - - - -void timer_fire() -{ - timer current; - - current=timers[1]; - timers[1]=timers[number_of_timers--]; - heal(1); - if (!current->disable) { - (*current->function)(current->arg); - } - free(current); -} - -when next_timer(void) -{ - when here=now(); - - while (number_of_timers && (timers[1]->w <= here)) timer_fire(); - if (number_of_timers) return(timers[1]->w); - return(0); -} - -/* Function: timer_loop - * Arguments: none - * Returns: never - * - * timer_loop() is the blocking dispatch function for the timer. - * Is calls the block() function registered with init_timer, - * and handles associated with timers that have been registered. - */ -void timer_loop() -{ - when here; - - while (1){ - thunk z; - here=now(); - - for (z=thunks;z;z=z->next) (*z->f)(z->a); - - if (number_of_timers){ - if (timers[1]->w > here){ - (*block_function)(timers[1]->w); - } else { - timer_fire(); - } - } else { - thunk z; - for (z=thunks;z;z=z->next) (*z->f)(z->a); - (*block_function)(0); - } - } -} - - -/* Function: register_thunk - * Arguments: f: the function to call - * a: the single argument to call it with - * - * Thunk functions get called at irregular intervals, they - * should not assume when, or take a particularily long - * amount of time. 
Thunks are for background cleanup tasks. - */ -void register_thunk(void (*f)(void *),void *a) -{ - thunk t=(void *)malloc(sizeof(struct thunk)); - t->f=f; - t->a=a; - t->next=thunks; - thunks=t; -} - -/* Function: initialize_timer - * Arguments: block: the function to call to block for the specified interval - * - * initialize_timer() must be called before any other timer function, - * including timer_loop. - */ -void initialize_timer(void (*block)(when)) -{ - block_function=block; - number_of_timers=0; - size_of_pqueue=10; - timers=(timer *)malloc(sizeof(timer)*size_of_pqueue); - thunks=0; -} diff --git a/lnet/ulnds/socklnd/pqtimer.h b/lnet/ulnds/socklnd/pqtimer.h deleted file mode 100644 index 11efb0e..0000000 --- a/lnet/ulnds/socklnd/pqtimer.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -typedef unsigned long long when; -when now(void); -typedef struct timer *timer; -timer register_timer(when interval, - void (*function)(void *), - void *argument); -timer register_timer_wait(void); -void remove_timer(timer); -void timer_loop(void); -void initialize_timer(void (*block)(when)); -void timer_fire(void); - - -#define HZ 0x100000000ull - - diff --git a/lnet/ulnds/socklnd/procapi.c b/lnet/ulnds/socklnd/procapi.c deleted file mode 100644 index 6da3210..0000000 --- a/lnet/ulnds/socklnd/procapi.c +++ /dev/null @@ -1,283 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* api.c: - * This file provides the 'api' side for the process-based nals. - * it is responsible for creating the 'library' side thread, - * and passing wrapped portals transactions to it. - * - * Along with initialization, shutdown, and transport to the library - * side, this file contains some stubs to satisfy the nal definition. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* Function: forward - * Arguments: nal_t *nal: pointer to my top-side nal structure - * id: the command to pass to the lower layer - * args, args_len:pointer to and length of the request - * ret, ret_len: pointer to and size of the result - * Returns: a portals status code - * - * forwards a packaged api call from the 'api' side to the 'library' - * side, and collects the result - */ -#define forward_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(PTL_SEGV);\ - } -static int procbridge_forward(nal_t *n, int id, void *args, ptl_size_t args_len, - void *ret, ptl_size_t ret_len) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int lib=p->to_lib[1]; - int k; - - forward_failure(write,lib, &id, sizeof(id)); - forward_failure(write,lib,&args_len, sizeof(args_len)); - forward_failure(write,lib,&ret_len, sizeof(ret_len)); - forward_failure(write,lib,args, args_len); - - do { - k=syscall(SYS_read, p->from_lib[0], ret, ret_len); - } while ((k!=ret_len) && (errno += EINTR)); - - if(k!=ret_len){ - perror("nal: read return block"); - return PTL_SEGV; - } - return (PTL_OK); -} -#undef forward_failure - - -/* Function: shutdown - * Arguments: nal: a pointer to my top side nal structure - * ni: my network interface index - * - * cleanup nal state, reclaim the lower side thread and - * its state using PTL_FINI codepoint - */ -static int procbridge_shutdown(nal_t *n, int ni) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int code=PTL_FINI; - - syscall(SYS_write, p->to_lib[1],&code,sizeof(code)); - syscall(SYS_read, p->from_lib[0],&code,sizeof(code)); - - syscall(SYS_close, p->to_lib[0]); - syscall(SYS_close, p->to_lib[1]); - syscall(SYS_close, p->from_lib[0]); - syscall(SYS_close, p->from_lib[1]); - - free(p); - return(0); -} - - -/* Function: validate - * 
useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, ptl_size_t extent) -{ - return(0); -} - - -/* Function: yield - * Arguments: pid: - * - * this function was originally intended to allow the - * lower half thread to be scheduled to allow progress. we - * overload it to explicitly block until signalled by the - * lower half. - */ -static void procbridge_yield(nal_t *n) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - - pthread_mutex_lock(&p->mutex); - pthread_cond_wait(&p->cond,&p->mutex); - pthread_mutex_unlock(&p->mutex); -} - - -static void procbridge_lock(nal_t * nal, unsigned long *flags){} -static void procbridge_unlock(nal_t * nal, unsigned long *flags){} -/* api_nal - * the interface vector to allow the generic code to access - * this nal. this is seperate from the library side nal_cb. - * TODO: should be dyanmically allocated - */ -static nal_t api_nal = { - ni: {0}, - nal_data: NULL, - forward: procbridge_forward, - shutdown: procbridge_shutdown, - validate: procbridge_validate, - yield: procbridge_yield, - lock: procbridge_lock, - unlock: procbridge_unlock -}; - -/* Function: bridge_init - * - * Arguments: pid: requested process id (port offset) - * PTL_ID_ANY not supported. - * desired: limits passed from the application - * and effectively ignored - * actual: limits actually allocated and returned - * - * Returns: a pointer to my statically allocated top side NAL - * structure - * - * initializes the tcp nal. we define unix_failure as an - * error wrapper to cut down clutter. 
- */ -#define unix_failure(operand,fd,buffer,length,text)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - perror(text);\ - return(NULL);\ - } -#if 0 -static nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - - if (desired) limits = *desired; - unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t), - "tcp_init: read"); - unix_failure(read,p->from_lib[0], rc, sizeof(rc), - "nal_init: read"); - - if(*rc) return(NULL); - - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#endif - -ptl_nid_t tcpnal_mynid; - -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - 
perror("nal_init: pipe"); - return(NULL); - } - - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - - unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], &rc, sizeof(rc), - "nal_init: read"); - - if(rc) return(NULL); - - b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#undef unix_failure diff --git a/lnet/ulnds/socklnd/procbridge.h b/lnet/ulnds/socklnd/procbridge.h deleted file mode 100644 index 060ae7b..0000000 --- a/lnet/ulnds/socklnd/procbridge.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef _PROCBRIDGE_H_ -#define _PROCBRIDGE_H_ - -#include -#include -#include - - -typedef struct procbridge { - pthread_t t; - pthread_cond_t cond; - pthread_mutex_t mutex; - int to_lib[2]; - int from_lib[2]; -} *procbridge; - -extern void *nal_thread(void *); - - -#define PTL_INIT (LIB_MAX_DISPATCH+1) -#define PTL_FINI (LIB_MAX_DISPATCH+2) - -#define MAX_ACLS 1 -#define MAX_PTLS 128 - -extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); - -#endif diff --git a/lnet/ulnds/socklnd/proclib.c b/lnet/ulnds/socklnd/proclib.c deleted file mode 100644 index c3ee103..0000000 --- a/lnet/ulnds/socklnd/proclib.c +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* lib.c: - * This file provides the 'library' side for the process-based nals. - * it is responsible for communication with the 'api' side and - * providing service to the generic portals 'library' - * implementation. 
'library' might be better termed 'communication' - * or 'kernel'. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -//#include -#include - -/* the following functions are stubs to satisfy the nal definition - without doing anything particularily useful*/ - -static int nal_write(nal_cb_t *nal, - void *private, - user_ptr dst_addr, - void *src_addr, - ptl_size_t len) -{ - memcpy(dst_addr, src_addr, len); - return 0; -} - -static int nal_read(nal_cb_t * nal, - void *private, - void *dst_addr, - user_ptr src_addr, - size_t len) -{ - memcpy(dst_addr, src_addr, len); - return 0; -} - -static void *nal_malloc(nal_cb_t *nal, - ptl_size_t len) -{ - void *buf = malloc(len); - return buf; -} - -static void nal_free(nal_cb_t *nal, - void *buf, - ptl_size_t len) -{ - free(buf); -} - -static void nal_printf(nal_cb_t *nal, - const char *fmt, - ...) -{ - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - va_end(ap); -} - - -static void nal_cli(nal_cb_t *nal, - unsigned long *flags) -{ -} - - -static void nal_sti(nal_cb_t *nal, - unsigned long *flags) -{ -} - - -static int nal_dist(nal_cb_t *nal, - ptl_nid_t nid, - unsigned long *dist) -{ - return 0; -} - - - -/* Function: data_from_api - * Arguments: t: the nal state for this interface - * Returns: whether to continue reading from the pipe - * - * data_from_api() reads data from the api side in response - * to a select. - * - * We define data_failure() for syntactic convenience - * of unix error reporting. 
- */ - -#define data_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(0);\ - } -static int data_from_api(void *arg) -{ - bridge b = arg; - procbridge p=(procbridge)b->local; - /* where are these two sizes derived from ??*/ - char arg_block[ 256 ]; - char ret_block[ 128 ]; - ptl_size_t arg_len,ret_len; - int fd=p->to_lib[0]; - int index; - - data_failure(read,fd, &index, sizeof(index)); - - if (index==PTL_FINI) { - lib_fini(b->nal_cb); - if (b->shutdown) (*b->shutdown)(b); - syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive)); - - /* a heavy-handed but convenient way of shutting down - the lower side thread */ - pthread_exit(0); - } - - data_failure(read,fd, &arg_len, sizeof(arg_len)); - data_failure(read,fd, &ret_len, sizeof(ret_len)); - data_failure(read,fd, arg_block, arg_len); - - lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block); - - data_failure(write,p->from_lib[1],ret_block, ret_len); - return(1); -} -#undef data_failure - - - -static void wakeup_topside(void *z) -{ - bridge b=z; - procbridge p=b->local; - - pthread_mutex_lock(&p->mutex); - pthread_cond_broadcast(&p->cond); - pthread_mutex_unlock(&p->mutex); -} - - -/* Function: nal_thread - * Arguments: z: an opaque reference to a nal control structure - * allocated and partially populated by the api level code - * Returns: nothing, and only on error or explicit shutdown - * - * This function is the entry point of the pthread initiated on - * the api side of the interface. This thread is used to handle - * asynchronous delivery to the application. 
- * - * We define a limit macro to place a ceiling on limits - * for syntactic convenience - */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - -extern int tcpnal_init(bridge); - -nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; - -void *nal_thread(void *z) -{ - bridge b=z; - procbridge p=b->local; - int rc; - ptl_pid_t pid_request; - int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; - - b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); - b->nal_cb->nal_data=b; - b->nal_cb->cb_read=nal_read; - b->nal_cb->cb_write=nal_write; - b->nal_cb->cb_malloc=nal_malloc; - b->nal_cb->cb_free=nal_free; - b->nal_cb->cb_map=NULL; - b->nal_cb->cb_unmap=NULL; - b->nal_cb->cb_printf=nal_printf; - b->nal_cb->cb_cli=nal_cli; - b->nal_cb->cb_sti=nal_sti; - b->nal_cb->cb_dist=nal_dist; - - - register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b); - - if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type)))) - perror("procbridge read from api"); - - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); - - if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); - /* initialize the generic 'library' level code */ - - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); - - /* - * Whatever the initialization returned is passed back to the - * user level code for further 
interpretation. We just exit if - * it is non-zero since something went wrong. - */ - /* this should perform error checking */ -#if 0 - write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t)); -#endif - syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc)); - - if(!rc) { - /* the thunk function is called each time the timer loop - performs an operation and returns to blocking mode. we - overload this function to inform the api side that - it may be interested in looking at the event queue */ - register_thunk(wakeup_topside,b); - timer_loop(); - } - return(0); -} -#undef LIMIT - diff --git a/lnet/ulnds/socklnd/select.c b/lnet/ulnds/socklnd/select.c deleted file mode 100644 index c4f84f4..0000000 --- a/lnet/ulnds/socklnd/select.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* select.c: - * Provides a general mechanism for registering and dispatching - * io events through the select system call. 
- */ - -#ifdef sun -#include -#else -#include -#endif - -#include -#include -#include -#include -#include - - -static struct timeval beginning_of_epoch; -static io_handler io_handlers; - -/* Function: now - * - * Return: the current time in canonical units: a 64 bit number - * where the most significant 32 bits contains the number - * of seconds, and the least signficant a count of (1/(2^32))ths - * of a second. - */ -when now() -{ - struct timeval result; - - gettimeofday(&result,0); - return((((unsigned long long)result.tv_sec)<<32)| - (((unsigned long long)result.tv_usec)<<32)/1000000); -} - - -/* Function: register_io_handler - * Arguments: fd: the file descriptor of interest - * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER - * function: a function to call when io is available on fd - * arg: an opaque correlator to return to the handler - * Returns: a pointer to the io_handler structure - */ -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg) -{ - io_handler i=(io_handler)malloc(sizeof(struct io_handler)); - if ((i->fd=fd)>=0){ - i->type=type; - i->function=function; - i->argument=arg; - i->disabled=0; - i->last=&io_handlers; - if ((i->next=io_handlers)) i->next->last=&i->next; - io_handlers=i; - } - return(i); -} - -/* Function: remove_io_handler - * Arguments: i: a pointer to the handler to stop servicing - * - * remove_io_handler() doesn't actually free the handler, due - * to reentrancy problems. it just marks the handler for - * later cleanup by the blocking function. 
- */ -void remove_io_handler (io_handler i) -{ - i->disabled=1; -} - -static void set_flag(io_handler n,fd_set *fds) -{ - if (n->type & READ_HANDLER) FD_SET(n->fd,fds); - if (n->type & WRITE_HANDLER) FD_SET(n->fd,fds+1); - if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd,fds+2); -} - - -/* Function: select_timer_block - * Arguments: until: an absolute time when the select should return - * - * This function dispatches the various file descriptors' handler - * functions, if the kernel indicates there is io available. - */ -void select_timer_block(when until) -{ - fd_set fds[3]; - struct timeval timeout; - struct timeval *timeout_pointer; - int result; - io_handler j; - io_handler *k; - - /* TODO: loop until the entire interval is expired*/ - if (until){ - when interval=until-now(); - timeout.tv_sec=(interval>>32); - timeout.tv_usec=((interval<<32)/1000000)>>32; - timeout_pointer=&timeout; - } else timeout_pointer=0; - - FD_ZERO(fds); - FD_ZERO(fds+1); - FD_ZERO(fds+2); - for (k=&io_handlers;*k;){ - if ((*k)->disabled){ - j=*k; - *k=(*k)->next; - free(j); - } - if (*k) { - set_flag(*k,fds); - k=&(*k)->next; - } - } - result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer); - - if (result > 0) - for (j=io_handlers;j;j=j->next){ - if (!(j->disabled) && - ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) || - (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) || - (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){ - if (!(*j->function)(j->argument)) - j->disabled=1; - } - } -} - -/* Function: init_unix_timer() - * is called to initialize the library - */ -void init_unix_timer() -{ - io_handlers=0; - gettimeofday(&beginning_of_epoch, 0); - initialize_timer(select_timer_block); -} diff --git a/lnet/ulnds/socklnd/table.c b/lnet/ulnds/socklnd/table.c deleted file mode 100644 index bef13c5..0000000 --- a/lnet/ulnds/socklnd/table.c +++ /dev/null @@ -1,264 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * 
vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include - - -/* table.c: - * a very simple hash table implementation with paramerterizable - * comparison and key generation functions. 
it does resize - * in order to accomidate more entries, but never collapses - * the table - */ - -static table_entry *table_lookup (table t,void *comparator, - unsigned int k, - int (*compare_function)(void *, void *), - int *success) -{ - unsigned int key=k%t->size; - table_entry *i; - - for (i=&(t->entries[key]);*i;i=&((*i)->next)){ - if (compare_function && ((*i)->key==k)) - if ((*t->compare_function)((*i)->value,comparator)){ - *success=1; - return(i); - } - } - *success=0; - return(&(t->entries[key])); -} - - -static void resize_table(table t, int size) -{ - int old_size=t->size; - table_entry *old_entries=t->entries; - int i; - table_entry j,n; - table_entry *position; - int success; - - t->size=size; - t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size); - memset(t->entries,0,sizeof(table_entry)*t->size); - - for (i=0;inext; - position=table_lookup(t,0,j->key,0,&success); - j->next= *position; - *position=j; - } - free(old_entries); -} - - -/* Function: key_from_int - * Arguments: int i: value to compute the key of - * Returns: the key - */ -unsigned int key_from_int(int i) -{ - return(i); -} - - -/* Function: key_from_string - * Arguments: char *s: the null terminated string - * to compute the key of - * Returns: the key - */ -unsigned int key_from_string(char *s) -{ - unsigned int result=0; - unsigned char *n; - int i; - if (!s) return(1); - for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i; - return(result); -} - - -/* Function: hash_create_table - * Arguments: compare_function: a function to compare - * a table instance with a correlator - * key_function: a function to generate a 32 bit - * hash key from a correlator - * Returns: a pointer to the new table - */ -table hash_create_table (int (*compare_function)(void *, void *), - unsigned int (*key_function)(unsigned int *)) -{ - table new=(table)malloc(sizeof(struct table)); - memset(new, 0, sizeof(struct table)); - - new->compare_function=compare_function; - new->key_function=key_function; - 
new->number_of_entries=0; - new->size=4; - new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size); - memset(new->entries,0,sizeof(table_entry)*new->size); - return(new); -} - - -/* Function: hash_table_find - * Arguments: t: a table to look in - * comparator: a value to access the table entry - * Returns: the element references to by comparator, or null - */ -void *hash_table_find (table t, void *comparator) -{ - int success; - table_entry* entry=table_lookup(t,comparator, - (*t->key_function)(comparator), - t->compare_function, - &success); - if (success) return((*entry)->value); - return(0); -} - - -/* Function: hash_table_insert - * Arguments: t: a table to insert the object - * value: the object to put in the table - * comparator: the value by which the object - * will be addressed - * Returns: nothing - */ -void hash_table_insert (table t, void *value, void *comparator) -{ - int success; - unsigned int k=(*t->key_function)(comparator); - table_entry *position=table_lookup(t,comparator,k, - t->compare_function,&success); - table_entry entry; - - if (success) { - entry = *position; - } else { - entry = (table_entry)malloc(sizeof(struct table_entry)); - memset(entry, 0, sizeof(struct table_entry)); - entry->next= *position; - *position=entry; - t->number_of_entries++; - } - entry->value=value; - entry->key=k; - if (t->number_of_entries > t->size) resize_table(t,t->size*2); -} - -/* Function: hash_table_remove - * Arguments: t: the table to remove the object from - * comparator: the index value of the object to remove - * Returns: - */ -void hash_table_remove (table t, void *comparator) -{ - int success; - table_entry temp; - table_entry *position=table_lookup(t,comparator, - (*t->key_function)(comparator), - t->compare_function,&success); - if(success) { - temp=*position; - *position=(*position)->next; - free(temp); /* the value? 
*/ - t->number_of_entries--; - } -} - -/* Function: hash_iterate_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - */ -void hash_iterate_table_entries(table t, - void (*handler)(void *,void *), - void *arg) -{ - int i; - table_entry *j,*next; - - for (i=0;isize;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - (*handler)(arg,(*j)->value); - } -} - -/* Function: hash_filter_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - * Notes: operations on the table inside handler are not safe - * - * filter_table_entires() calls the handler function for each - * item in the table, passing it and arg. The handler function - * returns 1 if it is to be retained in the table, and 0 - * if it is to be removed. 
- */ -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg) -{ - int i; - table_entry *j,*next,v; - - for (i=0;isize;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - if (!(*handler)(arg,(*j)->value)){ - next=j; - v=*j; - *j=(*j)->next; - free(v); - t->number_of_entries--; - } - } -} - -/* Function: destroy_table - * Arguments: t: the table to free - * thunk: a function to call with each element, - * most likely free() - * Returns: nothing - */ -void hash_destroy_table(table t,void (*thunk)(void *)) -{ - table_entry j,next; - int i; - for (i=0;isize;i++) - for (j=t->entries[i];j;j=next){ - next=j->next; - if (thunk) (*thunk)(j->value); - free(j); - } - free(t->entries); - free(t); -} diff --git a/lnet/ulnds/socklnd/table.h b/lnet/ulnds/socklnd/table.h deleted file mode 100644 index 7fab586..0000000 --- a/lnet/ulnds/socklnd/table.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef E_TABLE -#define E_TABLE - -typedef struct table_entry { - unsigned int key; - void *value; - struct table_entry *next; -} *table_entry; - - -typedef struct table { - unsigned int size; - int number_of_entries; - table_entry *entries; - int (*compare_function)(void *, void *); - unsigned int (*key_function)(unsigned int *); -} *table; - -/* table.c */ -unsigned int key_from_int(int i); -unsigned int key_from_string(char *s); -table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *)); -void *hash_table_find(table t, void *comparator); -void hash_table_insert(table t, void *value, void *comparator); -void hash_table_remove(table t, void *comparator); -void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg); -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg); -void hash_destroy_table(table t, void (*thunk)(void *)); - -#endif diff --git a/lnet/ulnds/socklnd/tcplnd.c b/lnet/ulnds/socklnd/tcplnd.c deleted file mode 100644 index 534fc17..0000000 --- a/lnet/ulnds/socklnd/tcplnd.c +++ /dev/null @@ -1,198 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* tcpnal.c: - This file implements the TCP-based nal by providing glue - between the connection service and the generic NAL implementation */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Function: tcpnal_send - * Arguments: nal: pointer to my nal control block - * private: unused - * cookie: passed back to the portals library - * hdr: pointer to the portals header - * nid: destination node - * pid: destination process - * data: body of the message - * len: length of the body - * Returns: zero on success - * - * sends a packet to the peer, after insuring that a connection exists - */ -#warning FIXME: "param 'type' is newly added, make use of it!!" -int tcpnal_send(nal_cb_t *n, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int niov, - struct iovec *iov, - size_t len) -{ - connection c; - bridge b=(bridge)n->nal_data; - struct iovec tiov[2]; - int count = 1; - - if (!(c=force_tcp_connection((manager)b->lower, - PNAL_IP(nid,b), - PNAL_PORT(nid,pid)))) - return(1); - -#if 0 - /* TODO: these results should be checked. furthermore, provision - must be made for the SIGPIPE which is delivered when - writing on a tcp socket which has closed underneath - the application. 
there is a linux flag in the sendmsg - call which turns off the signally behaviour, but its - nonstandard */ - syscall(SYS_write, c->fd,hdr,sizeof(ptl_hdr_t)); - LASSERT (niov <= 1); - if (len) syscall(SYS_write, c->fd,iov[0].iov_base,len); -#else - LASSERT (niov <= 1); - - tiov[0].iov_base = hdr; - tiov[0].iov_len = sizeof(ptl_hdr_t); - - if (len) { - tiov[1].iov_base = iov[0].iov_base; - tiov[1].iov_len = len; - count++; - } - - syscall(SYS_writev, c->fd, tiov, count); -#endif - lib_finalize(n, private, cookie); - - return(0); -} - - -/* Function: tcpnal_recv - * Arguments: nal_cb_t *nal: pointer to my nal control block - * void *private: connection pointer passed through - * lib_parse() - * lib_msg_t *cookie: passed back to portals library - * user_ptr data: pointer to the destination buffer - * size_t mlen: length of the body - * size_t rlen: length of data in the network - * Returns: zero on success - * - * blocking read of the requested data. must drain out the - * difference of mainpulated and requested lengths from the network - */ -int tcpnal_recv(nal_cb_t *n, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - ptl_size_t mlen, - ptl_size_t rlen) - -{ - if (mlen) { - LASSERT (niov <= 1); - read_connection(private,iov[0].iov_base,mlen); - lib_finalize(n, private, cookie); - } - - if (mlen!=rlen){ - char *trash=malloc(rlen-mlen); - - /*TODO: check error status*/ - read_connection(private,trash,rlen-mlen); - free(trash); - } - - return(rlen); -} - - -/* Function: from_connection: - * Arguments: c: the connection to read from - * Returns: whether or not to continue reading from this connection, - * expressed as a 1 to continue, and a 0 to not - * - * from_connection() is called from the select loop when i/o is - * available. It attempts to read the portals header and - * pass it to the generic library for processing. 
- */ -static int from_connection(void *a, void *d) -{ - connection c = d; - bridge b=a; - ptl_hdr_t hdr; - - if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - return(1); - } - return(0); -} - - -static void tcpnal_shutdown(bridge b) -{ - shutdown_connections(b->lower); -} - -/* Function: PTL_IFACE_TCP - * Arguments: pid_request: desired port number to bind to - * desired: passed NAL limits structure - * actual: returned NAL limits structure - * Returns: a nal structure on success, or null on failure - */ -int tcpnal_init(bridge b) -{ - manager m; - - b->nal_cb->cb_send=tcpnal_send; - b->nal_cb->cb_recv=tcpnal_recv; - b->shutdown=tcpnal_shutdown; - - if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid, - b->nal_cb->ni.pid), - from_connection,b))){ - /* TODO: this needs to shut down the - newly created junk */ - return(PTL_NAL_FAILED); - } - /* XXX cfs hack */ - b->nal_cb->ni.pid=0; - b->lower=m; - return(PTL_OK); -} diff --git a/lnet/ulnds/socklnd/timer.h b/lnet/ulnds/socklnd/timer.h deleted file mode 100644 index aaf39d2..0000000 --- a/lnet/ulnds/socklnd/timer.h +++ /dev/null @@ -1,30 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* TODO: make this an explicit type when they become available */ -typedef unsigned long long when; - -typedef struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -} *timer; - -timer register_timer(when, void (*f)(void *), void *a); -void remove_timer(timer t); -void timer_loop(void); -void initialize_timer(void); -void register_thunk(void (*f)(void *),void *a); - - -#define HZ 0x100000000ull - - diff --git a/lnet/ulnds/socklnd/utypes.h b/lnet/ulnds/socklnd/utypes.h deleted file mode 100644 index 7eca959..0000000 --- a/lnet/ulnds/socklnd/utypes.h +++ /dev/null @@ -1,12 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -typedef unsigned short uint16; -typedef unsigned long uint32; -typedef unsigned long long uint64; -typedef unsigned char uint8; diff --git a/lnet/ulnds/table.c b/lnet/ulnds/table.c deleted file mode 100644 index bef13c5..0000000 --- a/lnet/ulnds/table.c +++ /dev/null @@ -1,264 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include -#include -#include - - -/* table.c: - * a very simple hash table implementation with paramerterizable - * comparison and key generation functions. it does resize - * in order to accomidate more entries, but never collapses - * the table - */ - -static table_entry *table_lookup (table t,void *comparator, - unsigned int k, - int (*compare_function)(void *, void *), - int *success) -{ - unsigned int key=k%t->size; - table_entry *i; - - for (i=&(t->entries[key]);*i;i=&((*i)->next)){ - if (compare_function && ((*i)->key==k)) - if ((*t->compare_function)((*i)->value,comparator)){ - *success=1; - return(i); - } - } - *success=0; - return(&(t->entries[key])); -} - - -static void resize_table(table t, int size) -{ - int old_size=t->size; - table_entry *old_entries=t->entries; - int i; - table_entry j,n; - table_entry *position; - int success; - - t->size=size; - t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size); - memset(t->entries,0,sizeof(table_entry)*t->size); - - for (i=0;inext; - position=table_lookup(t,0,j->key,0,&success); - j->next= *position; - *position=j; - } - free(old_entries); -} - - -/* Function: key_from_int - * Arguments: int i: value to compute the key of - * Returns: the key - */ -unsigned int key_from_int(int i) -{ - return(i); -} - - -/* Function: key_from_string - * Arguments: char *s: the null terminated string - * to compute the key of - * Returns: the key - */ -unsigned int key_from_string(char *s) -{ - unsigned int result=0; - unsigned char *n; - int i; - if (!s) return(1); - for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i; - return(result); -} - - -/* Function: hash_create_table - * Arguments: compare_function: a function to compare - * a table instance with a correlator - * key_function: a function to generate a 32 bit - 
* hash key from a correlator - * Returns: a pointer to the new table - */ -table hash_create_table (int (*compare_function)(void *, void *), - unsigned int (*key_function)(unsigned int *)) -{ - table new=(table)malloc(sizeof(struct table)); - memset(new, 0, sizeof(struct table)); - - new->compare_function=compare_function; - new->key_function=key_function; - new->number_of_entries=0; - new->size=4; - new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size); - memset(new->entries,0,sizeof(table_entry)*new->size); - return(new); -} - - -/* Function: hash_table_find - * Arguments: t: a table to look in - * comparator: a value to access the table entry - * Returns: the element references to by comparator, or null - */ -void *hash_table_find (table t, void *comparator) -{ - int success; - table_entry* entry=table_lookup(t,comparator, - (*t->key_function)(comparator), - t->compare_function, - &success); - if (success) return((*entry)->value); - return(0); -} - - -/* Function: hash_table_insert - * Arguments: t: a table to insert the object - * value: the object to put in the table - * comparator: the value by which the object - * will be addressed - * Returns: nothing - */ -void hash_table_insert (table t, void *value, void *comparator) -{ - int success; - unsigned int k=(*t->key_function)(comparator); - table_entry *position=table_lookup(t,comparator,k, - t->compare_function,&success); - table_entry entry; - - if (success) { - entry = *position; - } else { - entry = (table_entry)malloc(sizeof(struct table_entry)); - memset(entry, 0, sizeof(struct table_entry)); - entry->next= *position; - *position=entry; - t->number_of_entries++; - } - entry->value=value; - entry->key=k; - if (t->number_of_entries > t->size) resize_table(t,t->size*2); -} - -/* Function: hash_table_remove - * Arguments: t: the table to remove the object from - * comparator: the index value of the object to remove - * Returns: - */ -void hash_table_remove (table t, void *comparator) -{ - int 
success; - table_entry temp; - table_entry *position=table_lookup(t,comparator, - (*t->key_function)(comparator), - t->compare_function,&success); - if(success) { - temp=*position; - *position=(*position)->next; - free(temp); /* the value? */ - t->number_of_entries--; - } -} - -/* Function: hash_iterate_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - */ -void hash_iterate_table_entries(table t, - void (*handler)(void *,void *), - void *arg) -{ - int i; - table_entry *j,*next; - - for (i=0;isize;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - (*handler)(arg,(*j)->value); - } -} - -/* Function: hash_filter_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - * Notes: operations on the table inside handler are not safe - * - * filter_table_entires() calls the handler function for each - * item in the table, passing it and arg. The handler function - * returns 1 if it is to be retained in the table, and 0 - * if it is to be removed. 
- */ -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg) -{ - int i; - table_entry *j,*next,v; - - for (i=0;isize;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - if (!(*handler)(arg,(*j)->value)){ - next=j; - v=*j; - *j=(*j)->next; - free(v); - t->number_of_entries--; - } - } -} - -/* Function: destroy_table - * Arguments: t: the table to free - * thunk: a function to call with each element, - * most likely free() - * Returns: nothing - */ -void hash_destroy_table(table t,void (*thunk)(void *)) -{ - table_entry j,next; - int i; - for (i=0;isize;i++) - for (j=t->entries[i];j;j=next){ - next=j->next; - if (thunk) (*thunk)(j->value); - free(j); - } - free(t->entries); - free(t); -} diff --git a/lnet/ulnds/table.h b/lnet/ulnds/table.h deleted file mode 100644 index 7fab586..0000000 --- a/lnet/ulnds/table.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef E_TABLE -#define E_TABLE - -typedef struct table_entry { - unsigned int key; - void *value; - struct table_entry *next; -} *table_entry; - - -typedef struct table { - unsigned int size; - int number_of_entries; - table_entry *entries; - int (*compare_function)(void *, void *); - unsigned int (*key_function)(unsigned int *); -} *table; - -/* table.c */ -unsigned int key_from_int(int i); -unsigned int key_from_string(char *s); -table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *)); -void *hash_table_find(table t, void *comparator); -void hash_table_insert(table t, void *value, void *comparator); -void hash_table_remove(table t, void *comparator); -void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg); -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg); -void hash_destroy_table(table t, void (*thunk)(void *)); - -#endif diff --git a/lnet/ulnds/tcplnd.c b/lnet/ulnds/tcplnd.c deleted file mode 100644 index 534fc17..0000000 --- a/lnet/ulnds/tcplnd.c +++ /dev/null @@ -1,198 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* tcpnal.c: - This file implements the TCP-based nal by providing glue - between the connection service and the generic NAL implementation */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* Function: tcpnal_send - * Arguments: nal: pointer to my nal control block - * private: unused - * cookie: passed back to the portals library - * hdr: pointer to the portals header - * nid: destination node - * pid: destination process - * data: body of the message - * len: length of the body - * Returns: zero on success - * - * sends a packet to the peer, after insuring that a connection exists - */ -#warning FIXME: "param 'type' is newly added, make use of it!!" -int tcpnal_send(nal_cb_t *n, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int niov, - struct iovec *iov, - size_t len) -{ - connection c; - bridge b=(bridge)n->nal_data; - struct iovec tiov[2]; - int count = 1; - - if (!(c=force_tcp_connection((manager)b->lower, - PNAL_IP(nid,b), - PNAL_PORT(nid,pid)))) - return(1); - -#if 0 - /* TODO: these results should be checked. furthermore, provision - must be made for the SIGPIPE which is delivered when - writing on a tcp socket which has closed underneath - the application. 
there is a linux flag in the sendmsg - call which turns off the signally behaviour, but its - nonstandard */ - syscall(SYS_write, c->fd,hdr,sizeof(ptl_hdr_t)); - LASSERT (niov <= 1); - if (len) syscall(SYS_write, c->fd,iov[0].iov_base,len); -#else - LASSERT (niov <= 1); - - tiov[0].iov_base = hdr; - tiov[0].iov_len = sizeof(ptl_hdr_t); - - if (len) { - tiov[1].iov_base = iov[0].iov_base; - tiov[1].iov_len = len; - count++; - } - - syscall(SYS_writev, c->fd, tiov, count); -#endif - lib_finalize(n, private, cookie); - - return(0); -} - - -/* Function: tcpnal_recv - * Arguments: nal_cb_t *nal: pointer to my nal control block - * void *private: connection pointer passed through - * lib_parse() - * lib_msg_t *cookie: passed back to portals library - * user_ptr data: pointer to the destination buffer - * size_t mlen: length of the body - * size_t rlen: length of data in the network - * Returns: zero on success - * - * blocking read of the requested data. must drain out the - * difference of mainpulated and requested lengths from the network - */ -int tcpnal_recv(nal_cb_t *n, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - ptl_size_t mlen, - ptl_size_t rlen) - -{ - if (mlen) { - LASSERT (niov <= 1); - read_connection(private,iov[0].iov_base,mlen); - lib_finalize(n, private, cookie); - } - - if (mlen!=rlen){ - char *trash=malloc(rlen-mlen); - - /*TODO: check error status*/ - read_connection(private,trash,rlen-mlen); - free(trash); - } - - return(rlen); -} - - -/* Function: from_connection: - * Arguments: c: the connection to read from - * Returns: whether or not to continue reading from this connection, - * expressed as a 1 to continue, and a 0 to not - * - * from_connection() is called from the select loop when i/o is - * available. It attempts to read the portals header and - * pass it to the generic library for processing. 
- */ -static int from_connection(void *a, void *d) -{ - connection c = d; - bridge b=a; - ptl_hdr_t hdr; - - if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - return(1); - } - return(0); -} - - -static void tcpnal_shutdown(bridge b) -{ - shutdown_connections(b->lower); -} - -/* Function: PTL_IFACE_TCP - * Arguments: pid_request: desired port number to bind to - * desired: passed NAL limits structure - * actual: returned NAL limits structure - * Returns: a nal structure on success, or null on failure - */ -int tcpnal_init(bridge b) -{ - manager m; - - b->nal_cb->cb_send=tcpnal_send; - b->nal_cb->cb_recv=tcpnal_recv; - b->shutdown=tcpnal_shutdown; - - if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid, - b->nal_cb->ni.pid), - from_connection,b))){ - /* TODO: this needs to shut down the - newly created junk */ - return(PTL_NAL_FAILED); - } - /* XXX cfs hack */ - b->nal_cb->ni.pid=0; - b->lower=m; - return(PTL_OK); -} diff --git a/lnet/ulnds/timer.h b/lnet/ulnds/timer.h deleted file mode 100644 index aaf39d2..0000000 --- a/lnet/ulnds/timer.h +++ /dev/null @@ -1,30 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* TODO: make this an explicit type when they become available */ -typedef unsigned long long when; - -typedef struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -} *timer; - -timer register_timer(when, void (*f)(void *), void *a); -void remove_timer(timer t); -void timer_loop(void); -void initialize_timer(void); -void register_thunk(void (*f)(void *),void *a); - - -#define HZ 0x100000000ull - - diff --git a/lnet/ulnds/utypes.h b/lnet/ulnds/utypes.h deleted file mode 100644 index 7eca959..0000000 --- a/lnet/ulnds/utypes.h +++ /dev/null @@ -1,12 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -typedef unsigned short uint16; -typedef unsigned long uint32; -typedef unsigned long long uint64; -typedef unsigned char uint8; diff --git a/lnet/utils/.cvsignore b/lnet/utils/.cvsignore deleted file mode 100644 index 148310a..0000000 --- a/lnet/utils/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -Makefile -Makefile.in -acceptor -debugctl -ptlctl -.deps -routerstat -wirecheck \ No newline at end of file diff --git a/lnet/utils/Makefile.am b/lnet/utils/Makefile.am deleted file mode 100644 index 05af598..0000000 --- a/lnet/utils/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - - -COMPILE = gcc -Wall -g -I$(srcdir)/../include -LINK = gcc -o $@ - -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck -lib_LIBRARIES = libptlctl.a - -acceptor_SOURCES = acceptor.c # -lefence - -wirecheck_SOURCES = wirecheck.c - -libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h - -ptlctl_SOURCES = ptlctl.c -ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence -ptlctl_DEPENDENCIES = libptlctl.a - -debugctl_SOURCES = debugctl.c -debugctl_LDADD = -L. -lptlctl -lncurses # -lefence -debugctl_DEPENDENCIES = libptlctl.a - -routerstat_SOURCES = routerstat.c diff --git a/lnet/utils/acceptor.c b/lnet/utils/acceptor.c deleted file mode 100644 index c6590db..0000000 --- a/lnet/utils/acceptor.c +++ /dev/null @@ -1,466 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -/* should get this from autoconf somehow */ -#ifndef PIDFILE_DIR -#define PIDFILE_DIR "/var/run" -#endif - -#define PROGNAME "acceptor" - -void create_pidfile(char *name, int port) -{ - char pidfile[1024]; - FILE *fp; - - snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", - PIDFILE_DIR, name, port); - - if ((fp = fopen(pidfile, "w"))) { - fprintf(fp, "%d\n", getpid()); - fclose(fp); - } else { - syslog(LOG_ERR, "%s: %s\n", pidfile, - strerror(errno)); - } -} - -int pidfile_exists(char *name, int port) -{ - char pidfile[1024]; - - snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", - PIDFILE_DIR, name, port); - - if (!access(pidfile, F_OK)) { - fprintf(stderr, "%s: exists, acceptor already running.\n", - pidfile); - return (1); - } - return (0); -} - -int -parse_size (int *sizep, char *str) -{ - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) - { - default: - return (-1); - - case 
1: - *sizep = size; - return (0); - - case 2: - switch (*mod) - { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } -} - -void -show_connection (int fd, __u32 net_ip, ptl_nid_t nid) -{ - struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET); - __u32 host_ip = ntohl (net_ip); - int rxmem = 0; - int txmem = 0; - int nonagle = 0; - int len; - char host[1024]; - - len = sizeof (txmem); - if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &len) != 0) - perror ("Cannot get write buffer size"); - - len = sizeof (rxmem); - if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &len) != 0) - perror ("Cannot get read buffer size"); - - len = sizeof (nonagle); - if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &len) != 0) - perror ("Cannot get nagle"); - - if (h == NULL) - snprintf (host, sizeof(host), "%d.%d.%d.%d", (host_ip >> 24) & 0xff, - (host_ip >> 16) & 0xff, (host_ip >> 8) & 0xff, host_ip & 0xff); - else - snprintf (host, sizeof(host), "%s", h->h_name); - - syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s\n", - host, nid, txmem, rxmem, nonagle ? 
"disabled" : "enabled"); -} - -int -sock_write (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = write (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) - { - fprintf (stderr, "Unexpected zero sock_write\n"); - abort(); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int -sock_read (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = read (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) /* EOF */ - { - errno = ECONNABORTED; - return (-1); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) -{ - int rc; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. 
*/ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (__cpu_to_le32 (hmv->magic) != PORTALS_PROTO_MAGIC) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (__cpu_to_le16 (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR || - __cpu_to_le16 (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __cpu_to_le16 (hmv->version_major), - __cpu_to_le16 (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... */ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)) != 0) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __cpu_to_le32 (hdr.type), - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr))); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - return (0); -} - -void -usage (char *myname) -{ - fprintf (stderr, "Usage: %s [-r recv_mem] [-s send_mem] [-n] [-N nal_id] port\n", myname); - exit (1); -} - -int main(int argc, char **argv) -{ - int o, fd, rc, port, pfd; - struct sockaddr_in srvaddr; - int c; - int rxmem = 0; - int txmem = 0; - int noclose = 0; - int nonagle = 1; - int nal = SOCKNAL; - int xchg_nids = 0; - int bind_irq = 0; - - while ((c = getopt (argc, argv, "N:r:s:nlxi")) != -1) - switch (c) - { - case 'r': - if (parse_size (&rxmem, optarg) != 0 || rxmem < 0) - usage (argv[0]); - break; - - case 's': - if (parse_size (&txmem, optarg) != 0 || txmem < 0) - usage 
(argv[0]); - break; - - case 'n': - nonagle = 0; - break; - - case 'l': - noclose = 1; - break; - - case 'x': - xchg_nids = 1; - break; - - case 'i': - bind_irq = 1; - break; - - case 'N': - if (parse_size(&nal, optarg) != 0 || - nal < 0 || nal > NAL_MAX_NR) - usage(argv[0]); - break; - - default: - usage (argv[0]); - break; - } - - if (optind >= argc) - usage (argv[0]); - - port = atol(argv[optind++]); - - if (pidfile_exists(PROGNAME, port)) - exit(1); - - memset(&srvaddr, 0, sizeof(srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(port); - srvaddr.sin_addr.s_addr = INADDR_ANY; - - fd = socket(PF_INET, SOCK_STREAM, 0); - if (fd < 0) { - perror("opening socket"); - exit(1); - } - - o = 1; - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) { - perror("Cannot set REUSEADDR socket opt"); - exit(1); - } - - if (nonagle) - { - o = 1; - rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o)); - if (rc != 0) - { - perror ("Cannot disable nagle"); - exit (1); - } - } - - if (txmem != 0) - { - rc = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, sizeof (txmem)); - if (rc != 0) - { - perror ("Cannot set write buffer size"); - exit (1); - } - } - - if (rxmem != 0) - { - rc = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, sizeof (rxmem)); - if (rc != 0) - { - perror ("Cannot set read buffer size"); - exit (1); - } - } - - rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); - if ( rc == -1 ) { - perror("bind: "); - exit(1); - } - - if (listen(fd, 127)) { - perror("listen: "); - exit(1); - } - fprintf(stderr, "listening on port %d\n", port); - - pfd = open("/dev/portals", O_RDWR); - if ( pfd < 0 ) { - perror("opening portals device"); - exit(1); - } - - rc = daemon(1, noclose); - if (rc < 0) { - perror("daemon(): "); - exit(1); - } - - openlog(PROGNAME, LOG_PID, LOG_DAEMON); - syslog(LOG_INFO, "started, listening on port %d\n", port); - create_pidfile(PROGNAME, port); - - while (1) { - struct sockaddr_in clntaddr; - int len = 
sizeof(clntaddr); - int cfd; - struct portal_ioctl_data data; - ptl_nid_t peer_nid; - - cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); - if ( cfd < 0 ) { - perror("accept"); - exit(0); - continue; - } - - if (!xchg_nids) - peer_nid = ntohl (clntaddr.sin_addr.s_addr); /* HOST byte order */ - else - { - PORTAL_IOC_INIT (data); - data.ioc_nal = nal; - rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); - if (rc < 0) - { - perror ("Can't get my NID"); - close (cfd); - continue; - } - - rc = exchange_nids (cfd, data.ioc_nid, &peer_nid); - if (rc != 0) - { - close (cfd); - continue; - } - } - - show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid); - - PORTAL_IOC_INIT(data); - data.ioc_fd = cfd; - data.ioc_nal = nal; - data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD; - data.ioc_nid = peer_nid; - data.ioc_flags = bind_irq; - - if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) { - perror("ioctl failed"); - - } else { - printf("client registered\n"); - } - rc = close(cfd); - if (rc) - perror ("close failed"); - } - - closelog(); - exit(0); - -} diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c deleted file mode 100644 index 9ab1c73d..0000000 --- a/lnet/utils/debug.c +++ /dev/null @@ -1,618 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#define BUG() /* workaround for module.h includes */ -#include - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include -#endif - -#include -#include -#include "parser.h" - -static char rawbuf[8192]; -static char *buf = rawbuf; -static int max = 8192; -//static int g_pfd = -1; -static int subsystem_array[1 << 8]; -static int debug_mask = ~0; - -static const char *portal_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite", - "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter", - "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL}; -static const char *portal_debug_masks[] = - {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", - "blocks", "net", "warning", "buffs", "other", "dentry", "portals", - "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL}; - -struct debug_daemon_cmd { - char *cmd; - unsigned int cmdv; -}; - -static const struct debug_daemon_cmd portal_debug_daemon_cmd[] = { - {"start", DEBUG_DAEMON_START}, - {"stop", DEBUG_DAEMON_STOP}, - {"pause", DEBUG_DAEMON_PAUSE}, - {"continue", DEBUG_DAEMON_CONTINUE}, - {0, 0} -}; - -static int do_debug_mask(char *name, int enable) -{ - int found = 0, i; - - for (i = 0; portal_debug_subsystems[i] != NULL; i++) { - if (strcasecmp(name, portal_debug_subsystems[i]) == 0 || - strcasecmp(name, "all_subs") == 0) { - printf("%s output from subsystem \"%s\"\n", - enable ? 
"Enabling" : "Disabling", - portal_debug_subsystems[i]); - subsystem_array[i] = enable; - found = 1; - } - } - for (i = 0; portal_debug_masks[i] != NULL; i++) { - if (strcasecmp(name, portal_debug_masks[i]) == 0 || - strcasecmp(name, "all_types") == 0) { - printf("%s output of type \"%s\"\n", - enable ? "Enabling" : "Disabling", - portal_debug_masks[i]); - if (enable) - debug_mask |= (1 << i); - else - debug_mask &= ~(1 << i); - found = 1; - } - } - - return found; -} - -int dbg_initialize(int argc, char **argv) -{ - memset(subsystem_array, 1, sizeof(subsystem_array)); - return 0; -} - -int jt_dbg_filter(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s \n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 0)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - return 0; -} - -int jt_dbg_show(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s \n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 1)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - - return 0; -} - -static int applymask(char* procpath, int value) -{ - int rc; - char buf[64]; - int len = snprintf(buf, 64, "%d", value); - - int fd = open(procpath, O_WRONLY); - if (fd == -1) { - fprintf(stderr, "Unable to open %s: %s\n", - procpath, strerror(errno)); - return fd; - } - rc = write(fd, buf, len+1); - if (rc<0) { - fprintf(stderr, "Write to %s failed: %s\n", - procpath, strerror(errno)); - return rc; - } - close(fd); - return 0; -} - -extern char *dump_filename; -extern int dump(int dev_id, int opc, void *buf); - -static void applymask_all(unsigned int subs_mask, unsigned int debug_mask) -{ - if (!dump_filename) { - applymask("/proc/sys/portals/subsystem_debug", subs_mask); - applymask("/proc/sys/portals/debug", debug_mask); - } else { - struct portals_debug_ioctl_data data; - - data.hdr.ioc_len = sizeof(data); - 
data.hdr.ioc_version = 0; - data.subs = subs_mask; - data.debug = debug_mask; - - dump(OBD_DEV_ID, PTL_IOC_DEBUG_MASK, &data); - } - printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/portals\n", - subs_mask, debug_mask); -} - -int jt_dbg_list(int argc, char **argv) -{ - int i; - - if (argc != 2) { - fprintf(stderr, "usage: %s \n", argv[0]); - return 0; - } - - if (strcasecmp(argv[1], "subs") == 0) { - printf("Subsystems: all_subs"); - for (i = 0; portal_debug_subsystems[i] != NULL; i++) - printf(", %s", portal_debug_subsystems[i]); - printf("\n"); - } else if (strcasecmp(argv[1], "types") == 0) { - printf("Types: all_types"); - for (i = 0; portal_debug_masks[i] != NULL; i++) - printf(", %s", portal_debug_masks[i]); - printf("\n"); - } - else if (strcasecmp(argv[1], "applymasks") == 0) { - unsigned int subsystem_mask = 0; - for (i = 0; portal_debug_subsystems[i] != NULL; i++) { - if (subsystem_array[i]) subsystem_mask |= (1 << i); - } - applymask_all(subsystem_mask, debug_mask); - } - return 0; -} - -/* if 'raw' is true, don't strip the debug information from the front of the - * lines */ -static void dump_buffer(FILE *fd, char *buf, int size, int raw) -{ - char *p, *z; - unsigned long subsystem, debug, dropped = 0, kept = 0; - int max_sub, max_type; - - for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++) - ; - for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++) - ; - - while (size) { - p = memchr(buf, '\n', size); - if (!p) - break; - subsystem = strtoul(buf, &z, 16); - debug = strtoul(z + 1, &z, 16); - - z++; - /* for some reason %*s isn't working. 
*/ - *p = '\0'; - if (subsystem < max_sub && - subsystem_array[subsystem] && - (!debug || (debug_mask & debug))) { - if (raw) - fprintf(fd, "%s\n", buf); - else - fprintf(fd, "%s\n", z); - //printf("%s\n", buf); - kept++; - } else { - //fprintf(stderr, "dropping line (%lx:%lx): %s\n", subsystem, debug, buf); - dropped++; - } - *p = '\n'; - p++; - size -= (p - buf); - buf = p; - } - - printf("Debug log: %lu lines, %lu kept, %lu dropped.\n", - dropped + kept, kept, dropped); -} - -int jt_dbg_debug_kernel(int argc, char **argv) -{ - int rc, raw = 1; - FILE *fd = stdout; - const int databuf_size = (6 << 20); - struct portal_ioctl_data data, *newdata; - char *databuf = NULL; - - if (argc > 3) { - fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]); - return 0; - } - - if (argc > 1) { - fd = fopen(argv[1], "w"); - if (fd == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - return -1; - } - } - if (argc > 2) - raw = atoi(argv[2]); - - databuf = malloc(databuf_size); - if (!databuf) { - fprintf(stderr, "No memory for buffer.\n"); - goto out; - } - - memset(&data, 0, sizeof(data)); - data.ioc_plen1 = databuf_size; - data.ioc_pbuf1 = databuf; - - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - goto out; - } - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_GET_DEBUG failed: %s\n", - strerror(errno)); - goto out; - } - - newdata = (struct portal_ioctl_data *)buf; - if (newdata->ioc_size > 0) - dump_buffer(fd, databuf, newdata->ioc_size, raw); - - out: - if (databuf) - free(databuf); - if (fd != stdout) - fclose(fd); - return 0; -} - -int jt_dbg_debug_daemon(int argc, char **argv) -{ - int i, rc; - unsigned int cmd = 0; - FILE *fd = stdout; - struct portal_ioctl_data data; - - if (argc <= 1) { - fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|" - "continue]\n", argv[0]); - return 0; - } - for (i = 0; portal_debug_daemon_cmd[i].cmd 
!= NULL; i++) { - if (strcasecmp(argv[1], portal_debug_daemon_cmd[i].cmd) == 0) { - cmd = portal_debug_daemon_cmd[i].cmdv; - break; - } - } - if (portal_debug_daemon_cmd[i].cmd == NULL) { - fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|" - "continue]\n", argv[0]); - return 0; - } - memset(&data, 0, sizeof(data)); - if (cmd == DEBUG_DAEMON_START) { - if (argc < 3) { - fprintf(stderr, "usage: %s [start file <#MB>|stop|" - "pause|continue]\n", argv[0]); - return 0; - } - if (access(argv[2], F_OK) != 0) { - fd = fopen(argv[2], "w"); - if (fd != NULL) { - fclose(fd); - remove(argv[2]); - goto ok; - } - } - if (access(argv[2], W_OK) == 0) - goto ok; - fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], - strerror(errno)); - return -1; -ok: - data.ioc_inllen1 = strlen(argv[2]) + 1; - data.ioc_inlbuf1 = argv[2]; - data.ioc_misc = 0; - if (argc == 4) { - unsigned long size; - errno = 0; - size = strtoul(argv[3], NULL, 0); - if (errno) { - fprintf(stderr, "file size(%s): error %s\n", - argv[3], strerror(errno)); - return -1; - } - data.ioc_misc = size; - } - } - data.ioc_count = cmd; - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_SET_DAEMON, buf); - if (rc < 0) { - fprintf(stderr, "IOC_PORTAL_SET_DEMON failed: %s\n", - strerror(errno)); - return rc; - } - return 0; -} - -int jt_dbg_debug_file(int argc, char **argv) -{ - int rc, fd = -1, raw = 1; - FILE *output = stdout; - char *databuf = NULL; - struct stat statbuf; - - if (argc > 4 || argc < 2) { - fprintf(stderr, "usage: %s [output] [raw]\n", argv[0]); - return 0; - } - - fd = open(argv[1], O_RDONLY); - if (fd < 0) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - return -1; - } -#warning FIXME: cleanup fstat issue here -#ifndef SYS_fstat64 -#define __SYS_fstat__ SYS_fstat -#else -#define __SYS_fstat__ SYS_fstat64 -#endif - rc = syscall(__SYS_fstat__, fd, &statbuf); - if (rc < 
0) { - fprintf(stderr, "fstat failed: %s\n", strerror(errno)); - goto out; - } - - if (argc >= 3) { - output = fopen(argv[2], "w"); - if (output == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], - strerror(errno)); - goto out; - } - } - - if (argc == 4) - raw = atoi(argv[3]); - - databuf = mmap(NULL, statbuf.st_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE, fd, 0); - if (databuf == NULL) { - fprintf(stderr, "mmap failed: %s\n", strerror(errno)); - goto out; - } - - dump_buffer(output, databuf, statbuf.st_size, raw); - - out: - if (databuf) - munmap(databuf, statbuf.st_size); - if (output != stdout) - fclose(output); - if (fd > 0) - close(fd); - return 0; -} - -int jt_dbg_clear_debug_buf(int argc, char **argv) -{ - int rc; - struct portal_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_CLEAR_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_CLEAR_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -int jt_dbg_mark_debug_buf(int argc, char **argv) -{ - int rc; - struct portal_ioctl_data data; - char *text; - time_t now = time(NULL); - - if (argc > 2) { - fprintf(stderr, "usage: %s [marker text]\n", argv[0]); - return 0; - } - - if (argc == 2) { - text = argv[1]; - } else { - text = ctime(&now); - text[strlen(text) - 1] = '\0'; /* stupid \n */ - } - - memset(&data, 0, sizeof(data)); - data.ioc_inllen1 = strlen(text) + 1; - data.ioc_inlbuf1 = text; - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MARK_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_MARK_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - - -int jt_dbg_modules(int argc, char **argv) 
-{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct mod_paths { - char *name, *path; - } *mp, mod_paths[] = { - {"portals", "lustre/portals/libcfs"}, - {"ksocknal", "lustre/portals/knals/socknal"}, - {"obdclass", "lustre/obdclass"}, - {"ptlrpc", "lustre/ptlrpc"}, - {"obdext2", "lustre/obdext2"}, - {"ost", "lustre/ost"}, - {"osc", "lustre/osc"}, - {"mds", "lustre/mds"}, - {"mdc", "lustre/mdc"}, - {"llite", "lustre/llite"}, - {"obdecho", "lustre/obdecho"}, - {"ldlm", "lustre/ldlm"}, - {"obdfilter", "lustre/obdfilter"}, - {"extN", "lustre/extN"}, - {"lov", "lustre/lov"}, - {"fsfilt_ext3", "lustre/obdclass"}, - {"fsfilt_extN", "lustre/obdclass"}, - {"mds_ext2", "lustre/mds"}, - {"mds_ext3", "lustre/mds"}, - {"mds_extN", "lustre/mds"}, - {"ptlbd", "lustre/ptlbd"}, - {NULL, NULL} - }; - char *path = ".."; - char *kernel = "linux"; - - if (argc >= 2) - path = argv[1]; - if (argc == 3) - kernel = argv[2]; - if (argc > 3) { - printf("%s [path] [kernel]\n", argv[0]); - return 0; - } - - for (mp = mod_paths; mp->name != NULL; mp++) { - struct module_info info; - int rc; - size_t crap; - int query_module(const char *name, int which, void *buf, - size_t bufsize, size_t *ret); - - rc = query_module(mp->name, QM_INFO, &info, sizeof(info), - &crap); - if (rc < 0) { - if (errno != ENOENT) - printf("query_module(%s) failed: %s\n", - mp->name, strerror(errno)); - } else { - printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path, - mp->path, mp->name, - info.addr + sizeof(struct module)); - } - } - - return 0; -#else - printf("jt_dbg_module is not yet implemented for Linux 2.5\n"); - return 0; -#endif /* linux 2.5 */ -} - -int jt_dbg_panic(int argc, char **argv) -{ - int rc; - struct portal_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(PORTALS_DEV_ID, 
IOC_PORTAL_PANIC, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_PANIC failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} diff --git a/lnet/utils/debugctl.c b/lnet/utils/debugctl.c deleted file mode 100644 index 02cb9b4..0000000 --- a/lnet/utils/debugctl.c +++ /dev/null @@ -1,66 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. 
- * - */ - -#include -#include -#include -#include -#include "parser.h" - - -command_t list[] = { - {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"}, - {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file [#MB]|stop|pause|continue], control debug daemon to dump debug buffer to a file"}, - {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file [output] [raw], read debug buffer from input and print it [to output]"}, - {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"}, - {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"}, - {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"}, - {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"}, - {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"}, - {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: )"}, - {"panic", jt_dbg_panic, 0, "cause the kernel to panic"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (dbg_initialize(argc, argv) < 0) - exit(2); - - register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); - - Parser_init("debugctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - unregister_ioc_dev(PORTALS_DEV_ID); - return 0; -} diff --git a/lnet/utils/l_ioctl.c b/lnet/utils/l_ioctl.c deleted file mode 100644 index 722bb57..0000000 --- a/lnet/utils/l_ioctl.c +++ /dev/null @@ -1,281 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -struct ioc_dev { - const char * dev_name; - int dev_fd; -}; - -static struct ioc_dev ioc_dev_list[10]; - -struct dump_hdr { - int magic; - int dev_id; - int opc; -}; - -char * dump_filename; - -static int -open_ioc_dev(int dev_id) -{ - const char * dev_name; - - if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) - return -EINVAL; - - dev_name = ioc_dev_list[dev_id].dev_name; - if (dev_name == NULL) { - fprintf(stderr, "unknown device id: %d\n", dev_id); - return -EINVAL; - } - - if (ioc_dev_list[dev_id].dev_fd < 0) { - int fd = open(dev_name, O_RDWR); - - if (fd < 0) { - fprintf(stderr, "opening %s failed: %s\n" - "hint: the kernel modules may not be loaded\n", - dev_name, strerror(errno)); - return fd; - } - ioc_dev_list[dev_id].dev_fd = fd; - } - - return ioc_dev_list[dev_id].dev_fd; -} - - -static int -do_ioctl(int dev_id, int opc, void *buf) -{ - int fd, rc; - - fd = open_ioc_dev(dev_id); - if (fd < 0) - return fd; - - rc = ioctl(fd, opc, buf); - return rc; - -} - -static FILE * -get_dump_file() -{ - FILE *fp = NULL; - - if (!dump_filename) { - fprintf(stderr, "no dump filename\n"); - } else - fp = fopen(dump_filename, "a"); 
- return fp; -} - -/* - * The dump file should start with a description of which devices are - * used, but for now it will assumed whatever app reads the file will - * know what to do. */ -int -dump(int dev_id, int opc, void *buf) -{ - FILE *fp; - struct dump_hdr dump_hdr; - struct portal_ioctl_hdr * ioc_hdr = (struct portal_ioctl_hdr *) buf; - int rc; - - printf("dumping opc %x to %s\n", opc, dump_filename); - - - dump_hdr.magic = 0xdeadbeef; - dump_hdr.dev_id = dev_id; - dump_hdr.opc = opc; - - fp = get_dump_file(); - if (fp == NULL) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - rc = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp); - if (rc == 1) - rc = fwrite(buf, ioc_hdr->ioc_len, 1, fp); - fclose(fp); - if (rc != 1) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - return 0; -} - -/* register a device to send ioctls to. */ -int -register_ioc_dev(int dev_id, const char * dev_name) -{ - - if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) - return -EINVAL; - - unregister_ioc_dev(dev_id); - - ioc_dev_list[dev_id].dev_name = dev_name; - ioc_dev_list[dev_id].dev_fd = -1; - - return dev_id; -} - -void -unregister_ioc_dev(int dev_id) -{ - - if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) - return; - if (ioc_dev_list[dev_id].dev_name != NULL && - ioc_dev_list[dev_id].dev_fd >= 0) - close(ioc_dev_list[dev_id].dev_fd); - - ioc_dev_list[dev_id].dev_name = NULL; - ioc_dev_list[dev_id].dev_fd = -1; -} - -/* If this file is set, then all ioctl buffers will be - appended to the file. */ -int -set_ioctl_dump(char * file) -{ - if (dump_filename) - free(dump_filename); - - dump_filename = strdup(file); - return 0; -} - -int -l_ioctl(int dev_id, int opc, void *buf) -{ - if (dump_filename) - return dump(dev_id, opc, buf); - else - return do_ioctl(dev_id, opc, buf); -} - -/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer - * in the file. 
For example: - * - * parse_dump("lctl.dump", l_ioctl); - * - * Note: if using l_ioctl, then you also need to register_ioc_dev() for - * each device used in the dump. - */ -int -parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)) -{ - int fd, line =0; - struct stat st; - char *buf, *end; - - fd = syscall(SYS_open, dump_file, O_RDONLY); - -#warning FIXME: cleanup fstat issue here -#ifndef SYS_fstat64 -#define __SYS_fstat__ SYS_fstat -#else -#define __SYS_fstat__ SYS_fstat64 -#endif - if (syscall(__SYS_fstat__, fd, &st)) { - perror("stat fails"); - exit(1); - } - - if (st.st_size < 1) { - fprintf(stderr, "KML is empty\n"); - exit(1); - } - - buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0); - end = buf + st.st_size; - close(fd); - while (buf < end) { - struct dump_hdr *dump_hdr = (struct dump_hdr *) buf; - struct portal_ioctl_hdr * data; - char tmp[8096]; - int rc; - - line++; - - data = (struct portal_ioctl_hdr *) (buf + sizeof(*dump_hdr)); - if (buf + data->ioc_len > end ) { - fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf, - data->ioc_len, end); - return -1; - } -#if 0 - printf ("dump_hdr: %lx data: %lx\n", - (unsigned long)dump_hdr - (unsigned long)buf, (unsigned long)data - (unsigned long)buf); - - printf("%d: opcode %x len: %d ver: %x ", line, dump_hdr->opc, - data->ioc_len, data->ioc_version); -#endif - - memcpy(tmp, data, data->ioc_len); - - rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp); - if (rc) { - printf("failed: %d\n", rc); - exit(1); - } - - buf += data->ioc_len + sizeof(*dump_hdr); - } - return 0; -} - -int -jt_ioc_dump(int argc, char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - printf("setting dumpfile to: %s\n", argv[1]); - - set_ioctl_dump(argv[1]); - return 0; -} diff --git a/lnet/utils/parser.c b/lnet/utils/parser.c deleted file mode 100644 index 4d93645..0000000 --- a/lnet/utils/parser.c +++ /dev/null @@ -1,703 +0,0 @@ -/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#ifdef HAVE_LIBREADLINE -#define READLINE_LIBRARY -#include -#endif -//extern char **completion_matches __P((char *, rl_compentry_func_t *)); -extern void using_history(void); -extern void stifle_history(int); -extern void add_history(char *); - -#include "parser.h" - -static command_t * top_level; /* Top level of commands, initialized by - * InitParser */ -static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */ -static int done; /* Set to 1 if user types exit or quit */ - - -/* static functions */ -static char *skipwhitespace(char *s); -static char *skiptowhitespace(char *s); -static command_t *find_cmd(char *name, command_t cmds[], char **next); -static int process(char *s, char **next, command_t *lookup, command_t **result, - char **prev); -static void print_commands(char *str, command_t *table); - -static char * skipwhitespace(char * s) -{ - char * t; - int len; - - len = (int)strlen(s); - for (t = s; t <= s + len && isspace(*t); t++); - return(t); -} - - -static char * skiptowhitespace(char * s) -{ - char * t; - 
- for (t = s; *t && !isspace(*t); t++); - return(t); -} - -static int line2args(char *line, char **argv, int maxargs) -{ - char *arg; - int i = 0; - - arg = strtok(line, " \t"); - if ( arg ) { - argv[i] = arg; - i++; - } else - return 0; - - while( (arg = strtok(NULL, " \t")) && (i <= maxargs)) { - argv[i] = arg; - i++; - } - return i; -} - -/* find a command -- return it if unique otherwise print alternatives */ -static command_t *Parser_findargcmd(char *name, command_t cmds[]) -{ - command_t *cmd; - - for (cmd = cmds; cmd->pc_name; cmd++) { - if (strcmp(name, cmd->pc_name) == 0) - return cmd; - } - return NULL; -} - -int Parser_execarg(int argc, char **argv, command_t cmds[]) -{ - command_t *cmd; - - cmd = Parser_findargcmd(argv[0], cmds); - if ( cmd ) { - return (cmd->pc_func)(argc, argv); - } else { - printf("Try interactive use without arguments or use one of:\n"); - for (cmd = cmds; cmd->pc_name; cmd++) - printf("\"%s\" ", cmd->pc_name); - printf("\nas argument.\n"); - } - return -1; -} - -/* returns the command_t * (NULL if not found) corresponding to a - _partial_ match with the first token in name. It sets *next to - point to the following token. Does not modify *name. */ -static command_t * find_cmd(char * name, command_t cmds[], char ** next) -{ - int i, len; - - if (!cmds || !name ) - return NULL; - - /* This sets name to point to the first non-white space character, - and next to the first whitespace after name, len to the length: do - this with strtok*/ - name = skipwhitespace(name); - *next = skiptowhitespace(name); - len = *next - name; - if (len == 0) - return NULL; - - for (i = 0; cmds[i].pc_name; i++) { - if (strncasecmp(name, cmds[i].pc_name, len) == 0) { - *next = skipwhitespace(*next); - return(&cmds[i]); - } - } - return NULL; -} - -/* Recursively process a command line string s and find the command - corresponding to it. This can be ambiguous, full, incomplete, - non-existent. 
*/ -static int process(char *s, char ** next, command_t *lookup, - command_t **result, char **prev) -{ - *result = find_cmd(s, lookup, next); - *prev = s; - - /* non existent */ - if ( ! *result ) - return CMD_NONE; - - /* found entry: is it ambigous, i.e. not exact command name and - more than one command in the list matches. Note that find_cmd - points to the first ambiguous entry */ - if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) && - find_cmd(s, (*result) + 1, next)) - return CMD_AMBIG; - - /* found a unique command: component or full? */ - if ( (*result)->pc_func ) { - return CMD_COMPLETE; - } else { - if ( *next == '\0' ) { - return CMD_INCOMPLETE; - } else { - return process(*next, next, (*result)->pc_sub_cmd, result, prev); - } - } -} - -#ifdef HAVE_LIBREADLINE -static command_t * match_tbl; /* Command completion against this table */ -static char * command_generator(const char * text, int state) -{ - static int index, - len; - char *name; - - /* Do we have a match table? 
*/ - if (!match_tbl) - return NULL; - - /* If this is the first time called on this word, state is 0 */ - if (!state) { - index = 0; - len = (int)strlen(text); - } - - /* Return next name in the command list that paritally matches test */ - while ( (name = (match_tbl + index)->pc_name) ) { - index++; - - if (strncasecmp(name, text, len) == 0) { - return(strdup(name)); - } - } - - /* No more matches */ - return NULL; -} - -/* probably called by readline */ -static char **command_completion(char * text, int start, int end) -{ - command_t * table; - char * pos; - - match_tbl = top_level; - for (table = find_cmd(rl_line_buffer, match_tbl, &pos); - table; - table = find_cmd(pos, match_tbl, &pos)) { - - if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd; - } - - return(completion_matches(text, command_generator)); -} -#endif - -/* take a string and execute the function or print help */ -int execute_line(char * line) -{ - command_t *cmd, *ambig; - char *prev; - char *next, *tmp; - char *argv[MAXARGS]; - int i; - int rc = 0; - - switch( process(line, &next, top_level, &cmd, &prev) ) { - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, cmd, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - cmd = ambig + 1; - } - fprintf(stderr, "\n"); - break; - case CMD_NONE: - fprintf(stderr, "No such command, type help\n"); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_COMPLETE: - i = line2args(line, argv, MAXARGS); - rc = (cmd->pc_func)(i, argv); - - if (rc == CMD_HELP) - fprintf(stderr, "%s\n", cmd->pc_help); - - break; - } - - return rc; -} - -int -noop_fn () -{ - return (0); -} - -/* just in case you're ever in an airplane and discover you - forgot to install readline-dev. :) */ -int init_input() -{ - int interactive = isatty (fileno (stdin)); - -#ifdef HAVE_LIBREADLINE - using_history(); - stifle_history(HISTORY); - - if (!interactive) - { - rl_prep_term_function = (rl_vintfunc_t *)noop_fn; - rl_deprep_term_function = (rl_voidfunc_t *)noop_fn; - } - - rl_attempted_completion_function = (CPPFunction *)command_completion; - rl_completion_entry_function = (void *)command_generator; -#endif - return interactive; -} - -#ifndef HAVE_LIBREADLINE -#define add_history(s) -char * readline(char * prompt) -{ - char line[2048]; - int n = 0; - if (prompt) - printf ("%s", prompt); - if (fgets(line, sizeof(line), stdin) == NULL) - return (NULL); - n = strlen(line); - if (n && line[n-1] == '\n') - line[n-1] = '\0'; - return strdup(line); -} -#endif - -/* this is the command execution machine */ -int Parser_commands(void) -{ - char *line, *s; - int rc = 0; - int interactive; - - interactive = init_input(); - - while(!done) { - line = readline(interactive ? 
parser_prompt : NULL); - - if (!line) break; - - s = skipwhitespace(line); - - if (*s) { - add_history(s); - rc = execute_line(s); - } - - free(line); - } - return rc; -} - - -/* sets the parser prompt */ -void Parser_init(char * prompt, command_t * cmds) -{ - done = 0; - top_level = cmds; - if (parser_prompt) free(parser_prompt); - parser_prompt = strdup(prompt); -} - -/* frees the parser prompt */ -void Parser_exit(int argc, char *argv[]) -{ - done = 1; - free(parser_prompt); - parser_prompt = NULL; -} - -/* convert a string to an integer */ -int Parser_int(char *s, int *val) -{ - int ret; - - if (*s != '0') - ret = sscanf(s, "%d", val); - else if (*(s+1) != 'x') - ret = sscanf(s, "%o", val); - else { - s++; - ret = sscanf(++s, "%x", val); - } - - return(ret); -} - - -void Parser_qhelp(int argc, char *argv[]) { - - printf("Available commands are:\n"); - - print_commands(NULL, top_level); - printf("For more help type: help command-name\n"); -} - -int Parser_help(int argc, char **argv) -{ - char line[1024]; - char *next, *prev, *tmp; - command_t *result, *ambig; - int i; - - if ( argc == 1 ) { - Parser_qhelp(argc, argv); - return 0; - } - - line[0]='\0'; - for ( i = 1 ; i < argc ; i++ ) { - strcat(line, argv[i]); - } - - switch ( process(line, &next, top_level, &result, &prev) ) { - case CMD_COMPLETE: - fprintf(stderr, "%s: %s\n",line, result->pc_help); - break; - case CMD_NONE: - fprintf(stderr, "%s: Unknown command.\n", line); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; result->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, result, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - result = ambig + 1; - } - fprintf(stderr, "\n"); - break; - } - return 0; -} - - -void Parser_printhelp(char *cmd) -{ - char *argv[] = { "help", cmd }; - Parser_help(2, argv); -} - -/************************************************************************* - * COMMANDS * - *************************************************************************/ - - -static void print_commands(char * str, command_t * table) { - command_t * cmds; - char buf[80]; - - for (cmds = table; cmds->pc_name; cmds++) { - if (cmds->pc_func) { - if (str) printf("\t%s %s\n", str, cmds->pc_name); - else printf("\t%s\n", cmds->pc_name); - } - if (cmds->pc_sub_cmd) { - if (str) { - sprintf(buf, "%s %s", str, cmds->pc_name); - print_commands(buf, cmds->pc_sub_cmd); - } else { - print_commands(cmds->pc_name, cmds->pc_sub_cmd); - } - } - } -} - -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len) -{ - char *line = NULL; - int size = strlen(prompt) + strlen(deft) + 8; - char *theprompt; - theprompt = malloc(size); - assert(theprompt); - - sprintf(theprompt, "%s [%s]: ", prompt, deft); - - line = readline(theprompt); - free(theprompt); - - if ( line == NULL || *line == '\0' ) { - strncpy(res, deft, len); - } else { - strncpy(res, line, len); - } - - if ( line ) { - free(line); - return res; - } else { - return NULL; - } -} - -/* get integer from prompt, loop forever to get it */ -int Parser_getint(const char *prompt, long min, long max, long deft, int base) -{ - int rc; - long result; - char *line; - int size = strlen(prompt) + 40; - char *theprompt = malloc(size); - 
assert(theprompt); - sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft); - - fflush(stdout); - - do { - line = NULL; - line = readline(theprompt); - if ( !line ) { - fprintf(stdout, "Please enter an integer.\n"); - fflush(stdout); - continue; - } - if ( *line == '\0' ) { - free(line); - result = deft; - break; - } - rc = Parser_arg2int(line, &result, base); - free(line); - if ( rc != 0 ) { - fprintf(stdout, "Invalid string.\n"); - fflush(stdout); - } else if ( result > max || result < min ) { - fprintf(stdout, "Error: response must lie between %ld and %ld.\n", - min, max); - fflush(stdout); - } else { - break; - } - } while ( 1 ) ; - - if (theprompt) - free(theprompt); - return result; - -} - -/* get boolean (starting with YyNn; loop forever */ -int Parser_getbool(const char *prompt, int deft) -{ - int result = 0; - char *line; - int size = strlen(prompt) + 8; - char *theprompt = malloc(size); - assert(theprompt); - - fflush(stdout); - - if ( deft != 0 && deft != 1 ) { - fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n", - deft); - assert ( 0 ); - } - sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y"); - - do { - line = NULL; - line = readline(theprompt); - if ( line == NULL ) { - result = deft; - break; - } - if ( *line == '\0' ) { - result = deft; - break; - } - if ( *line == 'y' || *line == 'Y' ) { - result = 1; - break; - } - if ( *line == 'n' || *line == 'N' ) { - result = 0; - break; - } - if ( line ) - free(line); - fprintf(stdout, "Invalid string. 
Must start with yY or nN\n"); - fflush(stdout); - } while ( 1 ); - - if ( line ) - free(line); - if ( theprompt ) - free(theprompt); - return result; -} - -/* parse int out of a string or prompt for it */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base) -{ - long result; - int rc; - - rc = Parser_arg2int(inp, &result, base); - - if ( rc == 0 ) { - return result; - } else { - return Parser_getint(prompt, deft, min, max, base); - } -} - -/* parse int out of a string or prompt for it */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len) -{ - if ( inp == NULL || *inp == '\0' ) { - return Parser_getstr(prompt, deft, answer, len); - } else - return inp; -} - -/* change a string into a number: return 0 on success. No invalid characters - allowed. The processing of base and validity follows strtol(3)*/ -int Parser_arg2int(const char *inp, long *result, int base) -{ - char *endptr; - - if ( (base !=0) && (base < 2 || base > 36) ) - return 1; - - *result = strtol(inp, &endptr, base); - - if ( *inp != '\0' && *endptr == '\0' ) - return 0; - else - return 1; -} - -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size (int *sizep, char *str) { - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } -} - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool (int *b, char *str) { - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - !strcasecmp (str, "off") || - !strcasecmp (str, "disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp 
(str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); -} - -int Parser_quit(int argc, char **argv) -{ - argc = argc; - argv = argv; - done = 1; - return 0; -} diff --git a/lnet/utils/parser.h b/lnet/utils/parser.h deleted file mode 100644 index dead9f5..0000000 --- a/lnet/utils/parser.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#define HISTORY 100 /* Don't let history grow unbounded */ -#define MAXARGS 100 - -#define CMD_COMPLETE 0 -#define CMD_INCOMPLETE 1 -#define CMD_NONE 2 -#define CMD_AMBIG 3 -#define CMD_HELP 4 - -typedef struct parser_cmd { - char *pc_name; - int (* pc_func)(int, char **); - struct parser_cmd * pc_sub_cmd; - char *pc_help; -} command_t; - -typedef struct argcmd { - char *ac_name; - int (*ac_func)(int, char **); - char *ac_help; -} argcmd_t; - -typedef struct network { - char *type; - char *server; - int port; -} network_t; - -int Parser_quit(int argc, char **argv); -void Parser_init(char *, command_t *); /* Set prompt and load command list */ -int Parser_commands(void); /* Start the command parser */ -void Parser_qhelp(int, char **); /* Quick help routine */ -int Parser_help(int, char **); /* Detailed help routine */ -void Parser_printhelp(char *); /* Detailed help routine */ -void Parser_exit(int, char **); /* Shuts down command parser */ -int Parser_execarg(int argc, char **argv, command_t cmds[]); -int execute_line(char * line); - -/* Converts a string to an integer */ -int Parser_int(char *, int *); - -/* Prompts for a string, with default values and a maximum length */ -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len); - -/* Prompts for an integer, with minimum, maximum and default values and base */ -int Parser_getint(const char *prompt, long min, long max, long deft, - int base); - -/* Prompts for a yes/no, with default */ -int Parser_getbool(const char *prompt, int deft); - -/* Extracts an integer 
from a string, or prompts if it cannot get one */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base); - -/* Extracts a word from the input, or propmts if it cannot get one */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len); - -/* Extracts an integer from a string with a base */ -int Parser_arg2int(const char *inp, long *result, int base); - -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size(int *sizep, char *str); - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool(int *b, char *str); - -#endif diff --git a/lnet/utils/portals.c b/lnet/utils/portals.c deleted file mode 100644 index 90d66f5..0000000 --- a/lnet/utils/portals.c +++ /dev/null @@ -1,985 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "parser.h" - -unsigned int portal_debug; -unsigned int portal_printk; -unsigned int portal_stack; - - -static ptl_nid_t g_nid = 0; -static unsigned int g_nal = 0; -static unsigned short g_port = 0; - -static int g_socket_txmem = 0; -static int g_socket_rxmem = 0; -static int g_socket_nonagle = 1; - -typedef struct -{ - char *name; - int num; -} name2num_t; - -static name2num_t nalnames[] = { - {"tcp", SOCKNAL}, - {"toe", TOENAL}, - {"elan", QSWNAL}, - {"gm", GMNAL}, - {"scimac", SCIMACNAL}, - {NULL, -1} -}; - -static name2num_t * -name2num_lookup_name (name2num_t *table, char *str) -{ - while (table->name != NULL) - if (!strcmp (str, table->name)) - return (table); - else - table++; - return (NULL); -} - -static name2num_t * -name2num_lookup_num (name2num_t *table, int num) -{ - while (table->name != NULL) - if (num == table->num) - return (table); - else - table++; - return (NULL); -} - -int -ptl_name2nal (char *str) -{ - name2num_t *e = name2num_lookup_name (nalnames, str); - - return ((e == NULL) ? 0 : e->num); -} - -static char * -nal2name (int nal) -{ - name2num_t *e = name2num_lookup_num (nalnames, nal); - - return ((e == NULL) ? "???" 
: e->name); -} - -int -ptl_parse_nid (ptl_nid_t *nidp, char *str) -{ - struct hostent *he; - int a; - int b; - int c; - int d; - - if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 && - (a & ~0xff) == 0 && (b & ~0xff) == 0 && - (c & ~0xff) == 0 && (d & ~0xff) == 0) - { - __u32 addr = (a<<24)|(b<<16)|(c<<8)|d; - - *nidp = (ptl_nid_t)addr; - return (0); - } - - if ((('a' <= str[0] && str[0] <= 'z') || - ('A' <= str[0] && str[0] <= 'Z')) && - (he = gethostbyname (str)) != NULL) - { - __u32 addr = *(__u32 *)he->h_addr; - - *nidp = (ptl_nid_t)ntohl(addr); /* HOST byte order */ - return (0); - } - - if (sscanf (str, "%i", &a) == 1) - { - *nidp = (ptl_nid_t)a; - return (0); - } - - if (sscanf (str, "%x", &a) == 1) - { - *nidp = (ptl_nid_t) a; - return (0); - } - - return (-1); -} - -char * -ptl_nid2str (char *buffer, ptl_nid_t nid) -{ - __u32 addr = htonl((__u32)nid); /* back to NETWORK byte order */ - struct hostent *he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET); - - if (he != NULL) - strcpy (buffer, he->h_name); - else - sprintf (buffer, "0x"LPX64, nid); - - return (buffer); -} - -int -sock_write (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = write (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) - { - fprintf (stderr, "Unexpected zero sock_write\n"); - abort(); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int -sock_read (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = read (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) /* EOF */ - { - errno = ECONNABORTED; - return (-1); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int ptl_initialize(int argc, char **argv) -{ - register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); - return 0; -} - - -int jt_ptl_network(int argc, char **argv) -{ - int nal; - - if (argc != 2 || - (nal = 
ptl_name2nal (argv[1])) == 0) - { - name2num_t *entry; - - fprintf(stderr, "usage: %s \n", argv[0]); - for (entry = nalnames; entry->name != NULL; entry++) - fprintf (stderr, "%s%s", entry == nalnames ? "<" : "|", entry->name); - fprintf(stderr, ">\n"); - } - else - g_nal = nal; - - return (0); -} - -int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) -{ - int rc; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. */ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (__cpu_to_le32 (hmv->magic) != PORTALS_PROTO_MAGIC) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (__cpu_to_le16 (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR || - __cpu_to_le16 (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __cpu_to_le16 (hmv->version_major), - __cpu_to_le16 (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... 
*/ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)) != 0) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __cpu_to_le32 (hdr.type), - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr))); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - return (0); -} - -int jt_ptl_connect(int argc, char **argv) -{ - if (argc < 2) { - usage: - fprintf(stderr, "usage: %s or \n", - argv[0]); - return 0; - } - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - if (g_nal == SOCKNAL || g_nal == TOENAL) { - ptl_nid_t peer_nid; - struct hostent *he; - struct portal_ioctl_data data; - struct sockaddr_in srvaddr; - char *flag; - int fd, rc; - int nonagle = 0; - int rxmem = 0; - int txmem = 0; - int bind_irq = 0; - int xchange_nids = 0; - int o; - int olen; - - if (argc < 3) { - goto usage; - } - - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); - return -1; - } - - g_port = atol(argv[2]); - - if (argc > 3) - for (flag = argv[3]; *flag != 0; flag++) - switch (*flag) - { - case 'i': - bind_irq = 1; - break; - - case 'x': - xchange_nids = 1; - break; - - default: - fprintf (stderr, "unrecognised flag '%c'\n", - *flag); - return (-1); - } - - memset(&srvaddr, 0, sizeof(srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(g_port); - srvaddr.sin_addr.s_addr = *(__u32 *)he->h_addr; - - fd = socket(PF_INET, SOCK_STREAM, 0); - if ( fd < 0 ) { - fprintf(stderr, "socket() failed: %s\n", - strerror(errno)); - return -1; - } - - if (g_socket_nonagle) - { - o = 1; - if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o)) != 0) - { 
- fprintf(stderr, "cannot disable nagle: %s\n", strerror(errno)); - return (-1); - } - } - - if (g_socket_rxmem != 0) - { - o = g_socket_rxmem; - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &o, sizeof (o)) != 0) - { - fprintf(stderr, "cannot set receive buffer size: %s\n", strerror(errno)); - return (-1); - } - } - - if (g_socket_txmem != 0) - { - o = g_socket_txmem; - if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &o, sizeof (o)) != 0) - { - fprintf(stderr, "cannot set send buffer size: %s\n", strerror(errno)); - return (-1); - } - } - - rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); - if ( rc == -1 ) { - fprintf(stderr, "connect() failed: %s\n", - strerror(errno)); - return -1; - } - - olen = sizeof (txmem); - if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &olen) != 0) - fprintf (stderr, "Can't get send buffer size: %s\n", strerror (errno)); - olen = sizeof (rxmem); - if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &olen) != 0) - fprintf (stderr, "Can't get receive buffer size: %s\n", strerror (errno)); - olen = sizeof (nonagle); - if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0) - fprintf (stderr, "Can't get nagle: %s\n", strerror (errno)); - - if (xchange_nids) { - - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc != 0) - { - fprintf (stderr, "failed to get my nid: %s\n", - strerror (errno)); - close (fd); - return (-1); - } - - rc = exchange_nids (fd, data.ioc_nid, &peer_nid); - if (rc != 0) - { - close (fd); - return (-1); - } - } - else - peer_nid = ntohl (srvaddr.sin_addr.s_addr); /* HOST byte order */ - - printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s\n", argv[1], - peer_nid, txmem, rxmem, nonagle ? 
"Disabled" : "Enabled"); - - PORTAL_IOC_INIT(data); - data.ioc_fd = fd; - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD; - data.ioc_nid = peer_nid; - data.ioc_flags = bind_irq; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc) { - fprintf(stderr, "failed to register fd with portals: " - "%s\n", strerror(errno)); - close (fd); - return -1; - } - - g_nid = peer_nid; - printf("Connection to "LPX64" registered with socknal\n", g_nid); - - rc = close(fd); - if (rc) { - fprintf(stderr, "close failed: %d\n", rc); - } - } else if (g_nal == QSWNAL) { - g_nid = atoi(argv[1]); - } else if (g_nal == GMNAL) { - g_nid = atoi(argv[1]); - } else if (g_nal == SCIMACNAL) { - unsigned int tmpnid; - if(sscanf(argv[1], "%x", &tmpnid) == 1) { - g_nid=tmpnid; - } - else { - fprintf(stderr, "nid %s invalid for SCI nal\n", argv[1]); - } - - - } else { - fprintf(stderr, "This should never happen. Also it is very " - "bad.\n"); - } - - return 0; -} - -int jt_ptl_disconnect(int argc, char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - if (g_nal == SOCKNAL || g_nal == TOENAL) { - struct hostent *he; - struct portal_ioctl_data data; - int rc; - - PORTAL_IOC_INIT(data); - if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); - return -1; - } - - data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */ - - } else { - printf("Disconnecting ALL connections.\n"); - /* leave ioc_nid zeroed == disconnect all */ - } - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_CLOSE_CONNECTION; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc) { - fprintf(stderr, "failed to remove connection: %s\n", - strerror(errno)); - return -1; - } - } else if (g_nal == QSWNAL) { - printf("'disconnect' doesn't make 
any sense for " - "elan.\n"); - } else if (g_nal == GMNAL) { - printf("'disconnect' doesn't make any sense for " - "GM.\n"); - } else if (g_nal == SCIMACNAL) { - printf("'disconnect' doesn't make any sense for " - "SCI.\n"); - } else { - fprintf(stderr, "This should never happen. Also it is very " - "bad.\n"); - return -1; - } - - return 0; -} - -int jt_ptl_push_connection (int argc, char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - if (g_nal == SOCKNAL || g_nal == TOENAL) { - struct hostent *he; - struct portal_ioctl_data data; - int rc; - - PORTAL_IOC_INIT(data); - if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); - return -1; - } - - data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */ - - } else { - printf("Pushing ALL connections.\n"); - /* leave ioc_nid zeroed == disconnect all */ - } - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_PUSH_CONNECTION; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc) { - fprintf(stderr, "failed to push connection: %s\n", - strerror(errno)); - return -1; - } - } else if (g_nal == QSWNAL) { - printf("'push' doesn't make any sense for elan.\n"); - } else if (g_nal == GMNAL) { - printf("'push' doesn't make any sense for GM.\n"); - } else if (g_nal == SCIMACNAL) { - printf("'push' doesn't make any sense for SCI.\n"); - } else { - fprintf(stderr, "This should never happen. 
Also it is very " - "bad.\n"); - return -1; - } - - return 0; -} - -int jt_ptl_ping(int argc, char **argv) -{ - int rc; - ptl_nid_t nid; - long count = 1; - long size = 4; - long timeout = 1; - struct portal_ioctl_data data; - - if (argc < 2) { - fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]); - return 0; - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - - if (ptl_parse_nid (&nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return (-1); - } - - if (argc > 2) - { - count = atol(argv[2]); - - if (count < 0 || count > 20000) - { - fprintf(stderr, "are you insane? %ld is a crazy count.\n", count); - return -1; - } - } - - if (argc > 3) - size= atol(argv[3]); - - if (argc > 4) - timeout = atol (argv[4]); - - PORTAL_IOC_INIT (data); - data.ioc_count = count; - data.ioc_size = size; - data.ioc_nid = nid; - data.ioc_nal = g_nal; - data.ioc_timeout = timeout; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PING, &data); - if (rc) { - fprintf(stderr, "failed to start pinger: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -int jt_ptl_shownid(int argc, char **argv) -{ - struct portal_ioctl_data data; - int rc; - - if (argc > 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command first\n"); - return -1; - } - - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc < 0) - fprintf(stderr, "getting my NID failed: %s\n", - strerror (errno)); - else - printf(LPX64"\n", data.ioc_nid); - return 0; -} - -int jt_ptl_mynid(int argc, char **argv) -{ - int rc; - char hostname[1024]; - char *nidstr; - struct portal_ioctl_data data; - ptl_nid_t mynid; - - if (argc > 2) { - fprintf(stderr, "usage: %s [NID]\n", argv[0]); - fprintf(stderr, "NID defaults to the primary IP address of the machine.\n"); 
- return 0; - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - - if (argc >= 2) - nidstr = argv[1]; - else if (gethostname(hostname, sizeof(hostname)) != 0) { - fprintf(stderr, "gethostname failed: %s\n", - strerror(errno)); - return -1; - } - else - nidstr = hostname; - - rc = ptl_parse_nid (&mynid, nidstr); - if (rc != 0) { - fprintf (stderr, "Can't convert '%s' into a NID\n", nidstr); - return -1; - } - - PORTAL_IOC_INIT(data); - data.ioc_nid = mynid; - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_REGISTER_MYNID; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc < 0) - fprintf(stderr, "setting my NID failed: %s\n", - strerror(errno)); - else - printf("registered my nid "LPX64" (%s)\n", mynid, hostname); - return 0; -} - -int -jt_ptl_fail_nid (int argc, char **argv) -{ - int rc; - ptl_nid_t nid; - unsigned int threshold; - struct portal_ioctl_data data; - - if (argc < 2 || argc > 3) - { - fprintf (stderr, "usage: %s nid|\"_all_\" [count (0 == mend)]\n", argv[0]); - return (0); - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return (-1); - } - - if (!strcmp (argv[1], "_all_")) - nid = PTL_NID_ANY; - else if (ptl_parse_nid (&nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return (-1); - } - - if (argc < 3) - threshold = PTL_MD_THRESH_INF; - else if (sscanf (argv[2], "%i", &threshold) != 1) { - fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]); - return (-1); - } - - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - data.ioc_nid = nid; - data.ioc_count = threshold; - - rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_FAIL_NID, &data); - if (rc < 0) - fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n", - strerror (errno)); - else - printf ("%s %s\n", threshold == 0 ? 
"Unfailing" : "Failing", argv[1]); - - return (0); -} - -int -jt_ptl_rxmem (int argc, char **argv) -{ - int size; - - if (argc > 1) - { - if (Parser_size (&size, argv[1]) != 0 || size < 0) - { - fprintf (stderr, "Can't parse size %s\n", argv[1]); - return (0); - } - - g_socket_rxmem = size; - } - printf ("Socket rmem = %d\n", g_socket_rxmem); - return (0); -} - -int -jt_ptl_txmem (int argc, char **argv) -{ - int size; - - if (argc > 1) - { - if (Parser_size (&size, argv[1]) != 0 || size < 0) - { - fprintf (stderr, "Can't parse size %s\n", argv[1]); - return (0); - } - g_socket_txmem = size; - } - printf ("Socket txmem = %d\n", g_socket_txmem); - return (0); -} - -int -jt_ptl_nagle (int argc, char **argv) -{ - int enable; - - if (argc > 1) - { - if (Parser_bool (&enable, argv[1]) != 0) - { - fprintf (stderr, "Can't parse boolean %s\n", argv[1]); - return (0); - } - g_socket_nonagle = !enable; - } - printf ("Nagle %s\n", g_socket_nonagle ? "disabled" : "enabled"); - return (0); -} - -int -jt_ptl_add_route (int argc, char **argv) -{ - struct portal_ioctl_data data; - ptl_nid_t nid1; - ptl_nid_t nid2; - ptl_nid_t gateway_nid; - int rc; - - if (argc < 3) - { - fprintf (stderr, "usage: %s gateway target [target]\n", argv[0]); - return (0); - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return (-1); - } - - if (ptl_parse_nid (&gateway_nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); - return (-1); - } - - if (ptl_parse_nid (&nid1, argv[2]) != 0) - { - fprintf (stderr, "Can't parse first target NID \"%s\"\n", argv[2]); - return (-1); - } - - if (argc < 4) - nid2 = nid1; - else if (ptl_parse_nid (&nid2, argv[3]) != 0) - { - fprintf (stderr, "Can't parse second target NID \"%s\"\n", argv[4]); - return (-1); - } - - PORTAL_IOC_INIT(data); - data.ioc_nid = gateway_nid; - data.ioc_nal = g_nal; - data.ioc_nid2 = MIN (nid1, nid2); - data.ioc_nid3 = MAX (nid1, nid2); - - rc = 
l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_ADD_ROUTE, &data); - if (rc != 0) - { - fprintf (stderr, "IOC_PORTAL_ADD_ROUTE failed: %s\n", strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_del_route (int argc, char **argv) -{ - struct portal_ioctl_data data; - ptl_nid_t nid; - int rc; - - if (argc < 2) - { - fprintf (stderr, "usage: %s targetNID\n", argv[0]); - return (0); - } - - if (ptl_parse_nid (&nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]); - return (-1); - } - - PORTAL_IOC_INIT(data); - data.ioc_nid = nid; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_DEL_ROUTE, &data); - if (rc != 0) - { - fprintf (stderr, "IOC_PORTAL_DEL_ROUTE ("LPX64") failed: %s\n", nid, strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_print_routes (int argc, char **argv) -{ - char buffer[3][128]; - struct portal_ioctl_data data; - int rc; - int index; - int gateway_nal; - ptl_nid_t gateway_nid; - ptl_nid_t nid1; - ptl_nid_t nid2; - - - for (index = 0;;index++) - { - PORTAL_IOC_INIT(data); - data.ioc_count = index; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_ROUTE, &data); - if (rc != 0) - break; - - gateway_nal = data.ioc_nal; - gateway_nid = data.ioc_nid; - nid1 = data.ioc_nid2; - nid2 = data.ioc_nid3; - - printf ("%8s %18s : %s - %s\n", - nal2name (gateway_nal), - ptl_nid2str (buffer[0], gateway_nid), - ptl_nid2str (buffer[1], nid1), - ptl_nid2str (buffer[2], nid2)); - } - return (0); -} - diff --git a/lnet/utils/ptlctl.c b/lnet/utils/ptlctl.c deleted file mode 100644 index 8c56d93..0000000 --- a/lnet/utils/ptlctl.c +++ /dev/null @@ -1,65 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include -#include -#include -#include - -#include "parser.h" - - -command_t list[] = { - {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"}, - {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: | for tcp/elan respectively)"}, - {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [hostname]"}, - {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [hostname]"}, - {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"}, - {"shownid", jt_ptl_shownid, 0, "print the local NID"}, - {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, - {"add_route", jt_ptl_add_route, 0, "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"}, - {"del_route", jt_ptl_del_route, 0, "delete an entry from the routing table (args: targetNID"}, - {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"}, - {"recv_mem", jt_ptl_rxmem, 0, "Set socket receive buffer size (args: [size])"}, - {"send_mem", jt_ptl_txmem, 0, "Set socket send buffer size (args: [size])"}, - {"nagle", jt_ptl_nagle, 0, "Enable/Disable Nagle (args: [on/off])"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"fail", 
jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (ptl_initialize(argc, argv) < 0) - exit(1); - - Parser_init("ptlctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - return 0; -} diff --git a/lnet/utils/routerstat.c b/lnet/utils/routerstat.c deleted file mode 100644 index 37da12c..0000000 --- a/lnet/utils/routerstat.c +++ /dev/null @@ -1,99 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -double -timenow () -{ - struct timeval tv; - - gettimeofday (&tv, NULL); - return (tv.tv_sec + tv.tv_usec / 1000000.0); -} - -void -do_stat (int fd) -{ - static char buffer[1024]; - static double last = 0.0; - double now; - double t; - long long bytes; - long packets; - long errors; - long depth; - int n; - - lseek (fd, 0, SEEK_SET); - now = timenow(); - n = read (fd, buffer, sizeof (buffer)); - if (n < 0) - { - fprintf (stderr, "Can't read statfile\n"); - exit (1); - } - buffer[n] = 0; - - n = sscanf (buffer, "%Ld %ld %ld %ld", &bytes, &packets, &errors, &depth); - - if (n < 3) - { - fprintf (stderr, "Can't parse statfile\n"); - exit (1); - } - - if (last == 0.0) - printf ("%Ld bytes, %ld packets (sz %Ld) %ld errors", - bytes, packets, (long long)((packets == 0) ? 0LL : bytes/packets), errors); - else - { - t = now - last; - - printf ("%9Ld (%7.2fMb/s), %7ld packets (sz %5Ld, %5ld/s) %ld errors (%ld/s)", - bytes, ((double)bytes)/((1<<20) * t), - packets, (long long)((packets == 0) ? 
0LL : bytes/packets), (long)(packets/t), - errors, (long)(errors/t)); - } - - if (n == 4) - printf (" (%ld)\n", depth); - else - printf ("\n"); - - fflush (stdout); - - lseek (fd, 0, SEEK_SET); - write (fd, "\n", 1); - last = timenow(); -} - -int main (int argc, char **argv) -{ - int interval = 0; - int fd; - - if (argc > 1) - interval = atoi (argv[1]); - - fd = open ("/proc/sys/portals/router", O_RDWR); - if (fd < 0) - { - fprintf (stderr, "Can't open stat: %s\n", strerror (errno)); - return (1); - } - - do_stat (fd); - if (interval == 0) - return (0); - - for (;;) - { - sleep (interval); - do_stat (fd); - } -} diff --git a/lnet/utils/wirecheck.c b/lnet/utils/wirecheck.c deleted file mode 100644 index 6a4377b..0000000 --- a/lnet/utils/wirecheck.c +++ /dev/null @@ -1,141 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include -#include -#include -#include - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" LASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" LASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE(offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -check_ptl_handle_wire (void) -{ - CHECK_STRUCT (ptl_handle_wire_t); - CHECK_MEMBER (ptl_handle_wire_t, wh_interface_cookie); - CHECK_MEMBER (ptl_handle_wire_t, wh_object_cookie); -} - -void -check_ptl_magicversion (void) -{ - CHECK_STRUCT 
(ptl_magicversion_t); - CHECK_MEMBER (ptl_magicversion_t, magic); - CHECK_MEMBER (ptl_magicversion_t, version_major); - CHECK_MEMBER (ptl_magicversion_t, version_minor); -} - -void -check_ptl_hdr (void) -{ - CHECK_STRUCT (ptl_hdr_t); - CHECK_MEMBER (ptl_hdr_t, dest_nid); - CHECK_MEMBER (ptl_hdr_t, src_nid); - CHECK_MEMBER (ptl_hdr_t, dest_pid); - CHECK_MEMBER (ptl_hdr_t, src_pid); - CHECK_MEMBER (ptl_hdr_t, type); - - BLANK_LINE (); - COMMENT ("Ack"); - CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength); - CHECK_MEMBER (ptl_hdr_t, msg.ack.dst_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.ack.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.ack.length); - - BLANK_LINE (); - COMMENT ("Put"); - CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index); - CHECK_MEMBER (ptl_hdr_t, msg.put.ack_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.put.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.put.length); - CHECK_MEMBER (ptl_hdr_t, msg.put.offset); - CHECK_MEMBER (ptl_hdr_t, msg.put.hdr_data); - - BLANK_LINE (); - COMMENT ("Get"); - CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index); - CHECK_MEMBER (ptl_hdr_t, msg.get.return_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.get.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.get.length); - CHECK_MEMBER (ptl_hdr_t, msg.get.src_offset); - CHECK_MEMBER (ptl_hdr_t, msg.get.return_offset); - CHECK_MEMBER (ptl_hdr_t, msg.get.sink_length); - - BLANK_LINE (); - COMMENT ("Reply"); - CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_offset); - CHECK_MEMBER (ptl_hdr_t, msg.reply.length); -} - -int -main (int argc, char **argv) -{ - printf ("void lib_assert_wire_constants (void)\n" - "{\n"); - - COMMENT ("Wire protocol assertions generated by 'wirecheck'"); - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (PORTALS_PROTO_MAGIC); - CHECK_DEFINE (PORTALS_PROTO_VERSION_MAJOR); - CHECK_DEFINE (PORTALS_PROTO_VERSION_MINOR); - - CHECK_VALUE (PTL_MSG_ACK); - CHECK_VALUE (PTL_MSG_PUT); - CHECK_VALUE (PTL_MSG_GET); - CHECK_VALUE (PTL_MSG_REPLY); - CHECK_VALUE 
(PTL_MSG_HELLO); - - check_ptl_handle_wire (); - check_ptl_magicversion (); - check_ptl_hdr (); - - printf ("}\n\n"); - - return (0); -} diff --git a/lustre/.cvsignore b/lustre/.cvsignore deleted file mode 100644 index 776ef36..0000000 --- a/lustre/.cvsignore +++ /dev/null @@ -1,18 +0,0 @@ -.Xrefs -.Xrefs-2.5 -aclocal.m4 -config.log -config.status -config.cache -configure -Makefile -Makefile.in -.deps -tags -TAGS -lustre*.tar.gz -cscope.files -cscope.out -autom4te-2.53.cache -autom4te.cache - diff --git a/lustre/BUGS b/lustre/BUGS deleted file mode 100644 index 9cf6fa2..0000000 --- a/lustre/BUGS +++ /dev/null @@ -1,15 +0,0 @@ -include /dev/obd in the documentation - - -attach: attaching ext2obd allows ext2 module to be unloaded. Unload, -then do cleanup, get Oops... - -syncing: invalid IOCTL - -create: more than one object - -preallocate: IOCTL - -statfs: - -restoresnap: decrements directory count for ext2 diff --git a/lustre/BUILDING b/lustre/BUILDING deleted file mode 100644 index deaa5e8..0000000 --- a/lustre/BUILDING +++ /dev/null @@ -1,25 +0,0 @@ -BUILDING LUSTRE ---------------- - -To build the lustre obd module, you must first build portals. - -Portals is available from the same CVS repository of the lustre -project module portals, see http://www.lustre.org - -To build: - sh autogen.sh - ./configure --enable-linuxdir=/usr/src/linux --enable-portalsdir=/usr/src/portals - make - -To play with Lustre Lite: - cd obd/tests - sh llmount.sh - -To clean up: - sh llmountcleanup.sh - -Feedback: - lustre-devel@lists.sf.net - lustre-discuss@lists.sf.net - -- Peter - \ No newline at end of file diff --git a/lustre/COPYING b/lustre/COPYING deleted file mode 100644 index c69cfd8..0000000 --- a/lustre/COPYING +++ /dev/null @@ -1,352 +0,0 @@ - - NOTE! This copyright does *not* cover user programs that use kernel - services by normal system calls - this is merely considered normal use - of the kernel, and does *not* fall under the heading of "derived work". 
- Also note that the GPL below is copyrighted by the Free Software - Foundation, but the instance of code that it refers to (the Linux - kernel) is copyrighted by me and others who actually wrote it. - - Linus Torvalds - ----------------------------------------- - - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. 
You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. 
The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. 
(Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. 
You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. 
- -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. 
If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. 
If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) 19yy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19yy name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. 
diff --git a/lustre/ChangeLog b/lustre/ChangeLog deleted file mode 100644 index 89eaef7..0000000 --- a/lustre/ChangeLog +++ /dev/null @@ -1,406 +0,0 @@ -tbd - * version v0_7 - * bug fixes - - imports and exports cleanup too early, need refcounts (349, 879, 1045) - - per-import/export recovery handling (958, 931, 959) - - multiple last-rcvd slots, for serving multiple FSes (949) - - connections are again shared between multiple imp/exports (963, 964) - - "umount -f" would hang if any requests needed to be sent (393, 978) - - avoid pinning large req buffer by copying for queued messages (989) - - add "uuid" to "lctl device" command to help upcalls (991) - - "open" RPCs with transnos would confuse recovery counters (1037) - - do proper endian conversion of all wire messages (288, 340, 891) - - remove OST bulk get LBUGs, fix ost_brw_write cleanup (1126) - - call waiting locks callback from LDLM recovery thread (1127, 1151) - - fix ptlrpc_connection leak in target_handle_connect (1174) - - fix import refcounting bug in OST and MDS cleanup (1134) - - if an invalid-at-open-time OSC returned before close(), LBUG (1150) - - fix very unlikely obd_types race condition (501) - - remove osc_open hack for echo_client (1187) - - we leaked exports/dlmimps for forcibly disconnected clients (1143) - - a failure in read_inode2 leads to deadlock (1139) - - cancel ack-locks as soon as transaction is committed (1072) - - fix major leaks and crashes in the bulk I/O path (937, 1057) - - make sure to commitrw after any preprw to avoid deadlock (1162) - - failing to execute a file in a lustre FS would lock inode (1203) - - small DEBUG_REQ fix to avoid dereferencing a NULL (1227) - - don't ASSERT while cleaning up an incompletely-setup obd (1248) - - obd_uuid2tgt would walk off the end of the list (1255) - - on IA64 the osc would give portals incorrect bulk size (1258) - - fix debug daemon ioctl interface; allows daemon on ia64 (1274) - - fix lock inversion caused by new llite matching code 
(1282) - - limit the number of dirty pages on a client to 10MB (1286) - - timed out locks were not being correctly cancelled (1289) - - fix O_DIRECT above 4GB on IA-32 (1292) - * major user-visible changes - - fail out/fail over policy now controlled by the upcall (993) - * protocol changes - - add OBD_PING to check server availability and failure (954) - - lustre messages are now sent in sending host order (288, 340, 891) - - add eadatalen to MDS getattr reply (340) - - OST read replies may contain second buffer, with per-page status (593) - -2003-03-11 Phil Schwan - * version v0_6 - * bug fixes - - LDLM_DEBUG macro fix, for gcc 3.2 (850) - - failed open()s could cause deadlock; fixed (867, 869) - - stop cancelling OST locks when files are closed (481) - - overlapping XID spaces caused network corruption (851, 853) - - fix unsafe fsfilt counter arithmetic; change to atomic_t - - setattr_raw added, to do single-RPC, server-side setattrs - - lmc/lconf syntax change for OST UUIDs - - fix crashy race condition between ptlrpc_free_req and osc_close - - don't use request in mdc_enqueue if we hit a timeout (889) - - don't set the inode i_size for regular files from the MDS (896) - - handle out of order completion AST (842) - - don't LBUG if a lock request times out after receiving AST (913) - - avoid d_rehash race in ll_find_alias by rehashing inside dcache_lock - - if a bad lock AST arrives, send an error instead of dropping entirely - - return 0 from revalidate2 if ll_intent_lock returns -EINTR (912) - - fix leak in bulk IO when only partially completed (899, 900, 926) - - fix O_DIRECT for ia64 (55) - - (almost) eliminate Lustre-kernel-thread effects on load average (722) - - C-z after timeout could hang a process forever; fixed (977) - * Features - - client-side I/O cache (678, 924, 929, 941, 970) - * protocol changes - - READPAGE and SETATTRs which don't take server-side locks get - their own portal - -2003-02-11 Phil Schwan - * version v0_5_20 - * bug fixes - - Fix
ldlm_lock_match on the MDS to avoid matching remote locks (592) - - Fix fsfilt_extN_readpage() to read a full page of directory - entries, or fake the remainder if PAGE_SIZE != blocksize (500) - - Avoid extra mdc_getattr() in ll_intent_lock when possible (534, 604) - - Fix imbalanced LOV object allocation and out-of-bound access (469) - - Most intent operations were removed, in favour of a new RPC mode - that does a single RPC to the server and bypasses most of the VFS - - All LDLM resource ID arrays were removed in favour of ldlm_res_id - - Aggressively cancel local locks on DLM servers - - mds_reint_unlink sends EA to the client if it's the last nlink. - client uses that EA to unlink OST objects. - - mds_reint_{rename,unlink,link} were rewritten to take ordered locks - - recursive symlinks were fixed (439) - - fixed NULL deref in DEBUG_REQ - - filter_update_lastobjid no longer calls sync, which annoyed extN - - fixed multi-client small-writes to a single file problem (445) - - fixed mtime updates during file writes (607) - - fixed vector writes on obdfilter causing problems when ENOSPC (670) - - fixed bug in obd_brw_read/write() (under guise of testing 367) - - fixed Linux OST size reporting problem (444, 656) - - OST now updates object mtime with writes or setattr (607, 619) - - client verifies file size before zeroing page past EOF (445) - - OST now writes last allocated objid to disk with allocation (108) - - LOV on echo now works (409) - * protocol changes - - mds_reint_unlink sends a new buffer, with the EA included. 
this - buffer is only valid if body->valid & OBD_MD_FLEASIZE, which is only - set if a regular file was being unlinked, and it was the last link - - use PtlGet from the target for bulk writes (315) - - OST now updates object mtime with writes or setattr (607, 619) - - LDLM now has a grant-time callback to revalidate locked items, if - necessary (604) - - Many MDS operations were reorganized to combat race conditions - * other changes - - Merge b_intel branch (updated lprocfs code) - now at /proc/fs/lustre - - configure check to avoid gcc version 2.96 20000731-2.96-98) (606) - -2003-01-06 Andreas Dilger - * version v0_5_19 - * bug fixes - - Fully reactivate OST imports after reconnection (512, others) - - Make sure client sees our -ENOTCONN from mds_handle (513 - partial) - - More graceful error handling for truncating on dead OST (515) - - Don't error out unless we're actually accessing dead stripes (474) - - Fix garbage sizes when stripes are missing (410) - - LRU counters were broken, causing constant lock purge (433, 432) - - garbage on read from stripes with failed OSTs (441) - - mark OSCs as active before reconnecting during recovery (438) - - lov_enqueue and lov_cancel need to handle inactive OSTs (403) - - lfind did not preserve OST order in output (443) - - symlinks cause hung clients, incorrect data (439) - - stop dereferencing request after dropping refcount (457) - - don't LASSERT(spin_is_locked) on non-SMP (455) - - fixes for many rename() bugs - - fstat didn't correctly synchronize attributes (399) - - server must handle lock cancellation during blocking AST prep (487) - - bulk descriptors were free()d too soon (511) - - fix paths in lconf, which would load incorrect modules (451, 507) - - fix confusing lconf 'host not found' error message (386) - - fix lock order deadlock on OST (O/R i_sem before journal ops, 478) - - fix race condition in mdc_blocking_ast() for inode access (526) - - fix lov_unpackmd() unpacking wrong number of stripes (537) - - fix 
lov_set_osc_active() marking wrong OSC inactive (440) - - fix bad lstripe lov_unpackmd() assertion (fix layering too) (527) - - fix multiple writes of stripe MD to MDS (358, maybe 519) - - fix lstripe in several ways (kernel side) (527) - - fix request leak in ldlm_cli_enqueue (262) - - incorrect OSC was marked inactive after OST failure - - call mds_fs_cleanup before unmounting filesystem (524) - - fix races between taking ns_lock and ldlm_lock_change_resource - - fix races updating LOV export open file list - - fix lov_enqueue error path, avoid decref-ing bad lock handle - - fix recovery NULL deref in ldlm_cli_cancel_unused - - fix some DLM races by using new hash table for lock handles (419) - - permit the client to specify desired inodes, at replay - - duplicate requests when we queue them for replay reintegration - - fix last_rcvd offset calculation - - sync after each recovered transaction, so we always make progress - - never, not always, ERESTART requests without transnos - - store the lov_desc in the MDS, so we don't depend on getlovinfo to - set it - - skip replay if the MDS says that the client is already connected - - don't check for a recovery-enabled export to match lctl's UUID - - don't INC_USE_COUNT for phantom exports - - don't crash when cleaning up phantom exports (567) - - don't double-finish or set replay data for errored mdc_open requests - - abort requests when they time out, so we don't get old replies - - send/receive replies for AST messages again - - if the client says that it doesn't have the lock, cancel it on the - server - - if we timeout during I/O, don't try to cancel an in-use lock; instead - mark it as destroyed, it will all work out when decref is called - - fix module use counts (22, 581) - * protocol changes - - ASTs now expect a reply (server cancels lock on error reply) - -2002-12-02 Andreas Dilger - * version v0_5_18 - * bug fixes - - fix many simultaneous client startup (392) - - fix dentry->d_it clobbering - - credentials 
weren't being shipped for readdir/getattr operations - - remove invalid assertions triggered during some concurrent MD - updates - - proper Lustre versions added (336, 389) - - fix memory leak for create error case (398) - - fix LOV locking bug that would get cli/srv out of sync - - fix echo client over LOV (409) - - fix dbench 2, extN refcount problem (170, 258, 356, 418) - - fix double-O_EXCL intent crash (424) - - avoid sending multiple lock CANCELs (352) - * Features - - MDS can do multi-client recovery (modulo bugs in new code) - * Documentation - - many updates, edits, cleanups - -2002-11-18 Phil Schwan - * version v0_5_17 - * bug fixes - - fix null d_it dereference (346) - - fix full OST/dbench hang (333) - - fix permission problem with file removal (286) - - fix removal of OSCs from LOV when they fail - - fix NULL deref during bulk timeout (214) - - fix problems related to multiple filesystems on one MDS (241) - - fixed serious subtle metadata locking bugs - - free locks on clients when inodes are removed due to memory - pressure (201) - - fix inode pointer in lock data (285) - - partial support for multiple MDS on a single host (241) - - data locks weren't cancelled at clear_inode time (290, 311) - - intent locks could lead to unbounded lock growth (205) - - added a maximum lock count, an LRU list, and a flusher - - fix multiple rename (365) - - properly abstracted the echo client - - OSC locked 1 byte too many; fixed - - rewrote brw callback code: - - fixed recovery bugs related to LOVs (306) - - fixed too-many-pages-in-one-write crash (191) - - fixed (again) crash in sync_io_timeout (214) - - probably fixed callback-related race (385) - * protocol change - - Add capability to MDS protocol - - LDLM cancellations and callbacks on different portals - -2002-10-28 Andreas Dilger - * version v0_5_16 - * bug fixes: - - limit client IOV size to PTL_MD_MAX_IOV (611336, 191) - - defer open object destruction to close time (601981, 138) - - open/close OST file 
handle in obdo (OBD_MD_FLHANDLE) (601981, 138) - - move LDLM_ENQUEUE/CONVERT back to MDS portal (625069) - - abstract ll_lookup2, fix ll_revalidate2 to use abstraction (256) - - don't call obd_setattr in ll_file_release for destroyed objects - * protocol change to lustre_msg: move |version| and add |flags| - * protocol change to osc_punch: "start" in "o_size", "end" in "o_blocks" - * lock replay: for LDLM_FL_REPLAY trust client to do right thing - * added replay of create, unlink, link and rename operations during - MDS failover; recovery should be much more robust now - * remove failed OSCs from LOVs (only lov_create uses this so far) - * the lustre-HOWTO was brought (more) up to date (582544) - -2002-10-23 Phil Schwan - * version v0_5_15 - * bug fixes: - - in-use dentries weren't being reused properly (617851) - - prevent multiple LDLM setup (599178) - - fix LOV size calculations for truncate (617853) - - fix client handling of MDS intent errors (POSIX) - - fix permission bug in lovstripe.c test (624321) - - fix MDS thread deadlock - move LDLM handler to DLM portal (625069) - - truncate past end of file could corrupt data - - proper cleanup after timeouts, crashes, etc (592524, 550815) - - a race in recovery could return ETIMEDOUT to apps (623947) - - building outside the source directory was fixed - * the lustre-HOWTO was brought (more) up to date (582544) - * major progress was made on recovery functionality - -2002-10-10 Phil Schwan - * version v0_5_14 - * bug fixes: - - recovery deadlock fix - - rm -rf causes LBUG fix (617817) - - file open by multiple tasks fix (618962) - - directory permissions bugs (602707 and 620007) - - journal_stop fixed with locking (611313) - - O_APPEND failures resolved (618273, perhaps 614459) - - lconf PATH fix (619770) - - IA64 build fix (621450) - - RPC buffer sizes scale with amount of memory - -2002-10-01 Phil Schwan - * version v0_5_13 - * bug fixes: - - locks would be cancelled without throwing away data pages, - resulting in 
inconsistent data (605627) - - inode attributes were not always being refreshed (605627, 612449) - - lconf now continues to cleanup after lctl reports an error - - MDS now enforces user permissions (602707) - - lprocfs cleanup fixed, but not yet enabled (614157) - - fixed infinite server hang, should a client not respond to an AST - - avoid going into recovery if user calls readlink() with a buffer - that's too small (613941) - - AST RPCs no longer require replies (614867) -- this may be changed - - don't crash server if client sends an IOV that's too big (611336) - - fixed lock conversion deadlock (611892) - - fixed the following of symlinks (614622) - * recovery: the server can remove locks from a client that dies, other - clients can make progress - * more extN patch fixes - * compile-time configurable ptlrpc buffer allocations - * documentation - - collaborative read cache document - - Lustre Lite Performance CDR document-in-progress - -2002-09-20 Andreas Dilger - * version v0_5_12 - * bug fix - - fix typo in patch-2.4.18 - -2002-09-20 Andreas Dilger - * version v0_5_11 - * bug fixes - - clear ptlrpc request each time in handle_incoming_request() - - unlink of files now destroys the object on the OST - -2002-09-19 Peter Braam - * version 0_5_10 - * add hard link support - * change obdfile creation method - * kernel patch changed - -2002-09-19 Peter Braam - * version 0_5_9 - * bug fix - - stack overflow bug in extN fixed - -2002-09-18 Andreas Dilger - * version 0_5_8 - * documentation updates - - add man pages for config tools - - update tests/README to describe testing with new config tools - - finish metadata API descriptions - * bug fixes and cleanups - - statfs workaround for 16TB limit - - LOV stripe allocation improved, can stripe on subset of OSTs - - LOV file size/IO offset was wrong for files > 4GB in size - - object EA data was being dropped, caused files to be unreadable - - memory overflow with non-LOV OST caused memory corruption - - fixed 
regression tests to work with new config tools, obdfilter - - fixed bug when directory size became larger than 1 block - - fixed bug (for single client case) when PWD was deleted - - invalidate local directory pages when doing intent-based ops - - avoid LDLM oops when lock callback contained bad data - -2002-09-09 Andreas Dilger - * version 0_5_7 - * documentation updates - * bug fixes and cleanups - - configuration tools - - LOV - - imports/exports - - 64-bit compile warnings - - 64-bit internal statfs data - - many more - * test_brw on persistent OST devices - * MDS recovery - * lprocfs (disabled) - -2002-09-04 Andreas Dilger - * version 0_5_6 - * documentation updates - * bug fixes and cleanups - * configuration tools - -2002-08-30 Peter J. Braam - - * version v0_5_5 - * many small fixes to 0_5_4 - * io/network handling - * thinkos in MDS operations - -2002-08-24 Peter J. Braam - - * version v0_5_4 - * crucial basic fixes to 0.5.3 - * IOR, Iozone work over Elan - * EOF locks added - -2002-08-07 Phil Schwan - * version 0_5_3, our first alpha - * we use the new Portals iovs - * documentation updates - * bug fixes and cleanups - * small changes in the DLM wire protocol - -2002-07-25 Peter J. Braam - * version 0_5_1 with some initial stability, - * locking on MD and file I/O. - * documentation updates - * several bug fixes since 0.5.0 - * small changes in wire protocol - -2002-07-18 Phil Schwan - * version v0_4_5 - * delivered as Lustre Light Alpha - * fixed a crash after handling invalid MDS requests - * fixed directory pages for architectures with non-4k page sizes - -2002-07-11 Andreas Dilger - * release version v0_4_4 - * Moves TCP acceptor to be on port 2432 (unused Coda port) instead - of 1234. - * Fixes a number of interruption problems with OST operations. - * Update documentation for portals header changes - * Move all wire protocol structs/defines to lustre_idl.h - * Fixes symlink length bug. - * Add tcpdump to repository.
- -2002-07-05 Andreas Dilger - * release version v0_4_3 - * Fixes statfs for inodes on extN. - * Fixes bug in runtests which would delete /etc/hosts. - * Use 64-bit object IDs wherever possible (not into VFS though) - Remove ost_get_info, which is unused by lustre, and out of date. - -2002-07-03 Peter Braam - * release version v0_4_2 Fixes a lookup error (type not passed) - * move forward to head of Portals - * move forward to latest Lustre kernel - -2002-06-25 Peter Braam - * release version v0_4_1. Hopefully stable on single node use. diff --git a/lustre/FDL b/lustre/FDL deleted file mode 100644 index b42936b..0000000 --- a/lustre/FDL +++ /dev/null @@ -1,355 +0,0 @@ - GNU Free Documentation License - Version 1.1, March 2000 - - Copyright (C) 2000 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - -0. PREAMBLE - -The purpose of this License is to make a manual, textbook, or other -written document "free" in the sense of freedom: to assure everyone -the effective freedom to copy and redistribute it, with or without -modifying it, either commercially or noncommercially. Secondarily, -this License preserves for the author and publisher a way to get -credit for their work, while not being considered responsible for -modifications made by others. - -This License is a kind of "copyleft", which means that derivative -works of the document must themselves be free in the same sense. It -complements the GNU General Public License, which is a copyleft -license designed for free software. - -We have designed this License in order to use it for manuals for free -software, because free software needs free documentation: a free -program should come with manuals providing the same freedoms that the -software does. 
But this License is not limited to software manuals; -it can be used for any textual work, regardless of subject matter or -whether it is published as a printed book. We recommend this License -principally for works whose purpose is instruction or reference. - - -1. APPLICABILITY AND DEFINITIONS - -This License applies to any manual or other work that contains a -notice placed by the copyright holder saying it can be distributed -under the terms of this License. The "Document", below, refers to any -such manual or work. Any member of the public is a licensee, and is -addressed as "you". - -A "Modified Version" of the Document means any work containing the -Document or a portion of it, either copied verbatim, or with -modifications and/or translated into another language. - -A "Secondary Section" is a named appendix or a front-matter section of -the Document that deals exclusively with the relationship of the -publishers or authors of the Document to the Document's overall subject -(or to related matters) and contains nothing that could fall directly -within that overall subject. (For example, if the Document is in part a -textbook of mathematics, a Secondary Section may not explain any -mathematics.) The relationship could be a matter of historical -connection with the subject or with related matters, or of legal, -commercial, philosophical, ethical or political position regarding -them. - -The "Invariant Sections" are certain Secondary Sections whose titles -are designated, as being those of Invariant Sections, in the notice -that says that the Document is released under this License. - -The "Cover Texts" are certain short passages of text that are listed, -as Front-Cover Texts or Back-Cover Texts, in the notice that says that -the Document is released under this License. 
- -A "Transparent" copy of the Document means a machine-readable copy, -represented in a format whose specification is available to the -general public, whose contents can be viewed and edited directly and -straightforwardly with generic text editors or (for images composed of -pixels) generic paint programs or (for drawings) some widely available -drawing editor, and that is suitable for input to text formatters or -for automatic translation to a variety of formats suitable for input -to text formatters. A copy made in an otherwise Transparent file -format whose markup has been designed to thwart or discourage -subsequent modification by readers is not Transparent. A copy that is -not "Transparent" is called "Opaque". - -Examples of suitable formats for Transparent copies include plain -ASCII without markup, Texinfo input format, LaTeX input format, SGML -or XML using a publicly available DTD, and standard-conforming simple -HTML designed for human modification. Opaque formats include -PostScript, PDF, proprietary formats that can be read and edited only -by proprietary word processors, SGML or XML for which the DTD and/or -processing tools are not generally available, and the -machine-generated HTML produced by some word processors for output -purposes only. - -The "Title Page" means, for a printed book, the title page itself, -plus such following pages as are needed to hold, legibly, the material -this License requires to appear in the title page. For works in -formats which do not have any title page as such, "Title Page" means -the text near the most prominent appearance of the work's title, -preceding the beginning of the body of the text. - - -2. 
VERBATIM COPYING - -You may copy and distribute the Document in any medium, either -commercially or noncommercially, provided that this License, the -copyright notices, and the license notice saying this License applies -to the Document are reproduced in all copies, and that you add no other -conditions whatsoever to those of this License. You may not use -technical measures to obstruct or control the reading or further -copying of the copies you make or distribute. However, you may accept -compensation in exchange for copies. If you distribute a large enough -number of copies you must also follow the conditions in section 3. - -You may also lend copies, under the same conditions stated above, and -you may publicly display copies. - - -3. COPYING IN QUANTITY - -If you publish printed copies of the Document numbering more than 100, -and the Document's license notice requires Cover Texts, you must enclose -the copies in covers that carry, clearly and legibly, all these Cover -Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on -the back cover. Both covers must also clearly and legibly identify -you as the publisher of these copies. The front cover must present -the full title with all words of the title equally prominent and -visible. You may add other material on the covers in addition. -Copying with changes limited to the covers, as long as they preserve -the title of the Document and satisfy these conditions, can be treated -as verbatim copying in other respects. - -If the required texts for either cover are too voluminous to fit -legibly, you should put the first ones listed (as many as fit -reasonably) on the actual cover, and continue the rest onto adjacent -pages. 
- -If you publish or distribute Opaque copies of the Document numbering -more than 100, you must either include a machine-readable Transparent -copy along with each Opaque copy, or state in or with each Opaque copy -a publicly-accessible computer-network location containing a complete -Transparent copy of the Document, free of added material, which the -general network-using public has access to download anonymously at no -charge using public-standard network protocols. If you use the latter -option, you must take reasonably prudent steps, when you begin -distribution of Opaque copies in quantity, to ensure that this -Transparent copy will remain thus accessible at the stated location -until at least one year after the last time you distribute an Opaque -copy (directly or through your agents or retailers) of that edition to -the public. - -It is requested, but not required, that you contact the authors of the -Document well before redistributing any large number of copies, to give -them a chance to provide you with an updated version of the Document. - - -4. MODIFICATIONS - -You may copy and distribute a Modified Version of the Document under -the conditions of sections 2 and 3 above, provided that you release -the Modified Version under precisely this License, with the Modified -Version filling the role of the Document, thus licensing distribution -and modification of the Modified Version to whoever possesses a copy -of it. In addition, you must do these things in the Modified Version: - -A. Use in the Title Page (and on the covers, if any) a title distinct - from that of the Document, and from those of previous versions - (which should, if there were any, be listed in the History section - of the Document). You may use the same title as a previous version - if the original publisher of that version gives permission. -B. 
List on the Title Page, as authors, one or more persons or entities - responsible for authorship of the modifications in the Modified - Version, together with at least five of the principal authors of the - Document (all of its principal authors, if it has less than five). -C. State on the Title page the name of the publisher of the - Modified Version, as the publisher. -D. Preserve all the copyright notices of the Document. -E. Add an appropriate copyright notice for your modifications - adjacent to the other copyright notices. -F. Include, immediately after the copyright notices, a license notice - giving the public permission to use the Modified Version under the - terms of this License, in the form shown in the Addendum below. -G. Preserve in that license notice the full lists of Invariant Sections - and required Cover Texts given in the Document's license notice. -H. Include an unaltered copy of this License. -I. Preserve the section entitled "History", and its title, and add to - it an item stating at least the title, year, new authors, and - publisher of the Modified Version as given on the Title Page. If - there is no section entitled "History" in the Document, create one - stating the title, year, authors, and publisher of the Document as - given on its Title Page, then add an item describing the Modified - Version as stated in the previous sentence. -J. Preserve the network location, if any, given in the Document for - public access to a Transparent copy of the Document, and likewise - the network locations given in the Document for previous versions - it was based on. These may be placed in the "History" section. - You may omit a network location for a work that was published at - least four years before the Document itself, or if the original - publisher of the version it refers to gives permission. -K. 
In any section entitled "Acknowledgements" or "Dedications", - preserve the section's title, and preserve in the section all the - substance and tone of each of the contributor acknowledgements - and/or dedications given therein. -L. Preserve all the Invariant Sections of the Document, - unaltered in their text and in their titles. Section numbers - or the equivalent are not considered part of the section titles. -M. Delete any section entitled "Endorsements". Such a section - may not be included in the Modified Version. -N. Do not retitle any existing section as "Endorsements" - or to conflict in title with any Invariant Section. - -If the Modified Version includes new front-matter sections or -appendices that qualify as Secondary Sections and contain no material -copied from the Document, you may at your option designate some or all -of these sections as invariant. To do this, add their titles to the -list of Invariant Sections in the Modified Version's license notice. -These titles must be distinct from any other section titles. - -You may add a section entitled "Endorsements", provided it contains -nothing but endorsements of your Modified Version by various -parties--for example, statements of peer review or that the text has -been approved by an organization as the authoritative definition of a -standard. - -You may add a passage of up to five words as a Front-Cover Text, and a -passage of up to 25 words as a Back-Cover Text, to the end of the list -of Cover Texts in the Modified Version. Only one passage of -Front-Cover Text and one of Back-Cover Text may be added by (or -through arrangements made by) any one entity. If the Document already -includes a cover text for the same cover, previously added by you or -by arrangement made by the same entity you are acting on behalf of, -you may not add another; but you may replace the old one, on explicit -permission from the previous publisher that added the old one. 
- -The author(s) and publisher(s) of the Document do not by this License -give permission to use their names for publicity for or to assert or -imply endorsement of any Modified Version. - - -5. COMBINING DOCUMENTS - -You may combine the Document with other documents released under this -License, under the terms defined in section 4 above for modified -versions, provided that you include in the combination all of the -Invariant Sections of all of the original documents, unmodified, and -list them all as Invariant Sections of your combined work in its -license notice. - -The combined work need only contain one copy of this License, and -multiple identical Invariant Sections may be replaced with a single -copy. If there are multiple Invariant Sections with the same name but -different contents, make the title of each such section unique by -adding at the end of it, in parentheses, the name of the original -author or publisher of that section if known, or else a unique number. -Make the same adjustment to the section titles in the list of -Invariant Sections in the license notice of the combined work. - -In the combination, you must combine any sections entitled "History" -in the various original documents, forming one section entitled -"History"; likewise combine any sections entitled "Acknowledgements", -and any sections entitled "Dedications". You must delete all sections -entitled "Endorsements." - - -6. COLLECTIONS OF DOCUMENTS - -You may make a collection consisting of the Document and other documents -released under this License, and replace the individual copies of this -License in the various documents with a single copy that is included in -the collection, provided that you follow the rules of this License for -verbatim copying of each of the documents in all other respects. 
- -You may extract a single document from such a collection, and distribute -it individually under this License, provided you insert a copy of this -License into the extracted document, and follow this License in all -other respects regarding verbatim copying of that document. - - -7. AGGREGATION WITH INDEPENDENT WORKS - -A compilation of the Document or its derivatives with other separate -and independent documents or works, in or on a volume of a storage or -distribution medium, does not as a whole count as a Modified Version -of the Document, provided no compilation copyright is claimed for the -compilation. Such a compilation is called an "aggregate", and this -License does not apply to the other self-contained works thus compiled -with the Document, on account of their being thus compiled, if they -are not themselves derivative works of the Document. - -If the Cover Text requirement of section 3 is applicable to these -copies of the Document, then if the Document is less than one quarter -of the entire aggregate, the Document's Cover Texts may be placed on -covers that surround only the Document within the aggregate. -Otherwise they must appear on covers around the whole aggregate. - - -8. TRANSLATION - -Translation is considered a kind of modification, so you may -distribute translations of the Document under the terms of section 4. -Replacing Invariant Sections with translations requires special -permission from their copyright holders, but you may include -translations of some or all Invariant Sections in addition to the -original versions of these Invariant Sections. You may include a -translation of this License provided that you also include the -original English version of this License. In case of a disagreement -between the translation and the original English version of this -License, the original English version will prevail. - - -9. 
TERMINATION - -You may not copy, modify, sublicense, or distribute the Document except -as expressly provided for under this License. Any other attempt to -copy, modify, sublicense or distribute the Document is void, and will -automatically terminate your rights under this License. However, -parties who have received copies, or rights, from you under this -License will not have their licenses terminated so long as such -parties remain in full compliance. - - -10. FUTURE REVISIONS OF THIS LICENSE - -The Free Software Foundation may publish new, revised versions -of the GNU Free Documentation License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. See -http://www.gnu.org/copyleft/. - -Each version of the License is given a distinguishing version number. -If the Document specifies that a particular numbered version of this -License "or any later version" applies to it, you have the option of -following the terms and conditions either of that specified version or -of any later version that has been published (not as a draft) by the -Free Software Foundation. If the Document does not specify a version -number of this License, you may choose any version ever published (not -as a draft) by the Free Software Foundation. - - -ADDENDUM: How to use this License for your documents - -To use this License in a document you have written, include a copy of -the License in the document and put the following copyright and -license notices just after the title page: - - Copyright (c) YEAR YOUR NAME. - Permission is granted to copy, distribute and/or modify this document - under the terms of the GNU Free Documentation License, Version 1.1 - or any later version published by the Free Software Foundation; - with the Invariant Sections being LIST THEIR TITLES, with the - Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST. 
- A copy of the license is included in the section entitled "GNU - Free Documentation License". - -If you have no Invariant Sections, write "with no Invariant Sections" -instead of saying which ones are invariant. If you have no -Front-Cover Texts, write "no Front-Cover Texts" instead of -"Front-Cover Texts being LIST"; likewise for Back-Cover Texts. - -If your document contains nontrivial examples of program code, we -recommend releasing these examples in parallel under your choice of -free software license, such as the GNU General Public License, -to permit their use in free software. diff --git a/lustre/Makefile.am b/lustre/Makefile.am deleted file mode 100644 index 47d3c28..0000000 --- a/lustre/Makefile.am +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -AUTOMAKE_OPTIONS = foreign - -if LINUX25 -DIRS24 = -else -DIRS24 = ptlbd -endif - -if LIBLUSTRE -SUBDIRS = portals lov obdclass ptlrpc obdecho ldlm osc utils mdc #liblustre -else -# NOTE: keep extN before obdclass, mds, and obdfilter. Keep obdclass as early -# as possible, to have the best chance at stopping with "wrong kernel version" -# instead of some related build failure. -SUBDIRS = portals $(DIRS24) obdclass mds utils ldlm obdfilter mdc osc ost -SUBDIRS+= llite obdecho lov cobd tests doc scripts conf ptlrpc -endif - -DIST_SUBDIRS = $(SUBDIRS) liblustre -EXTRA_DIST = BUGS FDL Rules include kernel_patches - -# We get the version from the spec file. 
-CONFIGURE_DEPENDENCIES = scripts/lustre.spec.in - -dist-hook: - find $(distdir) -name .deps | xargs rm -rf - find $(distdir) -name CVS | xargs rm -rf - -include $(top_srcdir)/Rules - -rpms: dist Makefile - rpmbuild -ta $(distdir).tar.gz - diff --git a/lustre/Makefile.mk b/lustre/Makefile.mk deleted file mode 100644 index e540148..0000000 --- a/lustre/Makefile.mk +++ /dev/null @@ -1,4 +0,0 @@ -include fs/lustre/portals/Kernelenv - -obj-y += portals/ -obj-y += mds/ diff --git a/lustre/README b/lustre/README deleted file mode 100644 index 1a80657..0000000 --- a/lustre/README +++ /dev/null @@ -1,8 +0,0 @@ -Instructions for building, configuring and running Lustre can be found in -the file doc/lustre-HOWTO.txt. - -If you have checked lustre directly out of CVS, then you either need to -get lyx to build the lustre-HOWTO.txt from the source file, get the PDF -version from the lustre.org website, or install the lustre-doc RPM for -the formatted text version (or read the somewhat cryptic lustre-HOWTO.lin -file if you are desperate). diff --git a/lustre/Rules b/lustre/Rules deleted file mode 100644 index 0d92246..0000000 --- a/lustre/Rules +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -# Build a kernel module, name.o, and install it in $(moduledir) by: -# MODULE = name -# module_DATA = name.o -# EXTRA_PROGRAMS = name -# name_SOURCES = my.c files.c -# include $(top_srcdir)/Rules - -if LINUX25 - -# We still need to link each module with vermagic.o to get rid of "kernel taited" warnings. 
-basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g') -AM_CPPFLAGS=-I$(top_builddir)/include -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) - -else - -AM_CPPFLAGS=-I$(top_builddir)/include - -endif - -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -o $(MODULE).o $($(MODULE)_OBJECTS) - -tags: - rm -f $(top_srcdir)/TAGS - rm -f $(top_srcdir)/tags - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs ctags -a - diff --git a/lustre/autogen.sh b/lustre/autogen.sh deleted file mode 100644 index 9deed73..0000000 --- a/lustre/autogen.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -aclocal && -automake --add-missing && -${AUTOCONF:-autoconf} diff --git a/lustre/cobd/.cvsignore b/lustre/cobd/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/cobd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/cobd/Makefile.am b/lustre/cobd/Makefile.am deleted file mode 100644 index 781c6ce..0000000 --- a/lustre/cobd/Makefile.am +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (C) 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= - -MODULE = cobd -modulefs_DATA = cobd.o -EXTRA_PROGRAMS = cobd -LINX= - -cobd_SOURCES = cache_obd.c lproc_cache.c $(LINX) - -include $(top_srcdir)/Rules diff --git a/lustre/cobd/cache_obd.c b/lustre/cobd/cache_obd.c deleted file mode 100644 index 5efb545..0000000 --- a/lustre/cobd/cache_obd.c +++ /dev/null @@ -1,337 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_COBD - -#include -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include -#endif -#include -#include -#include -#include -#include -#include - -static int cobd_attach(struct obd_device *dev, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_vars(&lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); -} - -static int cobd_detach(struct obd_device *dev) -{ - return lprocfs_obd_detach(dev); -} - -static int -cobd_setup (struct obd_device *dev, obd_count len, void *buf) -{ - struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf; - struct cache_obd *cobd = &dev->u.cobd; - struct obd_device *target; - struct obd_device *cache; - struct obd_uuid target_uuid; - struct obd_uuid cache_uuid; - int rc; - - if (data->ioc_inlbuf1 == NULL || - data->ioc_inlbuf2 == NULL) - return (-EINVAL); - - obd_str2uuid(&target_uuid, data->ioc_inlbuf1); - target = class_uuid2obd (&target_uuid); - - obd_str2uuid(&cache_uuid, data->ioc_inlbuf2); - cache = class_uuid2obd (&cache_uuid); - if (target == NULL || - cache == NULL) - return (-EINVAL); - - /* don't bother checking attached/setup; - * obd_connect() should, and it can change underneath us */ - rc = obd_connect (&cobd->cobd_target, target, &target_uuid); - if (rc != 0) - return 
(rc); - - rc = obd_connect (&cobd->cobd_cache, cache, &cache_uuid); - if (rc != 0) - goto fail_0; - - return (0); - - fail_0: - obd_disconnect (&cobd->cobd_target, 0 ); - return (rc); -} - -static int -cobd_cleanup (struct obd_device *dev, int force, int failover) -{ - struct cache_obd *cobd = &dev->u.cobd; - int rc; - - if (!list_empty (&dev->obd_exports)) - return (-EBUSY); - - rc = obd_disconnect (&cobd->cobd_cache, failover); - if (rc != 0) - CERROR ("error %d disconnecting cache\n", rc); - - rc = obd_disconnect (&cobd->cobd_target, failover); - if (rc != 0) - CERROR ("error %d disconnecting target\n", rc); - - return (0); -} - -static int -cobd_connect (struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid) -{ - int rc = class_connect (conn, obd, cluuid); - - CERROR ("rc %d\n", rc); - return (rc); -} - -static int -cobd_disconnect (struct lustre_handle *conn, int failover) -{ - int rc = class_disconnect (conn, failover); - - CERROR ("rc %d\n", rc); - return (rc); -} - -static int -cobd_get_info(struct lustre_handle *conn, obd_count keylen, - void *key, __u32 *vallen, void *val) -{ - struct obd_device *obd = class_conn2obd(conn); - struct cache_obd *cobd; - - if (obd == NULL) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return -EINVAL; - } - - cobd = &obd->u.cobd; - - /* intercept cache utilisation info? 
*/ - - return obd_get_info(&cobd->cobd_target, keylen, key, vallen, val); -} - -static int cobd_statfs(struct obd_export *exp, struct obd_statfs *osfs) -{ - struct obd_export *cobd_exp; - int rc; - - if (exp->exp_obd == NULL) - return -EINVAL; - - cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target); - rc = obd_statfs(cobd_exp, osfs); - class_export_put(cobd_exp); - return rc; -} - -static int -cobd_getattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm) -{ - struct obd_device *obd = class_conn2obd(conn); - struct cache_obd *cobd; - - if (obd == NULL) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return -EINVAL; - } - - cobd = &obd->u.cobd; - return (obd_getattr (&cobd->cobd_target, oa, lsm)); -} - -static int -cobd_open(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti, - struct obd_client_handle *och) -{ - struct obd_device *obd = class_conn2obd(conn); - struct cache_obd *cobd; - - if (obd == NULL) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return -EINVAL; - } - - cobd = &obd->u.cobd; - return (obd_open (&cobd->cobd_target, oa, lsm, oti, och)); -} - -static int -cobd_close(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti) -{ - struct obd_device *obd = class_conn2obd(conn); - struct cache_obd *cobd; - - if (obd == NULL) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return -EINVAL; - } - - cobd = &obd->u.cobd; - return (obd_close (&cobd->cobd_target, oa, lsm, oti)); -} - -static int cobd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb, - struct niobuf_local *res, void **desc_private, - struct obd_trans_info *oti) -{ - struct obd_export *cobd_exp; - int rc; - - if (exp->exp_obd == NULL) - return -EINVAL; - - if ((cmd & OBD_BRW_WRITE) != 0) - return -EOPNOTSUPP; - - cobd_exp = 
class_conn2export(&exp->exp_obd->u.cobd.cobd_target); - rc = obd_preprw(cmd, cobd_exp, obdo, objcount, obj, niocount, nb, res, - desc_private, oti); - class_export_put(cobd_exp); - return rc; -} - -static int cobd_commitrw(int cmd, struct obd_export *exp, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *local, - void *desc_private, struct obd_trans_info *oti) -{ - struct obd_export *cobd_exp; - int rc; - - if (exp->exp_obd == NULL) - return -EINVAL; - - if ((cmd & OBD_BRW_WRITE) != 0) - return -EOPNOTSUPP; - - cobd_exp = class_conn2export(&exp->exp_obd->u.cobd.cobd_target); - rc = obd_commitrw(cmd, cobd_exp, objcount, obj, niocount, local, - desc_private, oti); - class_export_put(cobd_exp); - return rc; -} - -static inline int -cobd_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count oa_bufs, - struct brw_page *pga, struct obd_trans_info *oti) -{ - struct obd_device *obd = class_conn2obd(conn); - struct cache_obd *cobd; - - if (obd == NULL) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return -EINVAL; - } - - if ((cmd & OBD_BRW_WRITE) != 0) - return -EOPNOTSUPP; - - cobd = &obd->u.cobd; - return (obd_brw (cmd, &cobd->cobd_target, - lsm, oa_bufs, pga, oti)); -} - -static int -cobd_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, - void *karg, void *uarg) -{ - struct obd_device *obd = class_conn2obd(conn); - struct cache_obd *cobd; - - if (obd == NULL) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return -EINVAL; - } - - /* intercept? 
*/ - - cobd = &obd->u.cobd; - return (obd_iocontrol (cmd, &cobd->cobd_target, len, karg, uarg)); -} - -static struct obd_ops cobd_ops = { - o_owner: THIS_MODULE, - o_attach: cobd_attach, - o_detach: cobd_detach, - - o_setup: cobd_setup, - o_cleanup: cobd_cleanup, - - o_connect: cobd_connect, - o_disconnect: cobd_disconnect, - - o_get_info: cobd_get_info, - o_statfs: cobd_statfs, - - o_getattr: cobd_getattr, - o_open: cobd_open, - o_close: cobd_close, - o_preprw: cobd_preprw, - o_commitrw: cobd_commitrw, - o_brw: cobd_brw, - o_iocontrol: cobd_iocontrol, -}; - -static int __init cobd_init(void) -{ - struct lprocfs_static_vars lvars; - ENTRY; - - printk(KERN_INFO "Lustre Caching OBD driver; info@clusterfs.com\n"); - - lprocfs_init_vars(&lvars); - RETURN(class_register_type(&cobd_ops, lvars.module_vars, - OBD_CACHE_DEVICENAME)); -} - -static void __exit cobd_exit(void) -{ - class_unregister_type(OBD_CACHE_DEVICENAME); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. "); -MODULE_DESCRIPTION("Lustre Caching OBD driver"); -MODULE_LICENSE("GPL"); - -module_init(cobd_init); -module_exit(cobd_exit); diff --git a/lustre/cobd/lproc_cache.c b/lustre/cobd/lproc_cache.c deleted file mode 100644 index fd7474b..0000000 --- a/lustre/cobd/lproc_cache.c +++ /dev/null @@ -1,91 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include -#include - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -#else -/* Common STATUS namespace */ -static int rd_target(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *dev = (struct obd_device*)data; - struct lustre_handle *conn; - struct obd_export *exp; - int rc; - - LASSERT(dev != NULL); - conn = &dev->u.cobd.cobd_target; - - if (!dev->obd_set_up) { - rc = snprintf (page, count, "not set up\n"); - } else { - exp = class_conn2export(conn); - LASSERT(exp != NULL); - rc = snprintf(page, count, "%s\n", - exp->exp_obd->obd_uuid.uuid); - class_export_put(exp); - } - return (rc); -} - -static int rd_cache(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *dev = (struct obd_device*)data; - struct lustre_handle *conn; - struct obd_export *exp; - int rc; - - LASSERT(dev != NULL); - conn = &dev->u.cobd.cobd_cache; - - if (!dev->obd_set_up) { - rc = snprintf (page, count, "not set up\n"); - } else { - exp = class_conn2export(conn); - LASSERT (exp != NULL); - rc = snprintf(page, count, "%s\n", - exp->exp_obd->obd_uuid.uuid); - class_export_put(exp); - } - return (rc); -} - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "target_uuid", rd_target, 0, 0 }, - { "cache_uuid", rd_cache, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; -#endif /* LPROCFS */ - -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/conf/.cvsignore b/lustre/conf/.cvsignore deleted file mode 100644 index 282522d..0000000 --- a/lustre/conf/.cvsignore +++ 
/dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lustre/conf/Makefile.am b/lustre/conf/Makefile.am deleted file mode 100644 index a205d10..0000000 --- a/lustre/conf/Makefile.am +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -EXTRA_DIST = lustre.dtd lustre.schema slapd-lustre.conf lustre2ldif.xsl top.ldif -ldapconfdir = $(sysconfdir)/openldap -ldapschemadir = $(sysconfdir)/openldap/schema -ldapconf_SCRIPTS = slapd-lustre.conf -ldapschema_SCRIPTS = lustre.schema -pkglibdir = '${exec_prefix}/usr/lib/$(PACKAGE)' -pkglib_DATA = top.ldif lustre2ldif.xsl - -include $(top_srcdir)/Rules - diff --git a/lustre/conf/lustre.dtd b/lustre/conf/lustre.dtd deleted file mode 100644 index 51d1d1a..0000000 --- a/lustre/conf/lustre.dtd +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lustre/conf/lustre2ldif.xsl b/lustre/conf/lustre2ldif.xsl deleted file mode 100644 index 5fc6f9c..0000000 --- a/lustre/conf/lustre2ldif.xsl +++ /dev/null @@ -1,287 +0,0 @@ - - - - -fs=lustre -config=,fs=lustre - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lustre/conf/slapd-lustre.conf b/lustre/conf/slapd-lustre.conf deleted file mode 100644 index 8558f64..0000000 --- a/lustre/conf/slapd-lustre.conf +++ /dev/null @@ -1,11 +0,0 @@ -####################################################################### -# lustre ldap config database -####################################################################### - -database ldbm -suffix "fs=lustre" -rootdn "cn=Manager,fs=lustre" -include /etc/openldap/schema/lustre.schema -rootpw secret -directory 
/var/lib/ldap/lustre -index objectClass eq, uuid eq diff --git a/lustre/conf/top.ldif b/lustre/conf/top.ldif deleted file mode 100644 index d0cfdac..0000000 --- a/lustre/conf/top.ldif +++ /dev/null @@ -1,4 +0,0 @@ -dn: fs=lustre -fs:lustre -objectClass: lustre -lustreDesc: Lustre Config diff --git a/lustre/configure.in b/lustre/configure.in deleted file mode 100644 index 8e12135..0000000 --- a/lustre/configure.in +++ /dev/null @@ -1,49 +0,0 @@ -# Copyright (C) 2001-2003 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -AC_INIT -AC_CANONICAL_SYSTEM - -# Automake variables. Steal the version number from lustre.spec.in. -AM_INIT_AUTOMAKE(lustre, builtin([esyscmd], [sed -ne '/^%define version /{ s/.*version //; p; q; }' scripts/lustre.spec.in])) -#AM_MAINTAINER_MODE - -# LLNL patches their ext3 and calls it extN -AC_ARG_ENABLE(extN, [ --enable-extN use extN instead of ext3 for lustre backend]) -AM_CONDITIONAL(EXTN, test x$enable_extN = xyes) - -AC_ARG_WITH(obd-buffer-size, [ --with-obd-buffer-size=[size] set lctl ioctl maximum (default=8K)],OBD_BUFFER_SIZE=$with_obd_buffer_size,OBD_BUFFER_SIZE=8192) -AC_DEFINE_UNQUOTED(OBD_MAX_IOCTL_BUFFER, $OBD_BUFFER_SIZE, [IOCTL Buffer Size]) - -sinclude(portals/build.m4) -sinclude(portals/archdep.m4) - -if test x$enable_inkernel = xyes ; then -cp Makefile.mk Makefile.in -cp mds/Makefile.mk mds/Makefile.in -cp portals/Kernelenv.mk portals/Kernelenv.in -cp portals/Makefile.mk portals/Makefile.in -cp portals/libcfs/Makefile.mk portals/libcfs/Makefile.in -cp portals/portals/Makefile.mk portals/portals/Makefile.in -cp portals/knals/Makefile.mk portals/knals/Makefile.in -cp portals/knals/socknal/Makefile.mk portals/knals/socknal/Makefile.in -cp portals/router/Makefile.mk portals/router/Makefile.in -fi - -AM_CONFIG_HEADER(portals/include/config.h) - -AC_OUTPUT([Makefile portals/Makefile portals/Kernelenv \ - portals/libcfs/Makefile 
portals/portals/Makefile \ - portals/unals/Makefile portals/knals/Makefile \ - portals/router/Makefile portals/knals/socknal/Makefile \ - portals/knals/gmnal/Makefile portals/knals/qswnal/Makefile \ - portals/knals/scimacnal/Makefile portals/knals/toenal/Makefile \ - portals/utils/Makefile portals/tests/Makefile portals/doc/Makefile \ - ldlm/Makefile obdecho/Makefile ptlrpc/Makefile liblustre/Makefile \ - lov/Makefile osc/Makefile mdc/Makefile mds/Makefile ost/Makefile \ - cobd/Makefile ptlbd/Makefile conf/Makefile tests/Makefile \ - utils/Makefile utils/Lustre/Makefile obdfilter/Makefile \ - obdclass/Makefile llite/Makefile doc/Makefile scripts/Makefile \ - scripts/lustre.spec]) diff --git a/lustre/doc/.cvsignore b/lustre/doc/.cvsignore deleted file mode 100644 index fdf1642..0000000 --- a/lustre/doc/.cvsignore +++ /dev/null @@ -1,23 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -tags -TAGS -OBD-HOWTO.html -OBD-HOWTO.txt -lustre-HOWTO.lyx -lustre-HOWTO.txt -lustre-pdf.bbl -lustre-pdf.blg -lustre-pdf.log -lustre-pdf.out -lustre-pdf.toc -*.eps -lustre.lyx -*.tex -*.pdf -*.aux diff --git a/lustre/doc/Makefile.am b/lustre/doc/Makefile.am deleted file mode 100644 index d261050..0000000 --- a/lustre/doc/Makefile.am +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (C) 2001, 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution -LYX2PDF = GS_OPTIONS=-dCompatibilityLevel=1.1 $(srcdir)/tex2pdf -overwrite -TEX2PDF = GS_OPTIONS=-dCompatibilityLevel=1.1 $(srcdir)/tex2pdf -overwrite -LYX2PS = lyx --export ps -LYX2TEX = lyx --export latex -LYX2TXT = lyx --export text -LYX2HTML = lyx --export html -LATEX = latex -DVIPS = dvips -PS2PDF = ps2pdf -TEXEXPAND = texexpand -SUFFIXES = .lin .lyx .pdf .ps .sgml .html .txt .tex .fig .eps .dvi - -DOCS = lustre.pdf lustre-HOWTO.txt -HOWTODOC = lustre-HOWTO.txt -IMAGES := $(patsubst %.fig,%.eps,$(wildcard *.fig)) -LYXFILES= $(filter-out $(patsubst %.lin,%.lyx,$(wildcard *.lin)),\ - $(wildcard *.lin *.lyx)) - -MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(VERSIONED) -CLEANFILES = *.aux *.tex doc.old/*.aux doc.old/*.tex *.eps *.log *.pdf -VERSIONED = lustre-HOWTO.lyx lustre.lyx doc.old/lustre-HOWTO.lyx doc.old/lustre.lyx -GENERATED = $(VERSIONED) lustre-full.tex lustre-chbar.tex - -EXTRA_DIST = chbar.sh postbar tex2pdf $(DOCS) $(IMAGES) $(LYXFILES) lustre.bib - -all: $(HOWTODOC) -docs: $(DOCS) - -# These variables are set by lbuild/check-build. -RPMRELEASE ?= RELEASE -KERNTYPE ?= chaos -KERNRPM ?= kernel-2.4.18lustre13-RELEASE.i386.rpm - -# update date and version in document -date := $(shell date +%x) -tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/') -addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g; s|@RPM''RELEASE@|$(RPMRELEASE)|g; s|@KERN''TYPE@|$(KERNTYPE)|g; s|@KERN''RPM@|$(KERNRPM)|g' - -# Regenerate when the $(VERSION) or $Name: $ changes. 
-.INTERMEDIATE: $(GENERATED) -$(VERSIONED) : %.lyx: %.lin Makefile - $(addversion) $< > $@ - -.lyx.pdf: - @echo $(LYX2PDF) $< && $(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n" - -.lyx.ps: - @echo $(LYX2PS) $< && $(LYX2PS) $< || printf "\n*** Warning: not creating PostScript docs; install lyx to rectify this\n" - -.lyx.tex: - @echo $(LYX2TEX) $< && $(LYX2TEX) $< || printf "\n*** Warning: not creating LaTeX docs; install lyx to rectify this\n" - -.lyx.txt: - @echo $(LYX2TXT) $< && $(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n" - -.lyx.html: - @echo $(LYX2HTML) $< && $(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n" - -.tex.pdf: - $(TEX2PDF) $< - -.tex.dvi: - $(LATEX) $< - $(LATEX) $< - -.dvi.ps: - $(DVIPS) $< -o $@ - -.ps.pdf: - $(PS2PDF) $< $@ - -lustre.tex lustre.pdf lustre.txt lustre.html: $(IMAGES) $(LYXFILES) lustre-HOWTO.lyx -.fig.eps: - -fig2dev -L eps $< > $@ - -syncweb: lustre.pdf - cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf - ( cd /usr/src/www ; make lustre ; make synclustre ) -.PHONY: syncweb chbar - -# Build a changebar document from the files in doc.old and this directory. -chbar: lustre-chbar.pdf - -# FIXME: Temporary rules until pdftex displays changebars correctly. -lustre-chbar.pdf: lustre-chbar-nopdf.ps - $(PS2PDF) $< $@ -lustre-chbar-nopdf.ps: lustre-chbar-nopdf.dvi - $(DVIPS) $< -o $@ -lustre-chbar-nopdf.dvi: lustre-chbar-nopdf.tex - $(LATEX) $< - $(LATEX) $< -lustre-chbar-nopdf.tex: lustre-chbar.tex - sed -e 's/^\(.*usepackage.*pdftex\)/%\1/' $< > $@ - -%-chbar.tex: chbar.sh postbar doc.old/%-full.tex %-full.tex - $(SHELL) $(srcdir)/chbar.sh doc.old/$*-full.tex $*-full.tex | $(srcdir)/postbar > $@ - -# This rule needs to come before the next %-full.tex rule. -doc.old/lustre.tex: doc.old/lustre-HOWTO.lyx -doc.old/%-full.tex: doc.old/%.tex - cd doc.old && $(TEXEXPAND) -texinputs=. 
-output=$*-full.tex $*.tex - -# This rule needs to come after the more specific doc.old rule. -%-full.tex: %.tex - $(TEXEXPAND) -texinputs=. -texinputs=$(srcdir) -output=$@ $< - -# Check out the old directory if it doesn't exist. -doc.old/lustre.lin doc.old/lustre-HOWTO.lin: - @if test "X$(OLD)" = X; then \ - echo "You must populate doc.old or specify a CVS tag like OLD=v0_5_1"; \ - exit 1; \ - fi - rm -rf doc.old - mkdir doc.old - cvs checkout -r $(OLD) -d doc.old lustre/doc - -dist-hook: - rm -rf $(distdir)/figs/CVS - -include $(top_srcdir)/Rules diff --git a/lustre/doc/VERSIONING b/lustre/doc/VERSIONING deleted file mode 100644 index 839c746..0000000 --- a/lustre/doc/VERSIONING +++ /dev/null @@ -1,91 +0,0 @@ -Lustre versioning -================= - -0.0.1 2/19/2002 -0.0.2 3/14/2002 describe branches / stable tag -0.0.3 6/10/2002 describe release mechanisms - -This document describes versioning of source and binaries for Lustre. - -Packages -======== - -RPM's that you build should get 3 figure versions, CVS versions will -be 4 digits, and can correspond to test RPM's, and lead up to the -package version. So let's plan on releasing - -So you'd build 2 sets of test rpms this week: - -0.0.9.1 -0.0.9.2 - -we decide it's fine then and we release - -0.1.0 - -We go on developing with - -0.1.0.{1,2,3,4,...} - -as test releases and then we release: - -0.1.1 - -The 0.1 sequence is an unstable sequence, like 2.5 for the kernel is. -So we expect lots of 0.1.X releases leading up to a stable 0.2 (or -1.0) at the time of deployment. - -CVS -=== - -Versions will have 4 digits: - major.minor.patch.test - -Such versions will be tagged in CVS as: - v1_2_11_7 -and referred to as: - 1.2.11.7 -encoded as: - 0x01021107 - -Usage: ------- - -New numbers are used as follows: - -1. major: - - increased when major new functionality becomes available -2. minor: - - even: for each new release with new functionality - - odd : when a new development cycle starts after a release -3. 
patch: - - when a development snapshot or release update becomes available - - all these are announced on lustre-devel@lists.sf.net -4. test: - - when developers feel it is time to exchange a named version - -What will run, what won't ? ---------------------------- - -1. If the test level is non-zero, i.e. there are 4 digits in the - version, no guarantees of any kind are made. - -2. For three digit releases/tags the code should perform - according to the announcement. - -Moving tags ------------ - -The last stable release will be tagged: CVS tag "t_last_stable" -The last operational development snapshot will be CVS tag "dstable" - -Branches --------- - -Any and all development must be done on branches, and can only merge to the -HEAD if _at_least_ tests/acceptance-small.sh and IOR with 5 SMP nodes and -2 clients/node with 1GB file/client pass without any errors or cleanup -problems. Additional tests may be added in the future, so the tests in the -current CVS head must pass before a branch can be merged back to the trunk. - -See http://lustre.org/docs/branches.html for details on CVS branch usage. diff --git a/lustre/doc/chbar.sh b/lustre/doc/chbar.sh deleted file mode 100755 index 7825241..0000000 --- a/lustre/doc/chbar.sh +++ /dev/null @@ -1,243 +0,0 @@ -#!/bin/sh -# Gadget to take two LaTeX files and produce a third which -# has changebars highlighting the difference between them. -# -# Version 1.2 -# Author: -# Don Ward, Careful Computing (don@careful.co.uk) -# v1.0 April 1989 -# v1.1 Feb 93 Amended to use changebar.sty (v3.0) and dvips -# v1.2 Aug 95 Added support for LaTeX209/LaTeX2e -# Added RCS support to retrive old files - -CMD=`basename $0` - -SED=sed -RM="rm -f" -DIFF=diff -ED=ed -AWK=awk -GREP=grep -MV=mv -CAT=cat -MKDIR=mkdir -CO="co" - -TMPDIR=${TMP-/tmp}/$CMD.$$ -trap 'test $DEBUG = NO && rm -rf $TMPDIR' 0 1 2 3 6 7 13 15 -mkdir $TMPDIR || { echo "cannot create directory \`$TMPDIR'." 
>&2; exit 1; } -TMPFILE=${TMPDIR}/$CMD.$$ -SED_CMD_FILE=$TMPFILE.sed - -usage() -{ -$CAT << _END_ -Usage: - $CMD [-hgG] [-d dir] old new [output] - default output is stdout - - $CMD [-hgG] [-d dir] old - new file on stdin, output on stdout - - $CMD [-hgG] -d dir -r rev files - old file retrieved using RCS - - Gadget to take two LaTeX files and produce a third which - has changebars highlighting the difference between them. - Changebars are inserted for differences after '\begin{document}'. - - Feature: \`new' can not be named \`-'. - - Options are: - -d dir : Write the output to file \`dir/new', if \`new' is given or - to file \`dir/old'. - If \`dir' does not exist, it is created. - If \`output' is given, it is discarded. - - -r rev : If the LaTeX \`files' are kept under control of the - Revision Control System RCS, the old files of - the revision \`rev' can be retrived. - \`rev' is specified using the RCS conventions. - This option must be used together with the \`-d dir' option. - \`files' must be a nonempty list of files. - - -h : Print this info text. - -g : Print some debugging info. - -G : Even more debug info. - - Version 1.2: August 3. 1995 -_END_ -exit 1 -} - -# parse options and arguments -DEBUG="NO" -DIR= -REV= -# process options -while getopts d:r:gGh i $* -do - case $i in - d ) DIR=$OPTARG;; - r ) REV=$OPTARG;; - g ) DEBUG="YES" ;; - G ) set -x; DEBUG="YES";; - h | \ - * ) usage ;; - esac -done - -shift `expr $OPTIND - 1` - -case $# in - 1 ) OLD=$1; NEW="-"; OUT="" ;; - 2 ) OLD=$1; NEW=$2; OUT="" ;; - 3 ) OLD=$1; NEW=$2; OUT="$3" ;; - * ) usage ;; -esac - -# check correct options -if [ ! -z "$DIR" ] -then - [ -d $DIR ] || $MKDIR $DIR -fi - -if [ ! -z "$REV" ] -then - [ -z "$DIR" ] && usage - FILES=$* -else - FILES=$NEW -fi - -# do the work -for NEW in $FILES -do - if [ ! -z "$DIR" ] - then - if [ $NEW = "-" ] - then - OUT=$DIR/$OLD - else - OUT=$DIR/$NEW - fi - fi - if [ ! 
-z "$REV" ] - then - OLD=${TMPFILE}.old - $CO -p"$REV" -q $NEW > $OLD - fi - - [ $DEBUG = "YES" ] && echo "OLD=\`$OLD' NEW=\`$NEW' OUT=\`$OUT'" - - # gather some info about the file - # Since we have for sure only the name of the OLD file, ... - $GREP "^\\\\begin{document}" $OLD > /dev/null - if [ $? -eq 0 ] - then - [ $DEBUG = "YES" ] && echo "contains a \\begin{document}" - HAS_BEGIN_DOC="YES" - else - [ $DEBUG = "YES" ] && echo "contains no \\begin{document}" - HAS_BEGIN_DOC="NO" - fi - - # Method to do the work: - # 1 Use diff to get an ed script to go from file1 to file2. - # 2 Breath on it a bit (with sed) to insert changebar commands. - # 3 Apply modified ed script to produce (nearly) the output. - # 4 Use awk to insert the changebars option into the \documentstyle - # and to handle changebar commands inside verbatim environments. - # 5 Remove changebars before \begin{document} with sed - - # SED commands to edit ED commands to edit old file - $CAT > $SED_CMD_FILE <<\_END_ -/^\.$/i\ -\\cbend{}% -/^[0-9][0-9]*[ac]$/a\ -\\cbstart{}% -/^[0-9][0-9]*,[0-9][0-9]*[ac]$/a\ -\\cbstart{}% -/^[0-9][0-9]*d$/a\ -i\ -\\cbdelete{}%\ -. -/^[0-9][0-9]*,[0-9][0-9]*d$/a\ -i\ -\\cbdelete{}%\ -. 
-_END_ - - # note DIFF accepts `-' as stdin - $DIFF -b -e $OLD $NEW | \ - ( $SED -f $SED_CMD_FILE ; echo w ${TMPFILE}.1 ; echo q ) | \ - $ED - $OLD - - # AWK commands to insert Changebars style and to protect - # changebar commands in verbatim environments - # and to tell what driver is in use; we assume the `dvips' driver - - $AWK ' - BEGIN {kind=""; # we saw now \documentXXX[]{} - } - /^\\documentstyle/{ - kind = "209"; - if (index($0, "changebar") == 0 ) { - opts = index($0, "[") - if (opts > 0) - printf "%schangebar,%s\n",substr($0,1,opts),substr($0,opts+1) - else - printf "\\documentstyle[changebar]%s\n", substr($0,15) - next - } - } - /^\\documentclass/{ - kind = "2e"; - printf "%s\n", $0 - printf "\\usepackage[dvips]{changebar}\n" - next - } - /\\begin{document}/ {if (kind == "209" ) {print "\\driver{dvips}"}} - /\\begin{verbatim}/{++nesting} - /\\end{verbatim}/{--nesting} - /\\cbstart{}%|\\cbend{}%|\cbdelete{}%/ { - if ( nesting > 0) { - # changebar command in a verbatim environment: Temporarily exit, - # do the changebar command and reenter. - # - # The obvious ( printf "\\end{verbatim}%s\\begin{verbatim} , $0 ) - # leaves too much vertical space around the changed line(s). 
- # The following magic seeems to work - # - print "\\end{verbatim}\\nointerlineskip" - print "\\vskip -\\ht\\strutbox\\vskip -\\ht\\strutbox" - printf "\\vbox to 0pt{\\vskip \\ht\\strutbox%s\\vss}\n", $0 - print "\\begin{verbatim}" - next - } - } - { print $0 } - ' ${TMPFILE}.1 > ${TMPFILE}.2 - - # if a \begin{document} is contained in the file, - # remove the changebar commands before them - - if [ $HAS_BEGIN_DOC = "YES" ] - then - SED_CMD="1,/\\\\begin{document}/s/\(\\\\cb[sed][tne][adl][^{}]*{}%\)$/%%\1/" - $SED "$SED_CMD" ${TMPFILE}.2 > ${TMPFILE}.3 - else - $CAT ${TMPFILE}.2 > ${TMPFILE}.3 - fi - if [ -z "$OUT" ] - then - $CAT ${TMPFILE}.3 - else - $MV ${TMPFILE}.3 $OUT - fi - -done - -[ $DEBUG = "NO" ] && $RM ${TMPFILE}.* - -############################################################### diff --git a/lustre/doc/lconf.lyx b/lustre/doc/lconf.lyx deleted file mode 100644 index 0b1416f..0000000 --- a/lustre/doc/lconf.lyx +++ /dev/null @@ -1,286 +0,0 @@ -#LyX 1.2 created this file. For more info see http://www.lyx.org/ -\lyxformat 220 -\textclass amsart -\language english -\inputencoding auto -\fontscheme times -\graphics default -\paperfontsize default -\spacing single -\papersize letterpaper -\paperpackage a4 -\use_geometry 0 -\use_amsmath 0 -\use_natbib 0 -\use_numerical_citations 0 -\paperorientation portrait -\secnumdepth 3 -\tocdepth 3 -\paragraph_separation skip -\defskip medskip -\quotes_language english -\quotes_times 2 -\papercolumns 1 -\papersides 1 -\paperpagestyle default - -\layout Section - -lconf -\layout Subsection - -NAME -\layout Description - -lconf Lustre filesystem configuration utility. -\layout Subsection - -SYNOPSIS -\layout Standard - - -\series bold -lconf\SpecialChar ~ -[--node ] [-d,--cleanup] [--noexec] [--gdb] [--nosetup] - [--nomod] [-n,--noexec] [-v,--verbose] [-h,--help] -\layout Subsection - -DESCRIPTION -\layout Standard - -This program configures a node following directives in the . 
- There will be single configuration file for all the nodes in a single cluster. - This file should be distributed to all the nodes in the cluster or kept - in a location accessible to all the nodes. - One option is to store the cluster configuration information in LDAP format - on an LDAP server that can be reached from all the cluster nodes. -\layout Description - ---ldapurl\SpecialChar ~ - LDAP server URL -\layout Description - ---config\SpecialChar ~ - Cluster configuration name used for LDAP query -\layout Description - ---select\SpecialChar ~ - Select a particular node for a service -\layout Description - ---node\SpecialChar ~ -node_name Specify a specific node to configure. - By default, lconf will search for nodes with the local hostname and 'localhost'. - When -\emph on - --node -\emph default - is used, only -\emph on -node_name -\emph default - is searched for. - If a matching node is not found in the config, then lconf exits with an - error. -\layout Description - ---d|--cleanup Unconfigure a node. - The same config and -\emph on ---node -\emph default - argument used for configuration needs to be used for cleanup as well. - This will attempt to undo all of the configuration steps done by lconf, - including unloading the kernel modules. -\layout Description - ---force Forced unmounting and/or obd detach during cleanup. - Default is 0. - -\layout Description - ---mds_ost_conn Open connections to OSTs on MDS. -\layout Description - ---failover Used to shutdown without saving state. - Default is 0. - This will allow the node to give up service to another node for failover - purposes. - This will not be a clean shutdown. -\layout Description - ---noexec Print, but don't execute, the steps lconf will perform. - This is useful for debugging a configuration, and when used with -\emph on ---node -\emph default -, can be run on any host. 
-\layout Description - ---gdb Causes lconf to print a message and pause for 5 seconds after creating - a gdb module script and before doing any Lustre configuration (the gdb - module script is always created, however). -\layout Description - ---gdb_script\SpecialChar ~ - Full name of gdb debug script. - Default is /tmp/ogdb. -\layout Description - ---dump_path\SpecialChar ~ - Path to save debug dumps. - Default is /tmp/lustre_log -\layout Description - ---recover\SpecialChar ~ - Recover a device. -\layout Description - ---nosetup Only load modules, do not configure devices or services. -\layout Description - ---group\SpecialChar ~ - The group of devices to cleanup/configure. -\layout Description - ---nomod Only setup devices and services, do not load modules. -\layout Description - ---noexec,-n Don't do anything, but print what would happen. - Useful for debugging purposes. -\layout Description - ---verbose,-v Be verbose and show actions while going along. -\layout Description - ---timeout\SpecialChar ~ - Set the recovery timeout period. -\layout Description - ---lustre_upcall\SpecialChar ~ - Set the location of the Lustre upcall scripts used - by the client for recovery -\layout Description - ---portals_upcall\SpecialChar ~ - Specify the location of the Portals upcall scripts - used by the client for recovery -\layout Description - ---upcall\SpecialChar ~ - Set the location of both Lustre and Portals upcall scripts - used by the client for recovery -\layout Description - ---lctl-dump\SpecialChar ~ - Dump all ioctls to the specified file -\layout Description - ---dump\SpecialChar ~ - Dump the kernel debug log to the specified file before portals - is unloaded during cleanup. -\layout Description - ---reformat Reformat all the devices. - This is essential on the first time the file system is brought up. -\layout Description - --h,--help Print help. -\layout Description - ---maxlevel\SpecialChar ~ - Perform configuration of devices and services up to level - given. 
- -\emph on -level -\emph default - can take the values -\series bold -net, dev, svc, fs. - -\series default -When used in conjunction with cleanup, services are torn down up to a certain - level. - Default is 100. -\layout Description - ---minlevel\SpecialChar ~ - Specify the minimum level of services to configure/cleanup. - Default is 0. -\layout Description - ---lustre=src_dir Specify the base directory for Lustre sources; this parameter - will cause lconf to load the lustre modules from this source tree. -\layout Description - ---ptldebug\SpecialChar ~ -debug\SpecialChar ~ -level This option can be used to set the required debug - level. -\layout Description - ---subsystem\SpecialChar ~ - Set the portals debug subsystem. -\layout Subsection - -EXAMPLES -\layout Standard - -On client nodes this is typically invoked as: -\layout LyX-Code - - -\size small -lconf --node client config.xml -\layout Standard - -in order to give clients, regardless of hostname, a single configuration. -\layout Standard - -Required debug levels can be set like this: -\layout LyX-Code - - -\size small - ## Everything but these -\layout LyX-Code - - -\size small -lconf --ptldebug -\begin_inset Quotes eld -\end_inset - -~(portals | malloc | trace) -\begin_inset Quotes erd -\end_inset - - -\layout LyX-Code - -\layout LyX-Code - - -\size small -## Only these debug types -\layout LyX-Code - - -\size small -lconf --ptldebug -\begin_inset Quotes eld -\end_inset - -ldlm|ha -\begin_inset Quotes erd -\end_inset - - -\layout Subsection - -BUGS -\layout Standard - -None are known. -\the_end diff --git a/lustre/doc/lctl.lyx b/lustre/doc/lctl.lyx deleted file mode 100644 index b3f3f3e..0000000 --- a/lustre/doc/lctl.lyx +++ /dev/null @@ -1,736 +0,0 @@ -#LyX 1.2 created this file.
For more info see http://www.lyx.org/ -\lyxformat 220 -\textclass amsart-plain -\language english -\inputencoding auto -\fontscheme times -\graphics default -\paperfontsize default -\spacing single -\papersize letterpaper -\paperpackage a4 -\use_geometry 0 -\use_amsmath 0 -\use_natbib 0 -\use_numerical_citations 0 -\paperorientation portrait -\secnumdepth 3 -\tocdepth 3 -\paragraph_separation skip -\defskip medskip -\quotes_language english -\quotes_times 2 -\papercolumns 1 -\papersides 1 -\paperpagestyle default - -\layout Section - -lctl -\layout Subsection - -NAME -\layout Description - -lctl Low level Lustre filesystem configuration utility. -\layout Subsection - -SYNOPSIS -\layout Standard - - -\series bold -lctl -\layout Standard - - -\series bold -lctl\SpecialChar ~ ---device\SpecialChar ~ - -\layout Standard - - -\series bold -lctl\SpecialChar ~ ---threads\SpecialChar ~ -\SpecialChar ~ -\SpecialChar ~ -\SpecialChar ~ - -\layout Subsection - -DESCRIPTION -\layout Standard - -The program can be invoked in interactive mode by issuing -\series bold -lctl. - -\series default - After that, commands are issued as below. - The most common commands in lctl are (in matching pairs) -\family typewriter -\size small -device -\family default -\size default - and -\family typewriter -\size small -attach -\family default -\size default -, -\family typewriter -\size small -detach -\family default -\size default - and -\family typewriter -\size small -setup -\family default -\size default -, -\family typewriter -\size small -cleanup -\family default -\size default - and -\family typewriter -\size small -connect -\family default -\size default -, -\family typewriter -\size small -disconnect -\family default -\size default - and -\family typewriter -\size small -help -\family default -\size default -, and -\family typewriter -\size small -quit -\family default -\size default -. 
- To get a complete listing of available commands, type -\family typewriter -\size small -help -\family default -\size default - at the lctl prompt. - To get basic help on the meaning and syntax of a command, type -\family typewriter -\size small -help command -\family default -\size default -. - Command completion is activated with the -\family typewriter -\size small -TAB -\family default -\size default - key, and command history is available via the up- and down-arrow keys. - -\layout Standard - -For non-interactive single threaded use, one uses the second invocation, - which runs -\emph on -command -\emph default - after connecting to the device -\emph on -. - -\emph default - -\layout Description - ---device The device number to be used for the operation. - The value of devno is an integer, normally found by calling -\emph on -lctl name2dev -\emph default -on a device name. - -\layout Description - ---threads How many threads should be forked doing the command specified. - The numthreads variable is a strictly positive integer indicating how many - threads should be started. - The -\emph on -devno -\emph default -option is used as above. -\layout Description - ---ignore_errors\SpecialChar ~ -|\SpecialChar ~ -ignore_errors Ignore errors during script processing -\layout Description - -dump Save ioctls to a file -\layout LyX-Code - -\layout Description - -Network\SpecialChar ~ -Configuration -\begin_deeper -\layout Description - -network\SpecialChar ~ - Indicate what kind of network applies for the - configuration commands that follow. -\layout Description - -connect\SpecialChar ~ -[[\SpecialChar ~ -]\SpecialChar ~ -|\SpecialChar ~ -] This will establish a connection to - a remote network network -\emph on -id -\emph default - given by the hostname/port combination, or the elan -\emph on -id -\emph default -. -\layout Description - -disconnect\SpecialChar ~ - Disconnect from a remote -\emph on -nid -\emph default -. 
-\layout Description - -mynid\SpecialChar ~ -[nid] Informs the socknal of the local -\emph on -nid -\emph default -. - It defaults to hostname for tcp networks and is automatically set up for - elan/myrinet networks. -\layout Description - -add_uuid\SpecialChar ~ -\SpecialChar ~ - Associate a given UUID with an -\emph on -nid. -\layout Description - -close_uuid\SpecialChar ~ - Disconnect a UUID. -\layout Description - -del_uuid\SpecialChar ~ - Delete a UUID association. -\layout Description - -add_route\SpecialChar ~ -\SpecialChar ~ -\SpecialChar ~ -[target] Add an entry to the routing table for - the given target. -\layout Description - -del_route\SpecialChar ~ - Delete an entry for the target from the routing table. -\layout Description - -route_list Print the complete routing table. -\layout Description - -recv_mem\SpecialChar ~ -[size] Set the socket -\emph on -receive -\emph default - buffer size; if the size is omitted, the default size for the buffer is - printed. -\layout Description - -send_mem\SpecialChar ~ -[size] Set send buffer size for the socket; if size is omitted, - the default size for the buffer is printed. -\layout Description - -nagle\SpecialChar ~ -[on/off] Enable/disable nagle; omitting the argument will cause the - default value to be printed. -\layout Description - -fail\SpecialChar ~ -nid|all\SpecialChar ~ -[count] Fail/restore communications. - Omitting the count implies failing indefinitely; a count of zero indicates that - communication should be restored. - A non-zero count indicates the number of portals messages to be dropped - after which the communication is restored. -\end_deeper -\layout Description - -Device\SpecialChar ~ -Selection -\begin_deeper -\layout Description - -newdev Create a new device. -\layout Description - -name2dev This command can be used to determine a device number for the given - device name. -\layout Description - -device This will select the specified OBD device.
- All other commands depend on the device being set. - -\layout Description - -device_list Show all the devices. -\layout Description - -lustre_build_version Print the Lustre build version. -\end_deeper -\layout Description - -Device\SpecialChar ~ -Configuration -\begin_deeper -\layout Description - -attach\SpecialChar ~ -type\SpecialChar ~ -[name\SpecialChar ~ -[uuid]] -\shape italic -\emph on -Attach -\shape default -\emph default - a type to the current device (which you need to set using the -\family typewriter -\size small -device -\family default -\size default - command) and give that device a name and UUID. - This allows us to identify the device for use later, and also tells us - what type of device we will have. -\layout Description - -setup\SpecialChar ~ - Type specific device setup commands. - For obdfilter, a setup command tells the driver which block device it should - use for storage and what type of filesystem is on that device. - -\layout Description - -cleanup Cleanup a previously setup device. -\layout Description - -detach Remove driver (and name and UUID) from the current device. -\layout Description - -lov_setconfig\SpecialChar ~ -lov-uuid\SpecialChar ~ -stripe-count\SpecialChar ~ -default-stripe-size\SpecialChar ~ -offset\SpecialChar ~ -pattern\SpecialChar ~ -UUID1\SpecialChar ~ -[UUID2...] - Write LOV configuration to an MDS device. -\layout Description - -lov_getconfig\SpecialChar ~ -lov-uuid Read LOV configuration from an MDS device. - Returns default-stripe-count, default-stripe-size, offset, pattern, and - a list of OST UUID's. -\end_deeper -\layout Description - -Device\SpecialChar ~ -Operations -\begin_deeper -\layout Description - -probe\SpecialChar ~ -[timeout] Build a connection handle to a device. - This command is used to suspend configuration until the lctl command has - ensured that the MDS and OSC services are available. - This is to avoid mount failures in a rebooting cluster. 
-\layout Description - -close -\emph on - -\emph default -Close the -\emph on - -\emph default -connection handle -\layout Description - -getattr\SpecialChar ~ - Get attributes for an OST object -\emph on - -\emph default - . -\layout Description - -setattr\SpecialChar ~ -\SpecialChar ~ - Set mode attribute for OST object -\emph on - -\emph default -. -\layout Description - -create\SpecialChar ~ -[num\SpecialChar ~ -[mode\SpecialChar ~ -[verbose]]] Create the specified number -\emph on - -\emph default - of OST objects with the given -\emph on - -\emph default -. -\layout Description - -destroy\SpecialChar ~ -\SpecialChar ~ -starting\SpecialChar ~ -at\SpecialChar ~ - Destroy < -\emph on -num -\emph default -> number of objects starting from the object with object id < -\emph on -objid -\emph default ->. -\layout Description - -test_getattr\SpecialChar ~ -\SpecialChar ~ -[verbose\SpecialChar ~ -[[t]objid]] Do -\emph on - -\emph default - -\emph on -getattrs -\emph default - on OST object -\emph on - -\emph default - ( -\emph on -objectid -\emph default -+1 on each thread). -\layout Description - -test_brw\SpecialChar ~ -[t]\SpecialChar ~ -[write\SpecialChar ~ -[verbose\SpecialChar ~ -[npages\SpecialChar ~ -[[t]objid]]]] Do -\emph on - -\emph default - bulk -\emph on -read -\emph default -/ -\emph on -writes -\emph default - on OST object -\emph on - -\emph default - ( -\emph on - -\emph default - per I/O). -\layout Description - -test_ldlm Perform lock manager test. -\layout Description - -ldlm_regress_start\SpecialChar ~ -%s\SpecialChar ~ -[numthreads\SpecialChar ~ -[refheld\SpecialChar ~ -[numres\SpecialChar ~ -[numext]]]] Start lock manager - stress test. -\layout Description - -ldlm_regress_stop Stop lock manager stress test. 
-\layout Description - -dump_ldlm Dump all lock manager state; this is very useful for debugging -\layout Description - -activate Activate an import -\layout Description - -deactivate De-activate an import -\layout Description - -recover\SpecialChar ~ - -\layout Description - -lookup\SpecialChar ~ -\SpecialChar ~ - -\layout Description - -notransno Disable sending of committed transnumber updates -\layout Description - -readonly Disable writes to the underlying device -\layout Description - -abort_recovery Abort recovery on MDS device -\layout Description - -mount_option Dump mount options to a file -\layout Description - -get_stripe show stripe info for an echo client object. -\layout Description - -set_stripe\SpecialChar ~ -[\SpecialChar ~ -width!count[@offset]\SpecialChar ~ -[:id:id....] set stripe info for an echo - client -\layout Description - -unset_stripe\SpecialChar ~ - unset stripe info for an echo client object. -\end_deeper -\layout Description - -Debug -\begin_deeper -\layout Description - -debug_daemon debug daemon control and dump to a file -\layout Description - -debug_kernel\SpecialChar ~ -[file]\SpecialChar ~ -[raw] Get debug buffer and dump to a -\emph on -file -\emph default -. -\layout Description - -debug_file\SpecialChar ~ -\SpecialChar ~ -[output]\SpecialChar ~ -[raw] Read debug buffer from input and dump to - -\emph on -output -\emph default -. -\layout Description - -clear Clear kernel debug buffer. -\layout Description - -mark\SpecialChar ~ - Insert marker text in kernel debug buffer. -\layout Description - -filter\SpecialChar ~ - Filter message type from the kernel debug - buffer. -\layout Description - -show\SpecialChar ~ - Show specific type of messages. -\layout Description - -debug_list\SpecialChar ~ - List all the subsystem and debug types. -\layout Description - -panic Force the kernel to panic.
-\end_deeper -\layout Description - -Control -\begin_deeper -\layout Description - -help Show a complete list of commands; -\emph on -help -\emph default - can be used to get help on specific command. -\layout Description - -exit Close the lctl session. -\layout Description - -quit Close the lctl session. -\end_deeper -\layout Subsection - -EXAMPLES -\layout Description - -attach -\layout LyX-Code - - -\size small -# lctl -\newline -lctl > newdev -\newline -lctl > attach obdfilter OBDDEV OBDUUID -\size default - -\newline - -\layout Description - -connect -\layout LyX-Code - - -\size small -lctl > name2dev OSCDEV -\newline -2 -\newline -lctl > device 2 -\newline -lctl > connect -\size default - -\newline - -\layout Description - -getattr -\layout LyX-Code - - -\size small -lctl > getattr 12 -\newline -id: 12 -\newline -grp: 0 -\newline -atime: 1002663714 -\newline -mtime: 1002663535 -\newline -ctime: 1002663535 -\newline -size: 10 -\newline -blocks: 8 -\newline -blksize: 4096 -\newline -mode: 100644 -\newline -uid: 0 -\newline -gid: 0 -\newline -flags: 0 -\newline -obdflags: 0 -\newline -nlink: 1 -\newline -valid: ffffffff -\newline -inline: -\newline -obdmd: -\newline -lctl > disconnect -\newline -Finished (success) -\newline - -\layout Description - -setup -\emph on - -\layout LyX-Code - - -\size small -lctl > setup /dev/loop0 extN -\newline -lctl > quit -\size default - -\newline - -\layout LyX-Code - -\layout Subsection - -BUGS -\layout Standard - -None are known. -\the_end diff --git a/lustre/doc/lmc.lyx b/lustre/doc/lmc.lyx deleted file mode 100644 index 7a90023..0000000 --- a/lustre/doc/lmc.lyx +++ /dev/null @@ -1,541 +0,0 @@ -#LyX 1.2 created this file. 
For more info see http://www.lyx.org/ -\lyxformat 220 -\textclass amsart -\language english -\inputencoding auto -\fontscheme times -\graphics default -\paperfontsize default -\spacing single -\papersize letterpaper -\paperpackage a4 -\use_geometry 0 -\use_amsmath 0 -\use_natbib 0 -\use_numerical_citations 0 -\paperorientation portrait -\secnumdepth 3 -\tocdepth 3 -\paragraph_separation skip -\defskip medskip -\quotes_language english -\quotes_times 2 -\papercolumns 1 -\papersides 1 -\paperpagestyle default - -\layout Section - -lmc -\layout Subsection - -NAME -\layout Description - -lmc Lustre configuration maker. -\layout Subsection - -SYNOPSIS -\layout Standard - - -\series bold -lmc [options] --add [args] -\layout Standard - - -\series bold -lmc [options] --remove [args] -- [NOT IMPLEMENTED] -\layout Standard - - -\series bold -lmc [options] --convert [args] -- [NOT IMPLEMENTED] -\layout Subsection - -DESCRIPTION -\layout Standard - -At present -\series bold -\emph on -lmc -\series default -\emph default -, when invoked, adds configuration data to the config file. - In future, -\series bold -\emph on -lmc -\series default -\emph default - will also be able to remove configuration data or convert its format. - A Lustre cluster consists of several components - MDS's, mount-points, - OSTs, LOVs and whatever those reference (e.g nets and profiles). - A single configuration file would be generated for the complete cluster. - In the -\series bold -\emph on -lmc -\series default -\emph default - command line interface, each of these components is associated with an - -\emph on -objecttype -\emph default -. 
- -\layout Standard - -The -\emph on -objecttype -\emph default - refers to a collection of related configuration entities and can be one - of -\series bold -net -\series default -, -\series bold - MDS -\series default -, -\series bold - LOV -\series default -, -\series bold - OST -\series default -, -\series bold - mtpt -\series default -, -\series bold - route -\series default -, -\series bold - -\series default -or -\series bold - echo_client. - -\series default -We describe the arguments required for the addition of each -\emph on -objecttype -\emph default -. - -\layout Standard - -To generate configuration data associated with systems in a Lustre cluster: -\layout Description - - -\emph on -- -\emph default --add\SpecialChar ~ -node Adds a new node in the cluster configuration. - -\begin_deeper -\layout Standard - -The arguments required are: -\layout Description - ---node\SpecialChar ~ -''node_name'' This will create a new node with the given name if not - already present. -\layout Description - ---timeout\SpecialChar ~ - Timeout before going into recovery -\layout Description - ---lustre_upcall\SpecialChar ~ - Set the location of the Lustre upcall scripts used - by the client for recovery -\layout Description - ---portals_upcall\SpecialChar ~ - Specify the location of the Portals upcall scripts - used by the client for recovery -\layout Description - ---upcall\SpecialChar ~ - Specify the location of both (Lustre and Portals) upcall - scripts used by the client for recovery -\end_deeper -\layout Description - - -\emph on -- -\emph default --add\SpecialChar ~ -net Adds a network device descriptor for the given node, with parameters - as indicated. -\begin_deeper -\layout Standard - -The arguments required are: -\layout Description - ---node\SpecialChar ~ -''node_name'' This will create a new node with the given name if not - already present. - This is also used to specify a specific node for other elements. 
-\layout Description - ---nettype\SpecialChar ~ - This can be -\series bold -tcp, elan, gm, scimac. -\layout Description - ---nid\SpecialChar ~ -nid The network -\emph on -id -\emph default -, e.g. - ElanID or IP address as used by Portals. - If -\emph on -nid -\emph default - is '*', then the local address of the interface with specified -\series bold -nettype -\series default -will be substituted when the node is configured with lconf. - An -\emph on -nid -\emph default - of -\emph on -'*' -\emph default - should be used only for the generic -\emph on -client -\emph default - configuration. -\layout Description - ---hostaddr addr -\layout Description - ---router Optional flag to mark this node as a router -\layout Description - ---profile\SpecialChar ~ -[not\SpecialChar ~ -implemented] Optional flag to mark this node as a profile node. - This would be very useful to configure several client nodes in large clusters. - It will allow the user to define -\series bold -profiles -\series default - for the various client configurations, and then load the correct profile - on the client nodes using lconf. - -\layout Description - ---port\SpecialChar ~ -[port] Optional argument to indicate the tcp port. - The default is 988. - -\layout Description - ---tcpbuf\SpecialChar ~ - Optional argument. - The default TCP buffer size is 1MB. -\layout Description - ---irq_affinity\SpecialChar ~ -0|1 Optional argument. - Default is 0. -\layout Description - ---nid_exchange\SpecialChar ~ -0|1 Optional argument since some OSTs might not have the - required support. - This is turned off by default; a value of 1 will turn it on. - -\end_deeper -\layout Description - ---add\SpecialChar ~ -mds -\begin_deeper -\layout Description - ---node\SpecialChar ~ - Name of the node on which the MDS resides -\layout Description - ---mds\SpecialChar ~ - -\layout Description - ---dev\SpecialChar ~ - Path of device on local system.
- If the is a file, then a loop device is created and used as the block device. -\layout Description - ---size\SpecialChar ~ - Optional arguement indicating the size of the device to be - created (used typically for loop devices). -\layout Description - ---node\SpecialChar ~ - Adds an MDS to the specified node. - This requires a -\emph on ---node -\emph default - argument, and it must not be a profile node. -\layout Description - ---fstype\SpecialChar ~ -extN|ext3 Optional arguement used to specify the file system type. - Default is ext3. -\layout Description - ---journal_size\SpecialChar ~ - Optional arguement to specify the journal size for - the ext2/ext3 file system. - The size should be in the units expected by -\series bold -mkfs -\series default -, so for ext3 it should be in MB. - If this is option is not used, the ext2/ext3 filesystem will be configured - with the default journal size. -\end_deeper -\layout Description - ---add\SpecialChar ~ -lov Creates an LOV with the specified parameters. - The -\emph on -mds_name -\emph default - must already exist in the descriptor. -\begin_deeper -\layout Description - ---lov\SpecialChar ~ - -\layout Description - ---mds\SpecialChar ~ - -\layout Description - ---stripe_sz\SpecialChar ~ - -\layout Description - ---stripe_cnt\SpecialChar ~ - A value of 0 for this means to stripe on all available - OSTs. - Default is 0. -\layout Description - ---stripe_pattern\SpecialChar ~ - Only Pattern 0 (RAID 0) is supported currently. -\end_deeper -\layout Description - ---add\SpecialChar ~ -ost Creates an OBD, OST, and OSC. - The OST and OBD are created on the specified node. -\begin_deeper -\layout Description - ---ost\SpecialChar ~ - Assign a name to the OST device. -\layout Description - ---node\SpecialChar ~ - Node on which the OST service is run, can not be a profile - node. -\layout Description - ---dev\SpecialChar ~ - Path of device on local system. 
- If this is a file, then a loop device is created and used as the block - device. -\layout Description - ---size\SpecialChar ~ -[size] -\layout Description - ---osdtype\SpecialChar ~ -obdfilter|obdecho -\layout Description - ---lov\SpecialChar ~ - Optional arguement. - Name of LOV to which this OSC will be attached. - -\layout Description - ---ostuuid\SpecialChar ~ -UUID Specify the UUID of the OST device. - -\layout Description - ---fstype\SpecialChar ~ -extN|ext3 Optional arguement used to specify the file system type. - Default is ext3. -\layout Description - ---journal_size\SpecialChar ~ - Optional arguement to specify the journal size for - the ext2/ext3 file system. - The size should be in the units expected by -\series bold -mkfs -\series default -, so for ext3 it should be in MB. - If this is option is not used, the ext2/ext3 filesystem will be configured - with the default journal size. -\end_deeper -\layout Description - ---add\SpecialChar ~ -mtpt Creates a mount-point on the specified node. - Either an LOV or OSC name can be used. -\begin_deeper -\layout Description - ---node\SpecialChar ~ -node Node or profile node that will use the -\emph on -mtpt -\emph default -. -\layout Description - ---path\SpecialChar ~ -/mnt/path\SpecialChar ~ - -\layout Description - ---mds\SpecialChar ~ -mds_name -\layout Description - ---ost\SpecialChar ~ -ost_name\SpecialChar ~ -|\SpecialChar ~ ---lov\SpecialChar ~ -lov_name -\end_deeper -\layout Description - ---add\SpecialChar ~ -route Creates a static route through a gateway to a specific -\emph on -nid -\emph default - or a range of -\emph on -nid -\emph default -'s. -\begin_deeper -\layout Description - ---node\SpecialChar ~ -node Node or profile node to add the route to. -\layout Description - ---gw\SpecialChar ~ -nid The -\emph on -nid -\emph default - of the gateway (must be a local interface or a peer). -\layout Description - ---tgt\SpecialChar ~ -nid For a specific route, this is the target -\emph on -nid. 
-\layout Description - ---lo\SpecialChar ~ -nid For a range route, this is the lo value -\emph on -nid. -\layout Description - ---hi\SpecialChar ~ -nid For a range route, this is the hi value -\emph on -nid. -\end_deeper -\layout Description - ---add\SpecialChar ~ -echo-client Used for testing purpose only. - -\begin_deeper -\layout Description - ---node\SpecialChar ~ -node -\emph on - -\layout Description - ---obd\SpecialChar ~ -obd_name -\end_deeper -\layout List -\labelwidthstring 00.00.0000 - - -\series bold -Options -\layout Description - ---output\SpecialChar ~ -filename Send output to the file. - If the file exists, it will be overwritten. -\layout Description - ---merge\SpecialChar ~ -filename -\emph on - -\emph default -Add the new element to an existing file. - -\layout Subsection - -EXAMPLES -\layout Standard - -Real life examples are given in the Lustre-conf manual page. -\layout Subsection - -BUGS -\layout Standard - -None are known. -\the_end diff --git a/lustre/doc/postbar b/lustre/doc/postbar deleted file mode 100755 index 349d41c..0000000 --- a/lustre/doc/postbar +++ /dev/null @@ -1,151 +0,0 @@ -#! /usr/bin/perl -# postbar - Massage chbar.sh output into valid LaTeX -# Copyright (C) 2002 Cluster File Systems, Inc. -# Gord Eagle , 2002-08-10 - -my $progname = $0; -$progname =~ s|^.*/||; -my $CHANGE_ENVIRONMENT = '\\\\(begin|end)\\{([^\\}]+)\\}'; -my (@envname, @envdepth, @envbuf); -my $phony_preamble = 0; -my $cbdepth = 0; -my $cbfound = 0; - -# Tell whether an environment cannot have arbitrary changebars. -sub fragile_environment -{ - my ($env) = @_; - return $env ne 'document'; -} - - -# Tell whether we can hava arbitrary stuff. -sub toplevel -{ - my ($env) = @_; - return $env eq 'document'; -} - - -sub out -{ - my (@msg) = @_; - if ($#envbuf < 0 || toplevel($envname[0])) { - print @msg; - } else { - $envbuf[0] .= join('', @msg); - } -} - - -# Leave an environment. 
-sub end_environment -{ - my ($env) = @_; - - #out("%$progname end $env\n"); - if ($envname[0] ne $env) { - die "Expecting \\end{$envname[0]} but got \\end{$env}\n"; - } - - if ($cbfound) { - # Did we find a changebar? - $cbfound = !toplevel($envname[1]); - if (!$cbfound) { - # We found one, and the parent environment is the top level. - if ($cbdepth == $envdepth[0]) { - # There was no change in depth, so mark the environment. - $envbuf[0] = "\\cbstart{}%$progname\n" . $envbuf[0]; - out("\\cbend{}%$progname\n"); - } elsif ($envdepth[0] > $cbdepth) { - # There were more ends in the environment, so append them. - for (my $i = 0; $i < $envdepth[0] - $cbdepth; $i ++) { - out("\\cbend{}%$progname\n"); - } - } else { - # There were more starts, so prepend them. - my $starts; - for (my $i = 0; $i < $cbdepth - $envdepth[0]; $i ++) { - $starts .= "\\cbstart{}%$progname\n"; - } - $envbuf[0] = $starts . $envbuf[0]; - } - } - } - - # Drop the environment from the list. - shift(@envname); - shift(@envdepth); - out(shift(@envbuf)); -} - - -while ($_ = ) { - chomp; - my $env; - if (!/\\begin.*\\end/ && /$CHANGE_ENVIRONMENT/o) { - $env = $2; - if ($1 eq 'begin') { - # Enter the new environment. - unshift(@envname, $env); - unshift(@envdepth, $cbdepth); - unshift(@envbuf, ''); - #out("%$progname depth=$cbdepth, $#envname ($env)\n"); - } elsif (!$phony_preamble) { - out("$_\n"); - end_environment($env); - next; - } - } - - if ($#envname >= 0 && /^\\documentclass/) { - $phony_preamble = 1; - } - - if ($phony_preamble) { - # Comment out and ignore the redundant preambles. - out("%$progname $_\n"); - $phony_preamble = 0 if ($env eq 'document'); - next; - } elsif ($#envname >= 0) { - # Track the current changebar depth. 
- if (/^\\cbstart/) { - $cbdepth ++; - if (!toplevel($envname[0])) { - $cbfound = 1; - out("%$progname $_\n"); - next; - } - } elsif (/^\\cbend/) { - if ($cbdepth == 0) { - die "$progname: Too many \\cbend{}s\n"; - } - $cbdepth --; - if (!toplevel($envname[0])) { - $cbfound = 1; - out("%$progname $_\n"); - next; - } - } elsif (/^\\cbdelete/ && fragile_environment($envname[0])) { - # What to do with delete bars? - out("%$progname $_\n"); - next; - } - out("$_\n"); - } else { - out("$_\n"); - # Add the options to the usepackage. - if (/^\\usepackage.*\{changebar\}$/) { - # Prevent PostScript dictionary overflow errors. - out("\\def\\cb\@maxpoint{15}\n"); - - # Show the bars. - out("\\outerbarstrue\n"); - } - } - - if (defined($env)) { - } -} - -exit(0); diff --git a/lustre/doc/tex2pdf b/lustre/doc/tex2pdf deleted file mode 100755 index d9a7176..0000000 --- a/lustre/doc/tex2pdf +++ /dev/null @@ -1,3043 +0,0 @@ -#!/usr/bin/perl -w - -# tex2pdf - script for translating latex docs to pdf -# -# Copyright (C) 2000-2002 by Steffen Evers and others -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License version 2 as -# published by the Free Software Foundation. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -# -# The GNU General Public License is also available online: -# http://www.gnu.org/licenses/gpl.html -# -# Thanks a lot to all the people that have already contributed to this project! -# -# The changelog including the credits has become too long. 
So, I have removed it -# from the script, but it is still available online (see below). -# -# Special thanks to the following people for their contribution -# (see the changelog for details): -# Matej Cepl, Herbert Voss, Nicolas Marsgui, Bruce Foster, Mark van Rossum, -# Matt Bandy, Garrick Chien Welsh, Stacy J. Prowell, Pavel Sedivy, -# Holger Daszler, Olaf Gabler, Ahmet Sekercioglui, Richard, Steffen Macke, -# Rainer Dorsch & friends, Jean-Pierre Chretien, Fernando Perez, -# Ha Duong Minh, Oscar Lopez -# -# Project Homepage: http://tex2pdf.berlios.de -# Developer Homepage: http://developer.berlios.de/projects/tex2pdf -# Mailing lists: http://developer.berlios.de/mail/?group_id=57 -# Changelog: http://tex2pdf.berlios.de/changelog.html -# -# Anyone is invited to help to improve tex2pdf. Therefore any kind of feedback -# is welcome. Maybe you even would like to hack the code and send us your -# changes. This would help a lot and is highly appreciated. Think about it :-) -# Subscribing to the developer mailing list might be a first step (see above). 
-# -# Send feedback to: tex2pdf-devel@lists.berlios.de -# - -######## Imports - -use File::Basename; -use File::Copy; -use Getopt::Long; -use Sys::Hostname; -use Cwd; -use strict; - -####### global variables - -my $MYRELEASE="3.0.21"; -my $MYHOSTNAME=hostname; -my $MYNAME=basename $0; -my $MYUSER=$ENV{'USER'}; -my $USER_HOME=$ENV{'HOME'}; -if (not $MYUSER) { $MYUSER = 'nobody'; } -if (not $USER_HOME) { $USER_HOME = '/tmp'; } - -my @TMPFILES=(); -my @TMP_TEX_FILES=(); -my $NUM_PARAM_MIN=0; -my $NUM_PARAM_MAX=9; -my @REF_DOCS; -my $MTP_PREAMBLE_FILENAME="preamble.cfg"; -my $MTP_TMP_BASESUFFIX="-mp"; -my @EPS_SUFFIXES=('eps','ps','ps.gz','eps.gz' ); -my $PDF_ORIG_SUFFIX='pdf.orig'; -my @BITMAP_SUFFIXES=( 'jpg', 'png', 'tif' ); - -# (initial) log file of this script -# this log file will be moved to the specified log_dir after configuration -# and the variable will be updated to the new name -my $MYLOGFILE="$USER_HOME/tex2pdf-$$.log"; -my $LOGFILE_VERBOSITY=9; - -### text token for no value -my $NIL="NOVALUE"; -my $UNDEF="undefined"; - -### token for boolean 'false', 'no' -my $NO="no"; -my $FALSE=0; - -### token for boolean 'true', 'yes' -my $YES="yes"; -my $TRUE=1; - -### file to store private parameters -# If you only want to change your private parameters change them there -# default: $HOME/.tex2pdf3rc -my $RC_FILENAME="$USER_HOME/.tex2pdf3rc"; -my $MYRCFILE_VERSION=7; -my $RCVERSION_STRING="rcfile_version"; - -## set global variable configured to prevent access to configuration -# parameters before configuration process is finished -my $CONFIGURED=$FALSE; -my $PRE_CONFIG_VERBOSITY=4; - -########################## NEW PERL CONFIGURATON - -my %CONFIGURATION = (); -my %PARAMETER_LIST = (); -my @PARAMETER_ORDER = (); -my %PARAMETER_TYPES = (); - -## Array index for the various information in each parameter specifcation -## referenced by %PARAMETER_LIST -my $TYPE=0; -my $OPT_ALIAS=1; -my $OPT_SPEC=2; -my $DEF_VALUE=3; -my $DESCRIPTION=4; -my $QUESTION=5; -my 
$EXPLANATION=6; - -&add_param_type('paper', - [ ['a4paper' , 'a4 paper' ], - [ 'letterpaper', 'letter paper' ], - [ 'legalpaper', 'legal paper' ], - [ 'executivepaper', 'executive paper' ], - [ $NIL, 'do not set value - leave it to hyperref' ] - ] ); - -&add_param_type('color', - [ [ 'yellow', 'LaTeX color yellow' ], - [ 'red', 'LaTeX color red' ], - [ 'green', 'LaTeX color green' ], - [ 'cyan', 'LaTeX color cyan' ], - [ 'blue', 'LaTeX color blue' ], - [ 'magenta', 'LaTeX color magenta' ], - [ 'black', 'LaTeX color black' ], - [ $NIL , 'do not set this value - leave it to hyperref' ] - ] ); - -&add_param_type('destination', - [ [ 'source', 'directory of the LaTeX source document' ] , - [ 'input', 'root directory of referenced material' ], - [ 'custom', 'custom directory as specified' ] - ] ); - -### Option parameters: these parameters have no default value and can not -# be configured interactively, but only as a command line option -# an option parameter is not allowed to have a default value, question or -# explanation -# and all parameter of type action are treated as option parameters -# $key, $type, $def_value, $opt_alias, $opt_spec, $description, $question, $explanation - -&add_param('help', 'action', undef, '|h', '', - 'print a short help text and exit'); - -&add_param('version', 'action', undef, '|v', '', - 'print the version of this script and exit'); - -&add_param('print_config', 'action', undef, '|o', '', - 'print the current configuration and exit'); - -&add_param('configure', 'action', undef, '|c', '', - 'configure all parameters interactivly, store them and exit'); - -&add_param('title', 'text', undef, '|t', '=s', - 'set PDF info title for specified document'); - -&add_param('author', 'text', undef, '|a', '=s', - 'set PDF info author for specified document'); - -&add_param('input_path', 'directory', undef, '', '=s', - 'set path for referenced material in main document'); - -### Full parameters -# parameter type action is not allowed -# $key, $type, 
$def_value, $opt_alias, $opt_spec, $description, $question, $explanation - -&add_param('logdir', 'directory',"$USER_HOME/tex2pdf-log/", '', '=s', - 'set directory for saving log files', - 'What log directory should be used?', - "The log directory is used to store information about the generation\n" - ."process for later review, e.g. for debugging."); - -&add_param('lyxrc_path', 'directory', "$USER_HOME/.lyx/", '', '=s', - 'set the configuration directory of LyX', - 'What is the directory for your LyX configuration data?', - "If I have to generate a LateX file from a LyX file I need to clear two " - ."temporary\nfiles in your LyX configuration directory. They will " - ."be backuped and normally\ndo not contain any valuable data anyway. If " - ."you do not use LyX, simply leave\nthe default."); - -&add_param('lyx_exec', 'text', "lyx", '', '=s', - 'specify the LyX executable for converting lyx to latex', - 'Which executable should I use for converting LyX docs to LaTeX?', - "I use LyX to generate a LateX file from a LyX file. As you might use\n" - ."several versions of LyX at the same time or do not have it in your path" - ."\nyou can give me the apropriate executable here (e.g. '/usr/bin/lyx')." - ."\nIn most cases the default 'lyx' should be fine."); - -&add_param('debug', 'bool', $NO, '', '!', -'do not delete temporary files after execution and be as verbose as possible', - 'Do you want to debug this script?', - "I will not remove any temporary files. This could cause problems on a " - ."second\nexecution as I might refuse to overwrite these files for security " - ."reasons.\nYou have to remove them manually in this case. Additionally, I " - ."will provide\nas much information during execution as possible."); - -&add_param('delete_pdf_images', 'bool', $NO, '', '!', - 'delete generated PDF image files after execution', - 'Should generated PDF image files be deleted after execution?', - "Pdflatex cannot handle EPS images. 
Therefore all such images need to be\n" - ."translated to PDF in advance. After a successful generation of the final " - ."PDF \ndocument or after encountering an error I could leave this PDF " - ."images for\n later executions or simply delete them."); - -&add_param('clean_on_abort', 'bool', $YES, '', '!', - 'also delete temporary files after abort', - 'Should temporary files be deleted when aborting?', - "When the generation of the PDF file fails for some reason you might still " - ."want\nto keep already generated temporary files for some reason, e.g. " - ."debugging.\nIf you do not want to keep them set this parameter to 'yes'."); - -&add_param('tmp_base_suffix', 'text', '-pdf', '', '=s', - 'specify the extension of the basename for temporary TeX files', - 'What string should be used as basename suffix for temporary files?', - "I have to find names for my temporary files. Therefore I construct a new " - ."name\nfrom the original filename and this string. Me and various called " - ."applications\nwill then create several files with this constructed " - ."basename and different\nextensions. When cleaning up I will simple " - ."delete all files with the\nconstructed basenames I have used."); - -&add_param('overwrite', 'bool', $NO, '', '!', - 'ignore existence of files with same basename as temporary TeX files', - 'Should I overwrite existing (temporary) files?', - "In spite of the precaution with the appended base suffix, there still " - ."might\nexist files with an identical basename. If you set this option I " - ."will consider\nsuch files as old temporary files and overwrite them " - ."during generation.\nHowever, I will not remove any files with this " - ."constructed basename on the\nclean up. You have to remove them manually."); - -&add_param('clean_logs', 'bool', $YES, '|l', '!', - 'delete all log files in log directory before execution', - 'Should the old log files be removed before execution?', - "I can remove old log files prior to execution. 
However, you might " - ."experience\nproblems if you run the script on several documents at the " - ."same time. If you\nwant to be on the safe side, answer '$NO'. Than you " - ."have to remove the logs\nmanually from time to time."); - -&add_param('check_commands', 'bool', $YES, '', '!', - 'make sure that required shell commands does exist before start', - 'Should I look for required executables?', - "As I use several different applications for PDF generation you might want " - ."to \nmake sure that they are available before the real work starts."); - -&add_param('destination', 'destination', 'source', '', '=s', - 'specifiy final location of generated PDF document', - 'Where should I store the resulting PDF document?', - "Depending on the application that starts this script (or you if you call " - ."it\ndirectly) you might want to have the resulting PDF document located " - ."at\ndifferent places."); - -&add_param('custom_path', 'directory', $USER_HOME.'/', '|d', '=s', - "specify custom path for 'destination' parameter", - 'What custom directory should be used?', - "When ever you specifiy to store the generated PDF document (command line " - ."or\nconfiguration) in a custom directory I will put it here."); - -&add_param('colorlinks', 'three', $YES, '', '!', - 'activate colored links in PDF doc (hyperref)', - 'Should colors be used for links?', - "I can use different colors for links inside the PDF document.\nYou can " - ."use '$UNDEF' to tell me that you would like to leave this up to\n" - ."independent hyperref configuration."); - -&add_param('paper', 'paper', 'a4paper', '|p', '=s', - 'specify papersize of the PDF doc (hyperref)', - 'What papersize should be used?', - "I can set the papersize of the resulting PDF document"); - -&add_param('citecolor', 'color', 'blue', '', '=s', - 'specify color of citations in PDF doc (hyperref)', - 'What color should be used for citation?', ""); - -&add_param('urlcolor', 'color', 'blue', '', '=s', - 'specify color of URLs in PDF 
doc (hyperref)', - 'What color should be used for URLs?', ""); - -&add_param('linkcolor', 'color', 'blue', '', '=s', - 'specify color of internal links in PDF doc (hyperref)', - 'What color should be used for normal internal links?', ""); - -&add_param('pagecolor', 'color', 'blue', '', '=s', - 'specify color of links to other pages in PDF doc (hyperref)', - 'What color should be used for page links?', ""); - -&add_param('link_toc_page', 'bool', $YES, '', '!', - 'link table of contents to pages instead of sections (hyperref)', - 'Should TOC be linked to pages?', - "The table of contents of the resulting PDF document is normally linked to " - ."the\ncorresponding section. However, you can also link it to the " - ."corresponding page\ninstead."); - -&add_param('default_title', 'text', $NIL, '', '=s', - 'set default PDF info title', - 'What is the default title?', - "A PDF document contains meta data about itself: the document info.\nOne " - ."of the info fields is the document title. You can set a default value\n" - ."which will be used in the case the script cannot determine a proper " - ."title from\nthe LaTeX document and you have not set one on the command " - ."line.\nYou can use '$NIL' to tell me that you would like to leave this " - ."up to\nindependent hyperref configuration."); - -&add_param('default_author', 'text', $NIL, '', '=s', - 'set default PDF info author', - 'What is the default author?', - "A PDF document contains meta data about itself: the document info.\nOne " - ."of the info fields is the document author. 
You can set a default value\n" - ."which will be used in the case the script cannot determine a proper " - ."author\nfrom the LaTeX document and you have not set one on the command " - ."line.\nYou can use '$NIL' to tell me that you would like to leave this " - ."up to\nindependent hyperref configuration."); - -&add_param('force_index', 'bool', $NO, '|i', '!', - 'force explicit inclusion of (existing) index in PDF doc', - 'Should the call of makeindex be forced?', - "Older versions of pdflatex have not included the index of a document\n" - ."automatically. If you are missing the index in your document you can " - ."force the\ncall of makeindex on the condition that an index file was " - ."generated."); - -&add_param('makeindex_opts', 'text', '', '', '=s', - 'specify extra options for shell execution of makeindex', - 'What additional options for makeindex should be used?', - "Sometimes, people would like to pass some extra options over to makeindex. " - ."This\nis the right place to do that. 
Everyone else can leave this empty."); - -&add_param('bibtex', 'three', $NIL, '|b', '!', - 'set bibtex behavior', - 'How should bibtex be used?', - "The bibtex usage can be specified.\nPossible values are: '$YES' (always " - ."run bibtex), '$NO' (never run bibtex)\nand '$UNDEF' (scan tex file for " - ."a bibtex entry and run it if required)."); - -&add_param('gloss', 'three', $NIL, '', '!', - 'set gloss behavior', - 'How should gloss be used?', - "The gloss usage can be specified.\nPossible values are: '$YES' (always " - ."run bibtex on file.gls), '$NO' (never run bibtex on file.gls)\nand '$UNDEF' (scan tex file for " - ."a gloss entry and run it if required)."); - -&add_param('thumbpdf', 'bool', $NO, '|n', '!', - 'generate thumbnails for PDF document', - 'Should PNG thumbnails be created?', - "I can use thumbpdf to include thumbnails of the document pages in the PDF " - ."file.\nThis requires Ghostscript 5.50 or higher."); - -&add_param('ppower', 'bool', $NO, '|w', '!', - 'postprocess PDF document with ppower', - 'Should ppower postprocess the PDF document?', - "I can use ppower to postprocess the PDF " - ."file.\nThis requires ppower 4.0 or higher."); - -&add_param('authorindex', 'bool', $NO, '', '!', - 'generate author index for PDF document', - 'Should authorindex process the PDF document?', - "I can use authorindex to process to include an author index in the PDF " - ."file.\nThis requires authorindex."); - - -&add_param('mtp_preamble', 'bool', $NO, '|r', '!', - "add the file $MTP_PREAMBLE_FILENAME at the current dir to metapost files", - "Should the $MTP_PREAMBLE_FILENAME file be added to the metapost files?", - "I can add $MTP_PREAMBLE_FILENAME to metapost " - ."files.\nThis requires an existing $MTP_PREAMBLE_FILENAME file."); - -&add_param('maxrun', 'integer', 6, '', '=i', - 'specify maximal number of pdflatex runs if problems are detected', - 'What should be the maximum number of runs for pdflatex (1-6)?', ""); - -&add_param('minrun', 'integer', 2, '', 
'=i', - 'specify minimal number of pdflatex runs if no problems are detected', - 'What should be the minimum number of runs for pdflatex (1-6)?', ""); - -&add_param('verbosity', 'integer', 5, '', '=i', - 'set the level of verbosity', - 'Which level of verbosity do you want (0-9)', - "Different people want different amounts of information about what the " - ."script\nactually does. Therefore you can adjust the verbosity to your " - ."personal needs\nby setting this parameter to a value from 0 to 9. 0 " - ."means no output at all\nand 9 means maximal output."); - -&add_param('pdftex_opts', 'text', '', '', '=s', - 'specify extra options for shell execution of pdflatex', - 'What additional options for pdflatex should be used?', - "Sometimes, people would like to pass some extra options over to pdflatex. " - ."This\nis the right place to do that. Everyone else can leave this empty."); - -&add_param('hyperref_args', 'text', '', '', '=s', - 'specify extra arguments for the hyperref package', - 'What additional arguments should be passed to the hyperref package?', - "Sometimes, people would like to pass some extra options over to hyperref. " - ."This\nis the right place to do that. 
Everyone else can leave this empty."); - -# the following parameter types should be set now: -# 'color' => \@VALUES, -# 'destination' => \@VALUES, -# 'paper' => \@VALUES, -# 'action' => undef, -# 'three' => undef, -# 'bool' => undef, -# 'integer' => undef, -# 'text' => undef, -# 'directory' => undef - -##### Functions ########################################### - -### handle a status report with a given priority level -# write it to the log file if log file if configuration is done -# write it to stdout if verbosity is set lower or equal -# -# The following priority levels exist: -# 1: minimal fatal error messages -# 2: additional information about fatal error -# 3: non-fatal error message -# 4: warning -# 5: major step of the process -# 6: minor step of the process -# 7: progress report of minor step -# 8: long status report from called applications -# 9: debug info -# -# parameter 1: priority level -# parameter 2: list of output strings -# return value: none - -sub report { - my $level; - my $verbosity; - my $log_verbosity; - my @output; - - if (@_ < 2 ) { - @output = ( "Oppss! Report function got only 1 argument!" 
); - $level = 9; - } else { - ($level, @output) = @_; - } - - if($CONFIGURED) { - if( ¶m_value('debug') eq $NO ) { - $verbosity = ¶m_value('verbosity'); - $log_verbosity = $LOGFILE_VERBOSITY; - } else { - $verbosity = 9; - $log_verbosity = 9; - } - } else { - $verbosity = $PRE_CONFIG_VERBOSITY; - $log_verbosity = $LOGFILE_VERBOSITY; - } - - if ( $level <= $log_verbosity ) { - open LOGFILE, ">> $MYLOGFILE"; - print LOGFILE @output,"\n"; - close LOGFILE; - } - - if( $level <= $verbosity ) { - print @output,"\n"; - } -} - -### process system command and do the appropriate reports -# parameter 1: the system command to process -# parameter 2: flag - TRUE: abort on failure, FALSE: continue on failures -# parameter 3: priority level for output of system command -# parameter 4: specific failure message -# return value: TRUE - success, FALSE - failure - -sub system_command { - my $command = $_[0]; - my $fatal_failure= $_[1]; - my $output_priority = $_[2]; - my $fail_message = $_[3]; - my $system_out; - - $system_out = `$command 2>&1`; - - if ($?) { - if ($fatal_failure) { - &report(2, $system_out) if ($system_out); - &abort($fail_message.": $!"); - } else { - &report($output_priority, $system_out) if ($system_out); - &report(3, $fail_message.": $!"); - } - return $FALSE; - } - - return $TRUE; -} - -### Index of the first occurence of a string in an array -# parameter 1: text -# parameter 2: list -# return value: index or -1 if not element of the array - -sub array_index { - my ($text, @list) = @_; - - if(!defined($text)) { - &report(9, "Oppss! Cannot compare nothing."); - return -1; - } - - foreach (0..$#list) { - if ( $list[$_] eq $text ) { return $_; } - } - - return -1; -} - -### extract the last N lines of a text file -# abort on failures -# parameter 1: file to read -# parameter 2: N - number of lines to print (undef/0: all lines) -# return value: last N lines - -sub file_tail { - my $file_name = $_[0]; - my $no_of_lines = defined($_[1]) ? 
$_[1] : 0; - my @cache=(); - - &check_file($file_name); - open(TAIL_SOURCE, "<$file_name") - or &abort("Could not read file $file_name: $!"); - - if($no_of_lines == 0) { - # use entire file - while() { - if(!$_) { $_="\n"; } - push(@cache, $_); - } - } else { - # only last N lines - - # fill up cache - while(@cache < $no_of_lines and ) { - if(!$_) { $_="\n"; } - push(@cache, $_); - } - - # always cache the last N lines up to the end of the file - while() { - if(!$_) { $_="\n"; } - shift(@cache); - push(@cache, $_); - } - } - - close TAIL_SOURCE; - - return @cache; -} - -### return all lines of FILE which match match regexp EXPR -# parameter 1: FILE - file to read -# parameter 2: EXPR - regular expression to match -# parameter 3: true: exit on first occurence, otherwise get all (default: false) -# return value: array of matching lines - -sub grep_file { - my $file_name = $_[0]; - my $regexp = $_[1]; - my $first_only = $_[2] ? $TRUE : $FALSE; - my @result=(); - - ### open file and abort if not possible - &check_file($file_name); - open(GREP_SOURCE, "<$file_name") - or &abort("Could not read file $file_name: $!"); - - while() { - if(m#$regexp#) { - push(@result, $_); - if($first_only) { last; } - } - } - - close GREP_SOURCE; - return @result; -} - -### Removing all temporary files - -sub clean_up { - &report(6, "Removing temporary files ..."); - foreach (@TMPFILES) { - unlink($_) if(-f $_); - } - - foreach (@TMP_TEX_FILES) { - # make sure that we have a good tex file name in order - # to avoid unintended removals - if( $_ ne "" and -f $_.'.tex' ) { - unlink glob($_.".???"); - } else { - &report(3, "Bad file in temp tex files list: $_"); - } - } -} - -### Output of all temp files - -sub print_temp_files { - if (scalar @TMPFILES > 0) { - print "Stored the following explicit temporary files:\n"; - foreach (@TMPFILES) { - if (-f $_) { - print "> ".$_."\n"; - } else { - print "> ".$_." 
(does not exist)\n"; - } - } - print "\n"; - } - - if (scalar @TMP_TEX_FILES > 0) { - print "Stored the following temporary TeX base names:\n"; - foreach (@TMP_TEX_FILES) { - if( $_ ne "" and -f $_.'.tex' ) { - print "> ".$_.": "; - foreach my $file (glob($_.".???")) { - print basename($file)." "; - } - print "\n"; - } else { - print "> ".$_.": bad file for temp TeX file\n"; - } - } - print "\n"; - } -} - -### exit with an error message - -sub abort { - &report(1, @_); - if ( $CONFIGURED and ¶m_value('clean_on_abort') eq $YES - and ¶m_value('debug') eq $NO) { - &clean_up; - } else { - &print_temp_files; - } - &report(2, "Aborting ..."); - exit 1; -} - -### Check for required command with 'which'; abort if not found -# parameter $1: command to check -# parameter $2: remark if specified command is not found - -sub checkCommand { - my $command = $_[0]; - my $message = $_[1]; - my $which_output; - - $which_output = `which $command 2>&1`; - chomp $which_output; - $_ = $which_output; - s|^(.*/)?([^/]+)$|$2|; - - if ( $_ ne $command ) { - &report(2, "\n$which_output"); - &report(1, "\nRequired command '$command' seems not to be in your path."); - if ( defined($message) ) { - &report(2, "$message"); - } - &report(2, "Aborting ..."); - exit 1; - } -} - -###################### Generic configuration functions - -### interactively answer a question with yes or no -# parameter 1: question -# parameter 2: default value (not set means $NIL) -# parameter 3: yes: allow undefined as third value -# no : only yes/no allowed (default) -# return value: the given answer - -sub question_ynu { - my $user_input; - my $question = $_[0]; - my $default = defined($_[1]) ? 
$_[1] : $NIL; - my $undef_allowed = $_[2]; - my $response = undef; - - if (defined($undef_allowed) and $undef_allowed eq $YES) { - $undef_allowed = $TRUE; - } else { - $undef_allowed = $FALSE; - } - - if( $default =~ /^y(es)?/i ) { - $question .= ' [y]: '; - $default = $YES; - } elsif ( $default eq $NIL and $undef_allowed ) { - $question .= ' [u]: '; - $default = $NIL; - } else { - $question .= ' [n]: '; - $default = $NO; - } - while (! defined($response)) { - print $question; - $user_input = ; - chomp($user_input); - - if( $user_input =~ /^y(es)?/i ) { - $response=$YES; - } elsif ( $user_input =~ /^no?/i ) { - $response=$NO; - } elsif ( $user_input =~ /^u(ndef(ined)?)?/i and $undef_allowed ) { - $response=$NIL; - } elsif ( $user_input eq "" ) { - $response=$default; - } else { - print "Please respond with y(es)"; - print ", u(ndefined)" if($undef_allowed); - print " or n(o).\n"; - } - } - return $response; -} - -### interactively input a positive integer number -# parameter 1: question -# parameter 2: default value -# parameter 3: min value -# parameter 4: max value -# return value: the input number - -sub input_number { - my $question = $_[0]; - my $default = $_[1]; - my $min_limit = $_[2]; - my $max_limit = $_[3]; - my $response= undef; - - while (! defined($response)) { - print "$question [$default]: "; - my $user_input = ; - chomp($user_input); - - if ($user_input eq "") { - $response=$default; - } else { - $_ = $user_input; - if (s/^([0-9]+)$/$1/ and $_ >= $min_limit and $_ <= $max_limit ) { - $response = $_; - } else { - print "Invalid input. 
Please enter a positve integer from $min_limit to $max_limit.\n"; - } - } - } - return $response; -} - -### interactively choose between several given values -# parameter 1: question -# parameter 2: default value -# parameter 3: reference to an array of possible values arrays -# return value: the chosen value - -sub choose_value { - my ($question, $default, $enum_array_ref)=@_; - my $default_no=1; - my $chosen_no; - my @possible_values = @$enum_array_ref; - my @value_array; - my $value_key; - my $value_output; - - print "$question\n"; - foreach (0..$#possible_values) { - my $no = $_ + 1; - @value_array = @{$possible_values[$_]}; - $value_key = $value_array[0]; - $value_output = $value_array[1]; - - print "$no) " . $value_output . "\n"; - if ( $default eq $value_key ) { $default_no=$no; } - } - - $chosen_no=&input_number("Please enter the corresponding number", $default_no, 1, $#possible_values); - - @value_array = @{$possible_values[$chosen_no - 1]}; - $value_key = $value_array[0]; - return $value_key; -} - -### interactively answer a question -# parameter 1: question -# parameter 2: current value -# return value: the new value - -sub input_text { - my $question=$_[0]; - my $default=$_[1]; - my $response= undef; - - print "Suggested value: $default\n"; - if ( &question_ynu("Do you want to keep this value?", $YES) eq $YES ) { - $response= $default; - } else { - print "$question "; - my $user_input = ; - chomp($user_input); - - $response = $user_input; - } - return $response; -} - -##### Make sure that specified file exists and is readable; abort if missing -# parameter 1: file to check -# parameter 2: remark if check fails on specified file - -sub check_file { - my $file = $_[0]; - my $message = defined($_[1]) ? $_[1] : "Required file cannot be accessed!"; - - if ( ! -f $file ) { - &report(2, "\nSorry. I cannot find '$file'."); - &abort($message); - } elsif ( ! -r $file ) { - &report(2, "\nSorry. 
File '$file' exists, but is not readable."); - &abort($message); - } -} - -##### Make sure that specified directory exists and is writable -# parameter 1: directory to check -# parameter 2: remark if check fails on specified directory -# parameter 3: if yes, creation is allowed -# return value: $TRUE - ok; $FALSE - error - -sub check_dir { - my $directory = $_[0]; - my $message = defined($_[1]) ? $_[1] : "Not a valid path!"; - my $allow_creation = defined($_[2]) ? $_[2] : $NO; - - if ( index($directory, "/") != 0 ) { - &report(3, "\nSorry. '$directory' is not an absolute path."); - &report(3, $message); - return $FALSE; - } elsif ( ! -d $directory ) { - # dir does not exist - if ( $allow_creation eq $YES ) { - # creation allowed - &report(4, "\nI cannot find '$directory'. Try to create it."); - if ( mkdir($directory, 0755) ) { - &report(7, "Creation of '$directory' was successful."); - return $TRUE; - } else { - &report(3, "Creation of '$directory' failed."); - &report(3, $message); - return $FALSE; - } - } else { - # creation not allowed - &report(3, "\nSorry. Directory '$directory' does not exist."); - &report(3, $message); - return $FALSE; - } - } elsif ( ! -w $directory ) { - # dir not writable - &report(3, "\nSorry. Directory '$directory' exists, but is not writable."); - &report(3, $message); - return $FALSE; - } - return $TRUE; -} - -### interactively input a directory for data storage (absolute path) -# parameter 1: question -# parameter 2: default dir -# parameter 3: if 'yes' allow creation of directory -# return value: the specified directory - -sub input_dir { - my $question = $_[0]; - my $default_dir = $_[1]; - my $allow_creation = defined($_[2]) ? $_[2] : $NO; - my $user_input; - my $response = undef; - - if ( defined($default_dir) and index($default_dir, "/") == 0 - and ( (! 
-d $default_dir and $allow_creation eq $YES) - or (-d $default_dir and -w $default_dir) ) ) { - $question .= " [$default_dir]: "; - } else { - $default_dir = undef; - $question .= ": "; - } - - while (! defined($response)) { - print "$question"; - $user_input = ; - chomp($user_input); - - if( $user_input eq "" and defined($default_dir) ) { - # user has only pressed and thereby confirmed default value - if( ! &check_dir($default_dir,"Default value was not valid. Please, give different directory.", $allow_creation ) ) { - # default dir does not exist and cannot be created - $default_dir = undef; - $question = "$_[0]: "; - } else { - # valid default dir has already existed or has been created - $response = $default_dir; - } - } else { - # user has given a directory - if( &check_dir($user_input,"This is not a valid directory!", $allow_creation) ) { - $response = $user_input; - } - } - } - return $response; -} - -#### add a new parameter type with corresponding additional data argument -#### parameters types -# parameter 1: type key -# parameter 2: additonal data for the type -# e.g. for enum type: reference to list of arrays -# return value: none - -sub add_param_type { - - my ($type, $scalar_argument) = @_; - - $PARAMETER_TYPES{$type} = $scalar_argument; -} - -#### add a new parameter to the adminstrated parameters -#### -# parameter 1: key -# parameter 2: type -# parameter 3: default value -# parameter 4: alias for command line options -# parameter 5: specification for command line options -# parameter 6: short description for help -# parameter 7: question -# parameter 8: explanation -# return value: none - -sub add_param { - - my ($key, $type, $def_value, $opt_alias, $opt_spec, $description, $question, $explanation) = @_; - - $CONFIGURATION{$key} = $def_value; - - $PARAMETER_LIST{$key} = [ $type, $opt_alias, $opt_spec, $def_value, $description, $question, $explanation ]; - - push(@PARAMETER_ORDER, $key); - - if (! 
exists $PARAMETER_TYPES{$type}) { - $PARAMETER_TYPES{$type} = undef; - } -} - -### get the value of an existing parameter -# parameter 1: a parameter key -# return value: reference to the array of possible value entries -# (undef if not valid) - -sub type_enum_array { - my $key = $_[0]; - my $values_ref; - - if(! exists($PARAMETER_TYPES{$key})) { - &abort("unknown type: $key"); - } - - $values_ref = $PARAMETER_TYPES{$key}; - - if(ref $values_ref ne 'ARRAY') { - $values_ref = undef; - } - - return $values_ref; -} - -### get the value of an existing parameter -# parameter 1: a parameter key -# return value: parameter value - -sub param_value { - my $key = $_[0]; - my $current_value; - - exists($CONFIGURATION{$key}) - or &abort("unknown parameter: $key"); - $current_value = $CONFIGURATION{$key}; - - return $current_value; -} - -### get the type of an existing parameter -# parameter 1: a parameter key -# return value: parameter value - -sub param_type { - my $key = $_[0]; - my $def_ref; - my $type; - - exists($PARAMETER_LIST{$key}) - or &abort("unknown parameter: $key"); - $def_ref = $PARAMETER_LIST{$key}; - $type = @{$def_ref}[$TYPE]; - - exists($PARAMETER_TYPES{$type}) - or &abort("parameter has unknown type: $key (type: $type)"); - - return $type; -} - -### determine if the given parameter is a full one (instead of option only) -# parameter 1: a parameter key -# return value: $TRUE - full parameter; $FALSE otherwise - -sub full_param { - my $key = $_[0]; - my @param_def; - - exists($PARAMETER_LIST{$key}) - or &abort("unknown parameter: $key"); - @param_def = @{$PARAMETER_LIST{$key}}; - - if ($param_def[$TYPE] ne 'action' and defined ($param_def[$QUESTION])) { - return $TRUE; - } else { - return $FALSE; - } -} - -### get the output needed for configuration of this parameter -# parameter 1: a parameter key -# return value: array - (description, explanation, question) - -sub param_config_output { - my $key = $_[0]; - my @param_def; - my $description; - my $explanation; 
- my $question; - - exists($PARAMETER_LIST{$key}) - or &abort("unknown parameter: $key"); - @param_def = @{$PARAMETER_LIST{$key}}; - $description = $param_def[$DESCRIPTION]; - $explanation = $param_def[$EXPLANATION]; - $question = $param_def[$QUESTION]; - - return ($description, $explanation, $question); -} - -### set the value of an existing parameter -# parameter 1: a parameter key -# parameter 2: the new value -# return value: none - -sub set_param_value { - my $key = $_[0]; - my $new_value = $_[1]; - - exists($CONFIGURATION{$key}) - or &abort("unknown parameter: $key"); - $CONFIGURATION{$key}=$new_value; -} - -### get option specifier for getopts -# parameter 1: option key -# return value: string - option specifier - -sub option_specifier { - my $key = $_[0]; - my $spec; - my $def_ref; - - exists($PARAMETER_LIST{$key}) - or &abort("unknown parameter: $key"); - $def_ref = $PARAMETER_LIST{$key}; - $spec = $key . ${$def_ref}[$OPT_ALIAS] . ${$def_ref}[$OPT_SPEC]; - - return $spec; -} - -### handle an option -# parameter 1: a parameter/option key -# parameter 2: the option value -# return value: none - -sub handle_option { - my $key = $_[0]; - my $value = $_[1]; - my $type; - - $type = ¶m_type($key); - - if ($type eq 'action') { - &handle_action_opt($key, $value); - } elsif ( $type eq 'bool' or $type eq 'three') { - my $bool_value = $value ? $YES : $NO; - &set_param_value($key, $bool_value); - } elsif ( $type eq 'directory') { - if (! 
&check_dir($value)) { - &report(2, "$key requires an existing writable directory as an absolute path."); - &abort("Illegal value: $value"); - } - &set_param_value($key, $value); - } elsif (defined(&type_enum_array($type))){ - &set_enum_param($key, $value); - } else { - &set_param_value($key, $value); - } -} - -### handle all action options -# parameter 1: a option key -# parameter 2: the option value -# return value: none - -sub handle_action_opt { - my $key = $_[0]; - my $value = $_[1]; - - if ($key eq 'help') { - &print_help; - - } elsif ($key eq 'version') { - &print_version; - - } elsif ($key eq 'configure') { - if ( -f $RC_FILENAME ) { - &read_configuration($RC_FILENAME); - } - &configure; - &write_configuration($RC_FILENAME); - &print_configuration; - - } elsif ($key eq 'print_config') { - if ( -f $RC_FILENAME ) { - &read_configuration($RC_FILENAME); - } - &print_configuration; - - } else { - &print_usage; - exit 1; - } - exit 0; -} - -### set a variable by a command line option to a possible values; abort on error -# parameter 1: parameter key -# parameter 2: value - -sub set_enum_value { - my ($key, $value) = @_; - my $type; - my $enum_array_ref; - my @allowed_values=(); - - $type = ¶m_type($key); - $enum_array_ref = &type_enum_array($type); - &abort("Internal error: No value array for parameter $key.") - if(!defined($enum_array_ref)); - - ### find out if the given value is allowed - foreach my $value_array_ref (${$enum_array_ref}) { - if(${$value_array_ref}[0] eq $value) { - ### found it, so it is okay! - &set_param_value($key, $value); - return; - } - } - - ### value is not listed, so not allowed - ### make a list of all allowed values - foreach my $value_array_ref (${$enum_array_ref}) { - push(@allowed_values, ${$value_array_ref}[0]); - } - - &report(2, "\n$key allows: " . @allowed_values . 
".\n"); - &abort("Illegal value: $value"); -} - -### configure an existing parameter interactively -# parameter 1: a parameter key -# return value: none - -sub config_param { - my $key = $_[0]; - my $type; - my $new_value; - my $current_value; - my $description; - my $explanation; - my $question; - - ### get required information about this parameter - $type = ¶m_type($key); - $current_value = ¶m_value($key); - ($description, $explanation, $question) = ¶m_config_output($key); - - ### tell the user the facts - print "\n\n--------------------------------------------\n"; - print "Parameter: ".$key."\n"; - print $description."\n\n"; - print $explanation."\n\n" if($explanation ne ""); - - ### ask him what he wants - if ($type eq 'bool') { - $new_value=&question_ynu($question, $current_value, $NO); - } elsif ($type eq 'three') { - $new_value=&question_ynu($question, $current_value, $YES); - } elsif ($type eq 'directory') { - $new_value=&input_dir($question, $current_value, $YES); - } elsif ($type eq 'text') { - $new_value=&input_text($question, $current_value); - } elsif ($type eq 'integer') { - $new_value=&input_number($question, $current_value, - $NUM_PARAM_MIN, $NUM_PARAM_MAX); - } else { - my $enum_array_ref; - - $enum_array_ref=&type_enum_array($type); - if (! 
defined($enum_array_ref)) { - &abort("Do not know how to configure this parameter: $key (type: $type)"); - } - - $new_value=&choose_value($question,$current_value,$enum_array_ref); - } - - ### store his choice - &set_param_value($key, $new_value); -} - -### save configuration in rc file -# parameter 1: file name -# return value: none - -sub write_configuration { - my $file_name = $_[0]; - my $date; - - open(RCFILE, ">$file_name") or - &abort("Could not open configuration file for writing ($file_name)"); - select RCFILE; - - $date = `date`; - chomp($date); - - print "# Configuration file for $MYNAME V$MYRELEASE\n"; - print "# Generated $date by $MYUSER on $MYHOSTNAME\n"; - print "$RCVERSION_STRING=$MYRCFILE_VERSION\n"; - - foreach my $key (@PARAMETER_ORDER) { - my $value = $CONFIGURATION{$key}; - if(&full_param($key)) { - print $key.'='.$value."\n"; - } - } - - print "# EOF\n"; - select STDOUT; - close RCFILE; -} - -### print the configuration parameters - -sub print_configuration { - print "\nConfiguration for $MYNAME V$MYRELEASE\n"; - - foreach my $key (@PARAMETER_ORDER) { - my $value = $CONFIGURATION{$key}; - if(&full_param($key)) { - print $key.'='.$value."\n"; - } - } - - print "\n"; -} - -### load parameters from rc file -# parameter 1: file name -# return value: version of read rc file or 0 if no version given - -sub read_configuration { - my $file_name = $_[0]; - my $file_version= 0; - - &check_file($file_name, "Could not access configuration file"); - open(RCFILE, "<$file_name") or - &abort("Could not open configuration file for reading ($file_name)"); - - while () { - chomp; - if( /^([^#=]+)=(.*)$/ ) { - if( exists $CONFIGURATION{$1} ) { - $CONFIGURATION{$1} = $2; - } elsif ( $1 eq $RCVERSION_STRING ) { - $file_version = $2; - } else { - print "Ignoring unknown parameter in RC file: $1=$2\n"; - } - } - } - close RCFILE; - - return $file_version; -} - -### print script version - -sub print_version { - print "\n$MYNAME Version $MYRELEASE\n"; -} - 
-###################### Specific functions (for use with this script only) - -### print usage of command - -sub print_usage { - print "\nUsage: $MYNAME [OPTIONS] DOCUMENT.lyx\n"; - print " $MYNAME [OPTIONS] DOCUMENT[.tex]\n\n"; - print " $MYNAME -c | --configure modify/set up configuration\n"; - print " $MYNAME -h | --help give a short help\n"; - print " $MYNAME -o | --print_config print current configuration\n"; - print " $MYNAME -v | --version print my version\n\n"; -} - -### print command help - -sub print_help { - &print_version; - &print_usage; - - foreach my $key (@PARAMETER_ORDER) { - my @param_def = @{$PARAMETER_LIST{$key}}; - my $description = $param_def[$DESCRIPTION]; - my $takes_value = $param_def[$OPT_SPEC] =~ /[=:]/ ? $TRUE : $FALSE; - my $negation = $param_def[$OPT_SPEC] eq '!' ? $TRUE : $FALSE; - my $alias = $param_def[$OPT_ALIAS]; - - $alias =~ s/\|(([a-zA-Z])(\||$))/ | -$1/g; - $alias =~ s/\|(([a-zA-Z][a-zA-Z0-9_]+)(\||$))/ | --$1/g; - - print "--"; - print "[no]" if($negation); - print $key.$alias; - print " VALUE" if ($takes_value); - print ":\n ".$description."\n\n"; - } - print "\n"; -} - -### configure all tex2pdf parameters interactively -# parameters: none -# return value: none - -sub configure { - - print "\n--------------------------------------------------------\n"; - print "\n***** Configuration for $MYNAME *****\n\n"; - print "The following answers are considered as defaults in later "; - print "executions\n"; - print "of $MYNAME. You can change these values by using the option "; - print "--configure \nagain."; - print "Additionally, all command-line options override these settings.\n"; - print "Many parameters can be set to '$NIL' or '$UNDEF'. This means that NO"; - print "\nvalue at all (not even an empty value) is passed over to the "; - print "called\napplication (e.g. 
latex package hyperref).\n"; - - $NUM_PARAM_MIN=1; - $NUM_PARAM_MAX=9; - - foreach my $key (@PARAMETER_ORDER) { - if(&full_param($key)) { - &config_param($key); - } - } - - print "\nConfiguration for $MYNAME finished.\n\n"; -} - -### check if the most important executables are installed on the system -# parameters: none - -sub check_commands { - my $exec_epstopdf; - ### check for which command - &checkCommand("which","You can switch off all command checks to fix this."); - - ### pdftex executables - # Homepage: http://tug.org/applications/pdftex - &checkCommand("pdflatex","See pdftex homepage for details: http://tug.org/applications/pdftex"); - &checkCommand("epstopdf","See pdftex homepage for details: http://tug.org/applications/pdftex"); - $exec_epstopdf = `which epstopdf`; - chomp $exec_epstopdf; - my $compat = "-dCompatibilityLevel=1\\.1"; - if (defined($ENV{'GS_OPTIONS'}) && $ENV{'GS_OPTIONS'} =~ /$compat/o) { - &report(9, "Good: ghostscript option '-dCompatibilityLevel=1.1' detected " - ."in\n'\$GS_OPTIONS'."); - } elsif (&grep_file($exec_epstopdf, $compat, $TRUE) > 0) { - &report(9, "Good: ghostscript option '-dCompatibilityLevel=1.1' detected " - ."in\n'$exec_epstopdf'."); - } else { - &report(4, "\nWARNING: no ghostscript option '-dCompatibilityLevel=1.1' " - ."in\n'$exec_epstopdf'.\n" - ."You might run into trouble with the conversions of bitmaps.\n" - ."Adjusting epstopdf or setting the environment variable GS_OPTIONS " - ."to \n".'"$GS_OPTIONS -dCompatibilityLevel=1.1" before calling this ' - ."script\nmight help in this case.\n"); - } - - if ( ¶m_value('thumbpdf') eq $YES ) { - &checkCommand("thumbpdf","You can switch off thumbpdf support to fix this."); - } - - if ( ¶m_value('ppower') eq $YES ) { - &checkCommand("ppower","You can switch off ppower support to fix this."); - } - - ### authorindex perl script - if ( ¶m_value('authorindex') eq $YES ) { - &checkCommand("authorindex","You can switch off authorindex support to fix this."); - } - - ### bibtex 
executable - if ( ¶m_value('bibtex') ne $NO or ¶m_value('gloss') ne $NO) { - &checkCommand("bibtex","You can switch off BibTeX support to fix this."); - } -} - -#### generate the tmp file name from the original tex filename -#### and make sure that they are not the same -# parameter 1: orignal filename (with or without a path or .tex) -# parameter 2: path for the tmp file (default: doc path) -# return value: tmp name - -sub reserve_tmp_texname { - my $original_name = $_[0]; - my $tmp_path = $_[1]; - my $tmp_base_suffix = ¶m_value('tmp_base_suffix'); - my $overwrite = ¶m_value('overwrite'); - my $original_path; - my $original_base; - my $suffix; - my $pathed_tmp_base; - my @existing_files; - - # separate path, base and suffix - ($original_base,$original_path,$suffix) = fileparse($original_name, '\.tex'); - - # set the path of the tmp file - if(!$tmp_path) { - $tmp_path=$original_path; - } else { - $tmp_path .= '/' if( $tmp_path ne "" and ! ($tmp_path =~ m#/$#) ); - } - - # abort if no absolute path is given - if( index($tmp_path, "/") != 0 ) { - &abort("Internal error: Illegal argument for reserve_tmp_texname:". 
- "Given file has no absolute path: $original_name"); - } - - # make sure that tmp_base_suffix is set correctly - if($tmp_base_suffix eq "") { - &abort("Temporary filename base suffix is empty."); - } - - $pathed_tmp_base = $tmp_path.$original_base.$tmp_base_suffix; - - # make sure no file with this base exists in this directory - @existing_files = glob "$pathed_tmp_base.*"; - if (@existing_files != 0) { - &report(3, "Problems detected while reserving temporay file name!\n", - "In this directory are already files with this basename.\n", - "A list of the conflicting, existing files:\n", - join("\n", @existing_files), "\n"); - if ($overwrite eq $YES) { - &report(4, "As you have activated the parameter 'overwrite' I will " - ."continue.\n", - "However, in order to protect the existing files I will not\n", - "delete any files with this basename at the final clean-up."); - } else { - &report(2, "You could activate the parameter 'overwrite' or remove ", - "the\n corresponding files in order to avoid these problems."); - &abort("No temporary name found for $original_name."); - } - } else { - push(@TMP_TEX_FILES, $pathed_tmp_base); - } - - return $pathed_tmp_base.$suffix; -} - -### generate LaTeX file from LyX document with LyX itself -# parameter ($1): Lyx document -# parameter ($2): Latex document - -sub generate_tex_file { - my $lyx_doc = $_[0]; - my $tex_doc = $_[1]; - my $lyx_dir; - my $lyx_output; - my $lyx_exec=¶m_value('lyx_exec'); - - $lyx_dir = ¶m_value('lyxrc_path'); - $lyx_dir .= '/' if( ! ($lyx_dir =~ m#/$#) ); - $lyx_dir .= '/' if( ! 
($lyx_dir =~ m#/$#) ); - - ### Check if LyX file can be accessed - &check_file($lyx_doc,"Cannot read the specified LyX document!"); - - ### Check if LaTeX file exists and is newer than the LyX file - if ( -f $tex_doc and -M $tex_doc < -M $lyx_doc ) { - &report(4, "\nLaTeX file is newer than LyX document ($lyx_doc).\n", - "Using existing TeX file: $tex_doc\n", - "Remove it to force its new generation."); - } else { - ### export LaTeX file with LyX (needs a display!) - &checkCommand($lyx_exec, "Cannot generate LaTeX document without LyX!"); - &report(6, "\nExporting LaTeX file"); - - ### move some files out of the way that stop LyX from exporting - foreach my $file ($lyx_dir."lyxpipe.out",$lyx_dir."lyxpipe.in",$tex_doc) { - if ( -f $file ) { rename($file, $file.'~'); } - } - - $lyx_output = `$lyx_exec --export latex $lyx_doc 2>&1`; - - ### check if LaTeX file now really exists - if ( ! -f $tex_doc ) { - &report(2, "Lyx Output:\n$lyx_output"); - &report(2, "\nSorry. I cannot find '$tex_doc'."); - &abort("The LaTeX document was not generated by LyX!"); - } else { - &report(8, "Lyx Output:\n$lyx_output"); - } - } -} - -#### search TeX document for a certain text tag (e.g. author, title) -# parameter 1: file to parse -# parameter 2: full TeX tag name -# return value: list of the contents strings of all matching tags - -sub extract_tag_contents { - my $source=$_[0]; - my $tag_name=$_[1]; - my $contents; - my @results=(); - my $error_message="Could not read TeX document to extract $tag_name"; - - &check_file($source, $error_message.'.'); - open(EXTRACT_SOURCE, "<$source") or - &abort($error_message." 
($source)."); - - - while() { - ### ignore comments - s/(^|[^\\])%.*/$1/; - # ignore \thanks{} - s/\\thanks\{.*?\}//g; - # change \and to and - s/\\and/ and/g; - - $contents .= $_; - } - - close EXTRACT_SOURCE; - - $_ = $contents; - - # add contents of all occurences of this tag in a line to result list - while ( /\\($tag_name)(\[[^]]*?\])*?{+([^{}]*?)}/s ) { - my $text = $3; - $_ = $'; - # remove newlines - $text =~ s/\n//g; - $text="" if (!defined($text)); - push(@results, $text); - } - - return @results; -} - -#### search for filenames in given TeX Tag in entire document -### skip all comments and duplicates while parsing -# parameter 1: file to parse -# parameter 2: full TeX tag name -# parameter 3: reference to a list of possible filename suffixes (without '.') -# parameter 4: regexp for suffix to ignore when specified in TeX file -# (undef if not used) -# return value: list of identified files - -sub identify_files { - my $source=$_[0]; - my $tag_name=$_[1]; - my @suffixes=@{$_[2]}; - my $ignore_suffix=$_[3]; - my @matched_tags; - my @found_files=(); - my $regexp_suffixes; - - # create one large regexp from given suffixes and escape dots in them - $regexp_suffixes= '.('.join('|', @suffixes).')'; - $regexp_suffixes =~ s/\./\\./g; - - @matched_tags = &extract_tag_contents($source, $tag_name); - - foreach my $tag_contents (@matched_tags) { - my $path; - my $base; - my $suffix; - my $kpse_result; - my $working_dir = cwd."/"; - - ($base,$path,$suffix) = fileparse($tag_contents, $regexp_suffixes); - - # if a suffix is specified in the tag_contents handle it as requested - # - # 1. $suffix: TRUE if $suffix is defined and not of zero length - # means: a valid suffix has been found in the filename - # 2. defined($ignore_suffix): TRUE if $ignore_suffix is defined - # means: a regexp for suffixes to be ignored has been specified as - # parameter4 - # 3. 
$suffix =~ /$ignore_suffix/: TRUE if $suffix matches the regexp - # means: the suffix in the filename is wanted to be ignored - # - # The IF statement will be executed when: - # a valid suffix has been found in the filename (1) - # AND regexp for suffixes to be ignored has NOT been specified (not 2) - # OR - # a valid suffix has been found in the filename (1) - # AND regexp for suffixes to be ignored has been specified (2) - # AND the suffix in the filename is NOT wanted to be ignored (not 3) - # - # The stuff that is executed if the entire IF statement is TRUE does the - # following: accept the found suffix and consider it as the only possible - # file name. - if($suffix and not (defined($ignore_suffix) and $suffix =~ /$ignore_suffix/)){ - $kpse_result=`kpsewhich $tag_contents`; - # print warning and skip this tag if kpsewhich could not find it - if (!$kpse_result) { - &report(4, "WARNING - Could not identify referenced file:\n", - " Ignoring '$tag_contents'."); - next; - } - } else { - # if there is a '.' in the basename assume that this is a reference - # to a file of another type and skip it - if( $base =~ /\./ ) { - &report(9, "Found an unknown extension. Ignoring '$tag_contents'."); - next; - } - - # search for all possible files with allowed suffixes - foreach my $allowed_suffix (@suffixes) { - if (not $allowed_suffix =~ /[\]\)\(\|\[\\]/ ) { - # suffix is not a regexp, but a real extension - my $possible_file= $path.$base.'.'.$allowed_suffix; - $kpse_result=`kpsewhich $possible_file`; - if ($kpse_result) { - last; - } - } - } - } - - # if kpsewhich could not find any file with an allowed suffix - # assume that this reference is of a different type and skip it - # quietly - if (!$kpse_result) { - &report(9, "No suitable file found. Ignoring '$tag_contents'."); - next; - } - - # expand '.' 
in kpsewhich output to the current path - $kpse_result =~ s#^\./#$working_dir#; - - # remove trailing newline - chomp($kpse_result); - - # add file to the found file list if it is not already on it - if( &array_index($kpse_result, @found_files) < 0 ) { - push(@found_files, $kpse_result); - } - } - - return @found_files; -} - -### Build a list of all files which are included from the root file. -# This function recurses, and is maybe smart enough to detect cycles. -# Be sure to set REF_DOCS to the empty string prior to calling this. -# parameter 1: tex file to start with -# no return value -# result is appended to global variable @REF_DOCS - -sub get_file_list { - my $source = $_[0]; - my @imports = (); - - # This is the cycle avoidance logic. - if ( &array_index($source, @REF_DOCS) < 0 ) { - # Make sure the file can be accessed - &check_file($source, "Included TeX file seems not to be available. Path problem?"); - - # Save the argument in the list of files. - push(@REF_DOCS, $source); - - # Get the list of files included by the argument. - @imports=&identify_files($source, 'include|input', ['tex']); - - # Recurse. - foreach my $file (@imports) { - if( ! 
($file =~ /\.tex$/) ) { $file .= '.tex'; } - &get_file_list($file); - } - } -} - -### do the required modifications in the LaTeX preamble -# parameter 1: original preamble from the source file -# lines before \begin{document} tag (without this tag) -# parameter 2: reference to hyperref parameter list -# return value: adjusted preamble - -sub adjust_preamble { - my $preamble = $_[0]; - my $hyperref_params_ref = $_[1]; - my $extra_code; - my $result; - - $_ = $preamble; - - # protect pdflatex execution mode - s/^(\\batchmode)$/% $1/m; - - # insert a4paper in the documentclass when a4wide is used - # fixes problem that hyperref defaults to letter otherwise - if ( /^[^%]*\\usepackage(\[widemargins\])?\{(a4|a4wide)\}/m ) { - # check if package parameters with [] brackets are present - if ( not s/^(\\documentclass\[.*?)\]/$1,a4paper]/m ) { - s/^\\documentclass/$&\[a4paper\]/m; - } - } - - ### collect additional LaTeX code - - $extra_code = "\n" . '\usepackage{pslatex}' . "\n"; - - if ( ¶m_value('thumbpdf') eq $YES ) { - $extra_code .= '\usepackage{thumbpdf}' . "\n"; - } else { - $extra_code .= "% no thumbpdf support\n"; - } - -# if ( ¶m_value('ppower') eq $YES ) { -# $extra_code .= '\usepackage{mpmulti}' . "\n"; -# } else { -# $extra_code .= "% no ppower support\n"; -# } - - if ( ¶m_value('authorindex') eq $YES ) { - $extra_code .= '\usepackage[pages]{authorindex}' . "\n"; - $extra_code .= '\let\cite=\aicite' . "\n"; - } else { - $extra_code .= "% no authorindex support\n"; - } - - $extra_code .= '\makeatletter' . "\n"; - $extra_code .= '\usepackage[' . join(',', @$hyperref_params_ref) - . ']{hyperref}' . "\n"; - $extra_code .= '\makeatother' . "\n"; - - ### insert the extra LaTeX code directly after documentclass - m/^(\\documentclass)(\[[^]]*\])?(\{.*\})/m; - return $` . $& . $extra_code . 
$'; -} - -### adjust all filenames in the LaTeX code to the tmp files -# parameter 1: original LaTeX code from the source file -# return value: adjusted code - -sub adjust_filenames { - my $code = $_[0]; - my $tmp_suffix = ¶m_value('tmp_base_suffix'); - my $result; - - $_ = $code; - - # cut off the suffix of eps, ps, *.gz and pstex graphics - s/((\\includegraphics)(\[[^]]*?\])?(\{[^}]+?))\.(e?ps|pstex|e?ps\.gz)\n?\}/$1}/sg; - - # replace the suffix 'pstex_t' with 'pdf_t' - s/(\\input\{[^}]+?\.)pstex_t\n?\}/$1pdf_t}/sg; - - if ( ¶m_value('mtp_preamble') eq $NO ) { - # cut off the suffix of mmp graphics - s/(\\multiinclude(\[[^]]*?\])?\{[^}]+?)\.mmp\n?\}/$1}/sg; - } else { - # replace the suffix '.#' with '-mp.#' - s/(\\includegraphics(\[[^]]*?\])?\{[^}]+?)\.(\d+?)\n?\}/$1$MTP_TMP_BASESUFFIX\.$3}/sg; - - # replace the suffix '.#' with '-mp.#' - s/(\\convertMPtoPDF(\[[^]]*?\])?\{[^}]+?)\.(\d+?)\n?\}/$1$MTP_TMP_BASESUFFIX\.$3}/sg; - - # cut off optional suffix '.mmp' and append '-mp' in any case - s/(\\multiinclude(\[[^]]*?\])?\{[^}]+?)(\.mmp)?\n?\}/$1$MTP_TMP_BASESUFFIX}/sg; - } - - # insert the tmp_suffix in tex filenames - # I assume that files with no extension are TeX files as well; correct? - s#(\\(input|include)\{([^}]*?/)?[^}/.]+?)((\.tex)?\n?\})#$1$tmp_suffix$4#sg; - - return $_; -} - -### Convert given tex file to the temp tex file we need for pdftex -### major task is to change the reference in the tex files to the -### corresponding tmp files -# parameter 1: tex source file -# parameter 2: tex tmp file -# parameter 3: reference to hyperref parameter list or -# 'undef' if preamble should not be changed - -sub convert_tex2tmp { - my $source = $_[0]; - my $target = $_[1]; - my $hyperref_params_ref = $_[2]; - my $contents; - my $preamble; - my $body; - my $adjust_preamble = defined($hyperref_params_ref) ? 
$YES : $NO; - my $read_err_msg = "Could not read original TeX document to generate temporary document"; - - ### open source and target file - &check_file($source, $read_err_msg . '.'); - open(SOURCE_FILE, "<$source") or - &abort($read_err_msg . " ($source)."); - - ### read in the LaTeX source file - $contents = ""; - while() { - $contents .= $_; - } - - close SOURCE_FILE; - - ### prepare the LaTeX code for PDF generation - if ( $adjust_preamble eq $YES ) { - $contents =~ m/^ *\\begin\{document\} *$/m; - $preamble = $`; - $body = $&.$'; - $preamble = &adjust_preamble($preamble, $hyperref_params_ref); - $preamble = &adjust_filenames($preamble); - } else { - $preamble = ""; - $body = $contents; - } - - $body = &adjust_filenames($body); - - ### write the new LaTeX target file - open(TARGET_FILE, ">$target") or - &abort("Could not open file to write temporary TeX document ($target)."); - - print TARGET_FILE $preamble.$body; - - close TARGET_FILE; -} - -### Convert the given EPS image to PDF -# parameters $1: EPS image filename with absolute path -# return value: none - -sub convert_eps2pdf { - my $image = $_[0]; - my $image_path; - my $image_base; - my $image_name; - my $suffix; - my $image_target; - my $zipped = 0; - my $dummy; - - ($image_base,$image_path,$suffix) = fileparse($image, '\.eps', '\.ps', '\.pstex', '\.gz'); - if ($suffix eq "\.gz") { - $zipped = 1; - ($image_base,$dummy,$suffix) = fileparse($image_base, '\.eps', '\.ps', '\.pstex'); - } - $image_name = $image_base . $suffix; - $image_target = $image_path . $image_base . '.pdf'; - - #### check if image file really exists - #&check_file($image, "Could not convert referenced image."); - - ### return if image directory is not writeable - if (! -w $image_path) { - &report(4, "WARNING - Image directory not writable: $image_path\n", - " Skipping '$image_name', assume you have converted it manually."); - return; - } - - if ( ! 
-f $image_target or -M $image_target > -M $image ) { - &report(7, "Converting image $image_name to $image_target ...\n"); - if ($zipped > 0) { - &system_command("gunzip -c $image | epstopdf -f -outfile=$image_target", - $TRUE, 8, "epstopdf failed on $image_name"); - } else { - &system_command("epstopdf -outfile=$image_target $image", - $TRUE, 8, "epstopdf failed on $image_name"); - } - if (¶m_value('delete_pdf_images') eq $YES) { - push(@TMPFILES, $image_target); - } - } else { - &report(7, "$image_base.pdf newer than $image_name, conversion skipped..."); - } -} - -### Convert the given PSTEX_T file to PDF_T -# parameters 1: PSTEX_T filename with absolute path -# return value: none - -sub convert_pstex2pdf { - my $pstex_file = $_[0]; - my $pstex_path; - my $pstex_base; - my $pstex_name; - my $suffix; - my $pstex_target; - my @eps_images; - - ($pstex_base,$pstex_path,$suffix) = fileparse($pstex_file, ('\.pstex_t')); - $pstex_name = $pstex_base . $suffix; - $pstex_target = $pstex_path . $pstex_base . '.pdf_t'; - - #### check if image file really exists - #&check_file($pstex_file, "Could not convert referenced file."); - - ### return if directory is not writeable - if (! 
-w $pstex_path) { - &report(4, "WARNING - Directory not writable: $pstex_path\n", - " Skipping '$pstex_name', assume you have converted it manually."); - return; - } - - # descend into file - &report(7, "Converting file $pstex_name ...\n"); - - # find included EPS image(s) - @eps_images=&identify_files($pstex_file, 'includegraphics', - ['pstex', 'pstex\.gz']); - - # create .pdf_t file - &convert_tex2tmp($pstex_file, $pstex_target, undef); - - # put tmp file in the tmp file list - push(@TMPFILES, $pstex_target); - - # convert image(s) to pdf - foreach my $image (@eps_images) { - &convert_eps2pdf($image); - } -} - -### Convert the given MP image to PDF -# parameters $1: MP image filename with absolute path -# return value: none - -sub convert_mp2pdf { - my $image = $_[0]; - my $image_path; - my $image_base; - my $image_name; - my $suffix; - my $image_target; - my $image_src; - my @mps_fig=(); - my $mp_fig; - - ($image_base,$image_path,$suffix) = fileparse($image, '\.mp|\.mmp'); - $image_name = $image_base . $suffix; - $image_src=$image_path . $image_base . $suffix; - - @mps_fig= &grep_file($image_path.$image_name,'beginfig',$TRUE); - $_=$mps_fig[0]; - /(\d)/; - $mp_fig=$1; - if (¶m_value('mtp_preamble') eq $YES) { - $image_target = $image_path . $image_base . $MTP_TMP_BASESUFFIX . '.' . $mp_fig; - $image_name=$image_base.$MTP_TMP_BASESUFFIX.$suffix; - } else { - $image_target = $image_path . $image_base . '.' . $mp_fig; - } - - #### check if image file really exists - #&check_file($image, "Could not convert referenced image."); - - ### return if image directory is not writeable - if (! -w $image_path) { - &report(4, "$MYNAME: WARNING - Image directory not writable: $image_path\n", - " Skipping '$image_name', assume you have converted it manually."); - return; - } - - if ( ! 
-f $image_target - or (-M $image_target > -M $image_src) - or ( ( ¶m_value('mtp_preamble') eq $YES) - and (-M $image_target > -M $image_path.$MTP_PREAMBLE_FILENAME) ) ) { - &report(7, "Converting image $image_name ...\n"); - my $working_dir = cwd."/"; - chdir("$image_path") or &abort("cannot cd to $image_path($!)"); - if ( ¶m_value('mtp_preamble') eq $YES ) { - &modify_mp_file($image_base,$suffix); - } - &system_command("mpost $image_name", - $TRUE, 8, "mpost failed on $image_name"); - chdir("$working_dir") or &abort("cannot cd to $working_dir($!)"); - if (¶m_value('delete_pdf_images') eq $YES) { - push(@TMPFILES, $image_target); - } - } else { - if ( ¶m_value('mtp_preamble') eq $YES ) { - &report(7, "$image_base$MTP_TMP_BASESUFFIX.$mp_fig newer than $image_base$suffix, conversion skipped..."); - } else { - &report(7, "$image_base.$mp_fig newer than $image_base$suffix, conversion skipped..."); - } - } -} - -### Convert the given MP image to PDF -# parameters $1: MP image filename with absolute path -# return value: none - -sub modify_mp_file { - my $base=$_[0]; - my $suffix=$_[1]; - my $preamble_file=$MTP_PREAMBLE_FILENAME; - - my $mp_source=$base.$suffix; - my $mp_target=$base.$MTP_TMP_BASESUFFIX.$suffix; - - ### open source and target file - open(SOURCE_FILE, "<$mp_source") or - &abort("Could not open $mp_source to add $preamble_file ($mp_source)."); - - open(TARGET_FILE, ">$mp_target") or - &abort("Could not open $mp_target to add $preamble_file ($mp_source)."); - - open(PREAMBLE_FILE, "<$preamble_file") or - &abort("Could not open $preamble_file to be added to metapost files ($mp_source)."); - - ### set target file as stdout - select TARGET_FILE; - print 'verbatimtex' . "\n"; - print '%&latex' . "\n"; # This forces metapost to use latex in the compilation - print '\documentclass[english]{article}' . "\n"; # we have to decide if this sentence goes in preamble.cfg or here - # english must be added - # to preamble in order to - # make work mpost, why ??? 
- while() { - print $_ - } - print '\begin{document}' . "\n"; # we have to decide if this sentence goes in preamble.cfg or here - print 'etex' . "\n"; - while() { - print $_ - } - print 'verbatimtex' . "\n"; - print '\end{document}' . "\n"; - print 'etex' . "\n"; - - ### set STDOUT as stdout again - select STDOUT; - - ### close files - close SOURCE_FILE; - close TARGET_FILE; - close PREAMBLE_FILE; - -} - - -### Convert included images to pdf -# parameter 1: tex source file -# no return value - -sub convert_images { - my $tex_source=$_[0]; - my @pstex_file_list; - my @image_list; - my @mp_image_list; - my @mmp_image_list; - my @major_suffixes; - my $ignore_regexp; - - &report(6, "\nConverting images referenced in $tex_source."); - - ##### Get images of major type from the source file - @major_suffixes = (@BITMAP_SUFFIXES, $PDF_ORIG_SUFFIX, @EPS_SUFFIXES); - $ignore_regexp = '.('.join('|',@EPS_SUFFIXES).')'; - $ignore_regexp =~ s/\./\\./g; - - &report(6, "\nScanning for major image types (".join(', ',@major_suffixes)."):"); - @image_list = &identify_files($tex_source,'includegraphics', - \@major_suffixes, $ignore_regexp ); - - if ( @image_list > 0 ) { - &report(7, join("\n", @image_list)); - } else { - &report(7, "None."); - } - - ##### Get PSTEX_T files from the source file - &report(6, "\nScanning for PSTEX_T files (.pstex_t):"); - @pstex_file_list=&identify_files($tex_source, 'input', ['pstex_t']); - if ( @pstex_file_list > 0 ) { - &report(7, join("\n", @pstex_file_list)); - } else { - &report(7, "None."); - } - - ##### Get MP images from the source file - &report(6, "\nScanning for MP images (.mp):"); - @mp_image_list=&identify_files($tex_source, - 'convertMPtoPDF|includegraphics',['mp','(\d+)'],'\.(\d+)'); - # FIXME - # fixed for now by ignoring strange extension in identify_files - # - # the above could cause problems as identify_files is expecting a list of - # real extensions and not of regexps - # maybe the only way to fix this is to adjust identify_files to 
ignore - # invalid extension when testing them with kpsewhich - # ALTERNATIVE: design identify_files to use preffered_suffixes instead - # of ignore_suffixes - - if ( @mp_image_list > 0 ) { - &report(7, join("\n", @mp_image_list)); - } else { - &report(7, "None."); - } - - ##### Get MMP images from the source file - &report(6, "\nScanning for MMP images (.mmp):"); - @mmp_image_list=&identify_files($tex_source,'multiinclude',['mmp']); - if ( @mmp_image_list > 0 ) { - &report(7, join("\n", @mmp_image_list)); - } else { - &report(7, "None."); - } - - ### Convert EPS images to PDF, copy pdf.orig image files to pdf, - ### and simply ignore all other - if ( @image_list > 0) { - my $handled_suffixes; - - &report(6, "\nProcessing images of major types:"); - - # create one large regexp from suffixes and escape dots in them - $handled_suffixes = '.('.join('|',(@EPS_SUFFIXES, $PDF_ORIG_SUFFIX)).')'; - $handled_suffixes =~ s/\./\\./g; - - foreach my $image (@image_list) { - my $path; - my $base; - my $suffix; - ($base,$path,$suffix) = fileparse($image, $handled_suffixes); - if (not $suffix) { - &report(7, "No special handling required for image: $base$suffix"); - } elsif ($suffix eq ('.'.$PDF_ORIG_SUFFIX) ) { - my $image_target = $path.$base.'.pdf'; - - if ( ! 
-f $image_target or -M $image_target > -M $image ) { - &report(7, "Create temporary PDF file from original: $base$suffix"); - copy($image, $image_target) - or &abort("Could not create tmp file: $!"); - if (¶m_value('delete_pdf_images') eq $YES) { - push(@TMPFILES, $image_target); - } - } else { - &report(7, "$base.pdf newer than $base$suffix, copy skipped ..."); - } - } else { - &convert_eps2pdf($image); - } - } - } - - ### Convert all PSTEX_T files to PDF_T - if ( @pstex_file_list > 0 ) { - &report(6, "\nConverting pstex_t docs to pdf_t docs"); - foreach my $image (@pstex_file_list) { - &convert_pstex2pdf($image); - } - } - - ### Convert all MP images to PDF - if ( @mp_image_list > 0) { - &report(6, "\nConverting MP images to PDF"); - foreach my $image (@mp_image_list) { - &convert_mp2pdf($image); - } - } - - ### Convert all MMP images to PDF - if ( @mmp_image_list > 0) { - &report(6, "\nConverting MMP images to PDF"); - foreach my $image (@mmp_image_list) { - &convert_mp2pdf($image); - } - } - - &report(6, "\nFinished converting for ${tex_source}."); -} - -### run pdflatex -# parameter 1: LaTeX file without extension -# parameter 2: log-file where the full out put is stored -# return value: 0 - no errors (no rerun); 1 - errors (rerun required) - -sub run_pdflatex { - my $texfile=$_[0]; - my $logfile=$_[1]; - my @errors=(); - my $exit_status=0; - my $extra_options=¶m_value('pdftex_opts'); - - if( !defined($extra_options) or $extra_options eq $NIL ) { - $extra_options = ""; - } - - &report(7, "Running pdflatex. This may take a while.\n"); - system("pdflatex --interaction nonstopmode $extra_options $texfile > $logfile 2>&1"); - $exit_status=$?; - &report(7, "Pdflatex finished. 
Errors:"); - - ### extract all errors and warnings from the log file - @errors = &grep_file($logfile, '(Emergency stop|Error|Warning).*:'); - - ### make sure thumbpdf package does not spoil rerun detection - ### as it will be processed very last - if(grep(/Package thumbpdf/, @errors) == @errors) { @errors=(); } - - if ( @errors != 0 or $exit_status != 0 ) { - if ( grep(/Emergency stop/, @errors) != 0 ) { - &report(2, &file_tail($logfile,10)); - &report(2, "\nSee $logfile for details."); - &abort("Fatal error occured. I am lost."); - } - if( @errors != 0 ) { - &report(8, @errors); - } else { - &report(8, &file_tail($logfile,10)); - } - &report(7, "\nSee $logfile for details."); - return $FALSE; - } else { - &report(7, "None detected (log file: $logfile)."); - return $TRUE; - } -} - -#### run bibtex if BIBTEX=$YES or a bibliography tag is found -# included tex files are not parsed for a bibliography -# parameter 1: filename of the aux file without .aux suffix -# parameter 2: log-file where the full out put is stored - -sub handle_bibtex { - my $auxfile=$_[0]; - my $logfile=$_[1]; - my $run_bibtex=$FALSE; - my $bibtex_param=¶m_value('bibtex'); - - if ( $bibtex_param eq $YES ) { - $run_bibtex=$TRUE; - &report(7, "BibTeX parameter set to '$YES':"); - } else { - &report(7, "Checking for BibTeX bibliography in main document: "); - if( &grep_file($auxfile.'.tex', '^[^%]*\\\\bibliography{') != 0) { - $run_bibtex=$TRUE; - &report(7, "Bibliography detected."); - } else { - if ( @REF_DOCS > 0 ) { - &report(7, "Checking for BibTeX bibliography in included documents:"); - foreach my $file (@REF_DOCS) { - if( &grep_file($file, '^[^%]*\\\\bibliography{') != 0) { - $run_bibtex=$TRUE; - &report(7, "Bibliography detected."); - } else { - &report(7, "No bibliography detected in $file."); - } - } - } else { - &report(7, "No bibliography detected."); - } - } - } - - if ( $run_bibtex ) { - my @errors=(); - my $exit_status=0; - - &report(7, "Running bibtex. 
This may take a while.\n"); - system "bibtex $auxfile > $logfile"; - $exit_status=$?; - &report(7, "Bibtex finished. Errors:"); - - ### extract all errors and warnings from the log file - @errors=&grep_file($logfile, 'error message'); - - if ( @errors != 0 or $exit_status != 0 ) { - &report(4, &file_tail($logfile)); - &report(4, "\nYou can switch off BibTeX support by setting the bibtex parameter accordingly."); - } else { - &report(7, "None detected (log file: $logfile)."); - } - } -} - -#### run bibtex on file.gls if BIBTEX=$YES or a bibliography tag is found -# included tex files are not parsed for a bibliography -# parameter 1: filename of the aux file without .aux suffix -# parameter 2: log-file where the full out put is stored - -sub handle_gloss { - my $auxfile=$_[0]; - my $logfile=$_[1]; - my $run_gloss=$FALSE; - my $gloss_param=¶m_value('gloss'); - my $glsfile=$auxfile.'.gls'; - - if ( $gloss_param eq $YES ) { - $run_gloss=$TRUE; - &report(7, "Gloss parameter set to '$YES':"); - } else { - &report(7, "Checking for Gloss bibliography in main document: "); - if( &grep_file($auxfile.'.tex', '^[^%]*\\\\printgloss{') != 0) { - $run_gloss=$TRUE; - &report(7, "Gloss bibliography detected."); - } else { - if ( @REF_DOCS > 0 ) { - &report(7, "Checking for Gloss bibliography in included documents:"); - foreach my $file (@REF_DOCS) { - if( &grep_file($file, '^[^%]*\\\\printgloss') != 0) { - $run_gloss=$TRUE; - &report(7, "Gloss bibliography detected."); - } else { - &report(7, "No gloss database detected in $file."); - } - } - } else { - &report(7, "No gloss database detected."); - } - } - } - if ( $run_gloss ) { - my @errors=(); - my $exit_status=0; - - &report(7, "Running bibtex on $glsfile. This may take a while.\n"); - system "bibtex $glsfile > $logfile 2>&1"; - $exit_status=$?; - &report(7, "Bibtex finished. 
Errors:"); - - ### extract all errors and warnings from the log file - @errors=&grep_file($logfile, 'error message'); - - if ( @errors != 0 or $exit_status != 0 ) { - &report(4, &file_tail($logfile)); - &report(4, "\nYou can switch off Gloss support by setting the gloss parameter accordingly."); - } else { - &report(7, "None detected (log file: $logfile)."); - } - } -} - -#### run thumbpdf command to make thumbnails -# more informations: /usr/share/texmf/doc/pdftex/thumbpdf/readme.txt -# parameter 1: LaTeX file without extension -# parameter 2: log-file where the full out put is stored - -sub run_thumbpdf { - my $texfile=$_[0]; - my $logfile=$_[1]; - my $exit_status=0; - - &report(7, "\nCreating thumbnails with 'thumbpdf'\n"); - &system_command("thumbpdf $texfile", $FALSE, 8, - "thumbpdf failed.\n" - ."I will continue, but maybe there will not be thumbs in the PDF doc."); - - if ( -f 'thumbpdf.log' ) { - move('thumbpdf.log', $logfile); - &report(7, "\nSee $logfile for details."); - } - - ### store possible tmp files - push(@TMPFILES, glob 'thumb???.png'); - push(@TMPFILES, 'thumbpdf.pdf'); - push(@TMPFILES, 'thumbdta.tex'); - push(@TMPFILES, $texfile.'.tpt'); -} - - -#### run ppower command to make presentations -# more informations: -# parameter 1: LaTeX file without extension -# parameter 2: log-file where the full out put is stored - -sub run_ppower { - my $texfile=$_[0]; - my $logfile=$_[1]; - my $exit_status=0; - my $infile; - my $outfile; - my $texfile_base; - my $texfile_path; - my $texfile_suffix; - - ##### Getting document name, suffix and path - ($texfile_base,$texfile_path,$texfile_suffix) = fileparse($texfile,¶m_value('tmp_base_suffix')); - - $infile=$texfile_base.'.pdf'; - $outfile=$texfile_base.'_p4.pdf'; - - &report(7, "\nPostprocessing PDF file with 'ppower'\n"); - if(&system_command("ppower $infile $outfile", $FALSE, 8, - "ppower failed.\nI will continue, " - ."but maybe there will not be pause effects in the PDF doc.")) { - &report(7, "\nThe 
postprocessed pdf file is: $outfile\n"); - } - - if ( -f 'ppower.log' ) { - move('ppower.log', $logfile); - &report(7, "See $logfile for details."); - } -} - -#### run authorindex command to obtain an author index -# more informations: -# parameter 1: LaTeX file without extension -# parameter 2: log-file where the full out put is stored - -sub run_authorindex { - my $texfile=$_[0]; - my $logfile=$_[1]; - my $exit_status=0; - - &report(7, "\nProcessing file with 'authorindex'\n"); - &system_command("authorindex $texfile", $FALSE, 8, - "authorindex failed.\nI will continue, " - ."but maybe there will not be an author index in the PDF doc."); - - if ( -f 'authorindex.log' ) { - move('authorindex.log', $logfile); - &report(7, "\nSee $logfile for details."); - } - -} - - -##### read and analyse configuration and options and adjust basic variables -##### accordingly -#### The following sources are considered (last value overrides previous) -## 1. general configuration (global variables in the script) -## 2. private configuration (in user's RC file) -## 3. command line options -# parameters: none -# return value: given document argument - -sub adjust_configuration { - my $valid_rcfile = $FALSE; - my %opt_specs =(); - - ### Check number of arguments - if ( @ARGV == 0 ) { - &report(1, "\nI need at least one argument!"); - &print_usage; - exit 1; - } - - ##### command line options and private configuration files handling - - ### set parameters from rc file - if ( -f $RC_FILENAME ) { - my $rcfile_version; - $rcfile_version = &read_configuration($RC_FILENAME); - if( $rcfile_version == $MYRCFILE_VERSION ) { - $valid_rcfile = $TRUE; - } elsif ( $rcfile_version == 0 ) { - &report(4, "Could not determine version of read RC file."); - } else { - &report(4, "Version of read RC file ($rcfile_version) and ", - "this script ($MYRCFILE_VERSION) differs."); - } - } - - ### scan parameters - foreach (keys %PARAMETER_LIST) { - $opt_specs{&option_specifier($_)} = \&handle_option; - } - if(! 
GetOptions(%opt_specs)) { - &print_usage; - &abort("An error occured while processing command line options"); - } - - if( ! $valid_rcfile ) { - &report(3,"No valid configuration file found. Please run '$MYNAME " - ."--configure'.\nUsing default values for missing parameters in this " - ."session."); - } - - ### As the configuration process is done now, it is time to set the - # global configuration flag - $CONFIGURED = $TRUE; - - #### do some test in order to secure as good as possible that we will - #### succeed before to much work was done and maybe some data as damaged - - ### make sure that tmp_base_suffix is not empty - if ( ¶m_value('tmp_base_suffix') eq "" ) { - &report(2, "\nCAUTION: Empty tmp_base_suffix would destroy the original files!"); - &abort("Parameter tmp_base_suffix is not set."); - } - - ##### check for required commands - if (¶m_value('check_commands') ne $NO ) { - &check_commands; - } - - ### Check number of arguments - if ( @ARGV != 1 ) { - &report(1, "\nWrong number of arguments. I need exactly one file."); - &print_usage; - exit 1; - } - - return $ARGV[0]; -} - -#### prepare the logdir for the log files of the various called appplications - -sub prepare_logdir { - my $log_dir= ¶m_value('logdir'); - my $my_new_log; - - ##### Preparing the LOGDIR - if (! &check_dir($log_dir, "Could not create log directory", $YES)) { - &abort("Please, set a different path and restart"); - } - - # make sure there is a slash at the end of the path - $log_dir .= '/' if( ! 
($log_dir =~ m#/$#) ); - - if( <$log_dir/*.log> and ¶m_value('clean_logs') eq $YES ) { - &report(6, "\nRemoving old log files ($log_dir)."); - unlink (<$log_dir/pdflatex-*.log>, <$log_dir/bibtex-*.log>, - <$log_dir/gloss-*.log>, <$log_dir/thumbpdf-*.log>, - <$log_dir/ppower-*.log>, <$log_dir/authorindex-*.log>, - <$log_dir/tex2pdf-*.log>); - } else { - &report(6, "\nAll log files will be stored in ($log_dir)."); - } - - ### move my pre-configuration log file to specified log directory - $my_new_log = "$log_dir/tex2pdf-$$.log"; - if ( move($MYLOGFILE, $my_new_log) ) { - $MYLOGFILE = $my_new_log; - } else { - &report(3, "Could not move '$MYLOGFILE' to logdir: $!"); - } -} - -##### analyse document argument -#### process the one and only argument (besides the options) which specifies -#### which LaTeX document the user wants to translate to PDF -#### a lyx file will be translated to LaTeX first -# parameter 1: argument - -sub process_doc_argument { - my $argument = $_[0]; - my $doc_base; - my $doc_path; - my $arg_suffix; - - ##### Getting document name, suffix and path - ($doc_base,$doc_path,$arg_suffix) = fileparse($argument, ('\.tex', '\.lyx')); - - if (! 
defined($arg_suffix) or $arg_suffix eq "") { - $arg_suffix = '.tex'; - } - - ###### change working directory to document directory - if ( $doc_path ne "" ) { - chdir $doc_path; - } - - ###### make DOCPATH an absolute path - $doc_path = cwd.'/'; - - ###### Cut off suffix and do lyx or tex specific stuff - if ( $arg_suffix eq '.lyx' ) { - # Lyx document argument: generate Latex document if required - &generate_tex_file($doc_base.'.lyx', $doc_base.'.tex'); - } else { - # LaTeX document argument: check access to given LaTeX document - &check_file($doc_base.'.tex', "Cannot read the specified LaTeX document!"); - } - - return $doc_path.$doc_base.'.tex'; -} - -#### handle the dir of the input path if the document has one and -# parameter 1: main tex doc -# return value: absolute input path - -sub process_inputpath { - my $texdoc = $_[0]; - my $doc_base; - my $doc_path; - my $doc_suffix; - my $input_path; - my @matches; - - ### Maybe the user has given us a different inputpath - $input_path = ¶m_value('input_path'); - if(defined($input_path) and $input_path ne "") { - &report(6, "Setting input path to specified directory: $input_path"); - - &report(7, "Change working directory to input path."); - chdir $input_path; - - return $input_path; - } - - ##### Getting document name, suffix and path - ($doc_base,$doc_path,$doc_suffix) = fileparse($texdoc, ('\.tex')); - - ###### change working directory to input_path if set - # When the files' path (images, included documents, etc.) in your document is - # relative to another directory than the PASSED document's directory. - # This is useful when the calling application (e.g. LyX) generates a - # temporary - # TeX file and calls the tex2pdf with it instead of the original file. 
- - @matches=&extract_tag_contents($texdoc, 'def\\\\input@path'); - $input_path=$matches[0]; - - ## check if input_path is ok - if ($input_path) { - &report(7, "Found an input path in the latex document: $input_path"); - if( &check_dir($input_path, 'The retrieved input@path seems not to be valid.', $NO)) { - &set_param_value('input_path', $input_path); - &report(7, "Change working directory to input path."); - chdir $input_path; - } else { - &abort ('I am lost.'); - } - } else { - &report(4, "No input path in the latex document found."); - &report(7, "Resources are expected to be relative to document's location: $doc_path"); - $input_path=$doc_path; - } - - return $input_path; -} - -#### set the working dir to the input path if the document has one and -#### and determine the path for the result -# parameter 1: main tex doc -# parameter 2: absolute input path -# return value: absolute path were the resulting pdf doc should be stored - -sub get_target_name { - my $texdoc = $_[0]; - my $input_path=$_[1]; - - my $doc_base; - my $doc_path; - my $doc_suffix; - my $destination; - my $pdf_path; - - ##### Getting document name, suffix and path - ($doc_base,$doc_path,$doc_suffix) = fileparse($texdoc, ('\.tex')); - - ##### set the directory where the final pdf will be stored - $destination=¶m_value('destination'); - $pdf_path=undef; - - if ($destination eq 'custom' ) { - $pdf_path=¶m_value('custom_path'); - } elsif ($destination eq 'input') { - $pdf_path=$input_path; - } else { - $pdf_path=$doc_path; - } - - if( ! defined($pdf_path) or $pdf_path eq "" or ! &check_dir($pdf_path, - 'The specified destination directory for the final PDF documents is not valid.', $NO)) { - &report(7, "Using document's instead of destination path: $doc_path"); - $pdf_path=$doc_path; - } - - # make sure there is a slash at the end of the path - $pdf_path .= '/' if( ! 
($pdf_path =~ m#/$#) ); - - return $pdf_path.$doc_base.'.pdf'; -} - -### generate hyperref parameters from given settings -# parameter 1: main tex doc -# return_value: result - -sub generate_hyperref_params { - my $texdoc = $_[0]; - my $para; - my @params = ('pdftex'); - - ##### Set title and author from main LaTeX document or parameters - foreach my $info (('title', 'author')) { - my $value; - - $value=¶m_value("$info"); - if (! defined($value) ) { - my @matches=&extract_tag_contents($texdoc, $info); - $value= $matches[0]; - if (! defined($value) ) { - &report(4, "\nWARNING: Could not identify document's $info correctly."); - &report(7, "Maybe you have used a LaTeX tag inside the $info which confuses me.\n", - "Adjust the $info of the LaTeX file in order to avoid the problem or\n", - "you could set the $info parameter manually."); - $value= ¶m_value("default_$info"); - &report(7, "Using default-$info: $value"); - } - } - if (! defined($value) or $value eq $NIL ) { - &report(7, "$info field set to $NIL - no value will be passed."); - } else { - &report(7, "Document's $info: $value"); - push(@params, "pdf$info={$value}"); - } - } - - $para=¶m_value('paper'); - if ( $para ne $NIL ) { push(@params, $para); } - - $para=¶m_value('link_toc_page'); - if ( $para eq $YES ) { push(@params, 'linktocpage'); } - - $para=¶m_value('colorlinks'); - if ( $para ne $NIL ) { - $para= $para eq $YES ? 
'true' : 'false'; - push(@params, "colorlinks=$para"); - } - - if ( $para ne 'false' ) { - foreach (('linkcolor', 'pagecolor', 'urlcolor', 'citecolor')) { - $para=¶m_value($_); - if ( $para ne $NIL ) { push(@params, "$_={$para}"); } - } - } - - $para=¶m_value('hyperref_args'); - if(defined($para) and $para ne "" and $para ne $NIL) { - push(@params, $para); - } - - return @params; -} - -#### Prepare the main document and all referenced ones for the generation -#### of the PDF document (including referenced images) -# parameter 1: top level tex document -# parameter 2: parameter list of hyperref parameters - -sub prepare_documents { - my ($main_tex_doc, $input_path, @hyperref_params) = @_; - @REF_DOCS=(); - - ## get a name for the tmp tex file - my $main_tmp_doc = &reserve_tmp_texname($main_tex_doc, $input_path); - - ## Get the list of imported files from the tex file - &get_file_list($main_tex_doc); - - ## remove main file from list (first element; needs special handling) - shift @REF_DOCS; - - ## tell user about the identified refereneced docs - if ( @REF_DOCS > 0 ) { - &report(7, "\nFound the following referenced TeX files:"); - foreach my $file (@REF_DOCS) { - &report(7, ">>>>> $file"); - } - } else { - &report(7, "\nFound no referenced TeX files."); - } - - ##### Generate adjusted temp tex files and convert all their images - ## main doc - &report(6, "\nGenerating main temporary LaTeX document: $main_tmp_doc"); - &convert_tex2tmp($main_tex_doc, $main_tmp_doc, \@hyperref_params); - &convert_images($main_tex_doc); - - ## referenced docs - foreach my $file (@REF_DOCS) { - my $tmp_file = &reserve_tmp_texname($file); - - ### Insert pdf conversation tags in tex file and write it to tmp_file - &report(7, "\nGenerating temporary LaTeX document: $tmp_file"); - &convert_tex2tmp($file, $tmp_file, undef); - &convert_images($file); - } - - return $main_tmp_doc; -} - -##### Generate the final PDF document -# parameter 1: filename of the source LaTeX document (with extension) 
- -sub generate_pdf_doc { - my $source = $_[0]; - my $runno=1; - my $rerun=$TRUE; - my $doc; - my $pdf_doc; - my $base; - my $path; - my $suffix; - - my $max_run_no= ¶m_value('maxrun'); - my $min_run_no= ¶m_value('minrun'); - my $log_dir= ¶m_value('logdir'); - $log_dir .= '/' if( ! ($log_dir =~ m#/$#) ); - my $makeindex_options=¶m_value('makeindex_opts'); - - # setting the log files for the output of pdflatex, bibtex, gloss, - # thumbpdf, ppower and authorindex - my $pdflog_base = $log_dir."pdflatex-$$-"; - my $bibtex_log = $log_dir."bibtex-$$.log"; - my $gloss_log = $log_dir."gloss-$$.log"; - my $thumbpdf_log = $log_dir."thumbpdf-$$.log"; - my $ppower_log = $log_dir."ppower-$$.log"; - my $authorindex_log = $log_dir."authorindex-$$.log"; - - ##### Getting document name, suffix and path - ($base,$path,$suffix) = fileparse($source, ('\.tex')); - $doc = $path.$base; - $pdf_doc = $base.'.pdf'; - - ### run pdflatex until no more errors are reported (max MAXRUNNO) - while ( $rerun and $runno <= $max_run_no ) - { - &report(6, "\n************ Pdflatex run no. $runno *************"); - if ( &run_pdflatex($doc, $pdflog_base.$runno.'.log') == $TRUE - and ( $min_run_no <= $runno )) { - # no errors detected and min. no. of runs are done - $rerun=$FALSE; - } else { - # errors appeared or max run no. 
has not been reached - $rerun=$TRUE; - } - - ### Execute BibTeX after first run if set (and required) - if ( $runno == 1 and ¶m_value('bibtex') ne $NO ) { - &report(6, "\n****************** BibTeX handling ***********************"); - &handle_bibtex($doc, $bibtex_log); - } - - ### Execute BibTeX on file.gls after first run if set (and required) - if ( $runno == 1 and ¶m_value('gloss') ne $NO ) { - &report(6, "\n****************** Gloss handling ***********************"); - &handle_gloss($doc, $gloss_log); - } - - ### generated index file exists - if ( $runno == 1 and -f $doc.'.idx' and ¶m_value('force_index') ne $NO ) { - if( !defined($makeindex_options) or $makeindex_options eq $NIL ) { - $makeindex_options = ""; - } - &report(6, "\n****************** Extra index generation ***************"); - &report(7, "Document seems to have an index. Generating ...\n"); - &system_command("makeindex $makeindex_options $doc.idx", $FALSE, 8, - "makeindex failed.\nI will continue, " - ."but maybe there will not be an index in the PDF doc."); - } - - $runno += 1; - } - - $rerun = $FALSE; - - ### if the authorindex option is switched on then run authorindex - if ( ¶m_value('authorindex') eq $YES ) { - &report(6, "\n****************** authorindex generation *****************"); - &run_authorindex($doc, $authorindex_log); - } - - ### generated index file exists - if ( -f $doc.'.idx' and ¶m_value('force_index') ne $NO ) { - if( !defined($makeindex_options) or $makeindex_options eq $NIL ) { - $makeindex_options = ""; - } - &report(6, "\n****************** Extra index generation ***************"); - &report(7, "Document seems to have an index. 
Generating ...\n"); - &system_command("makeindex $makeindex_options $doc.idx", $FALSE, 8, - "makeindex failed.\nI will continue, " - ."but maybe there will not be an index in the PDF doc."); - $rerun=$TRUE; - } - - ### if the thumbpdf option is switched on then make thumbnails - if ( ¶m_value('thumbpdf') eq $YES ) { - &report(6, "\n****************** Thumbnail generation *****************"); - &run_thumbpdf($doc, $thumbpdf_log); - $rerun=$TRUE; - } - - ### One final pdflatex run if requested - if ( $rerun ) { - &report(6, "\n************ One final pdflatex run no. $runno *************"); - &run_pdflatex($doc, $pdflog_base.$runno.'.log'); - } - - ### if the ppower option is switched on then run ppower - if ( ¶m_value('ppower') eq $YES ) { - &report(6, "\n****************** ppower postprocess *****************"); - &run_ppower($doc, $ppower_log); - } - - if ( ! -f $pdf_doc ) { - &abort("\nThe new PDF file could not be generated: ".$pdf_doc); - } - - return $pdf_doc; -} - -################## Lift off !!!! 
(main part) ################## - -my $texdoc; -my $doc_argument; -my $input_path; -my $target_name; -my @hyperref_params; -my $new_pdf_doc; -my $tmp_tex_doc; - -&report(5, "\nScript starts ($MYRELEASE)"); - -##### read and analyse configuration and options and adjust basic variables -##### accordingly -##### write RC file on config request -#### use the finished configuration to get all further settings before we -#### actually start doing something - -&report(5, "\nProcessing given parameters and arguments."); -$doc_argument = &adjust_configuration; - -#### prepare the script to write some information to specified log files - -&report(5, "\nPreparing directory for log files."); -&prepare_logdir; - -#### process the one and only argument (besides the options) which specifies -#### which LaTeX document the user wants to translate to PDF -#### a lyx file will be translated to LaTeX first - -&report(5, "\nAnalysing your document argument."); -$texdoc = &process_doc_argument($doc_argument); - -#### we would like to get some more information from the actual -#### main LaTeX document before we really start -#### parse the document and try to get the info - -## translate hyperref settings to the actual package parameters - -&report(5, "\nSetting up parameters for hyperref."); -@hyperref_params = &generate_hyperref_params($texdoc); - -## set the working dir to the input path if the document has one and - -&report(5, "\nProcessing input path for main tex document."); -$input_path = &process_inputpath($texdoc); - -## determine the name for the result - -&report(5, "\nSetting the correct name for the result."); -$target_name = &get_target_name($texdoc, $input_path); - -## as much as possible is prepared in advance at this point -## so hopefully we will succeed in generating the PDF document - -##### real work starts NOW - -##### Generate adjusted temp tex files and convert all their images - -&report(5, "\nPreparing all documents and images."); -$tmp_tex_doc = 
&prepare_documents($texdoc, $input_path, @hyperref_params); - -##### Generate the final PDF document - -&report(5, "\nProcessing the actual generation of the PDF document."); -$new_pdf_doc = &generate_pdf_doc($tmp_tex_doc); - -##### Finalize -move($new_pdf_doc, $target_name) or &abort("Could not move PDF file to final destination: $!"); - -if ( ¶m_value('debug') eq $NO ) { - &clean_up; -} else { - &print_temp_files; -} - -&report(5, "\nThe new pdf file is: $target_name\n"); - diff --git a/lustre/include/.cvsignore b/lustre/include/.cvsignore deleted file mode 100644 index 7b78c04..0000000 --- a/lustre/include/.cvsignore +++ /dev/null @@ -1,12 +0,0 @@ -.Xrefs -config.log -config.status -configure -config.h -stamp-h -stamp-h1 -stamp-h.in -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/include/config.h.in b/lustre/include/config.h.in deleted file mode 100644 index 14f0f3b..0000000 --- a/lustre/include/config.h.in +++ /dev/null @@ -1,10 +0,0 @@ -/* include/config.h.in. Generated automatically from configure.in by autoheader. */ - -/* Define if you have the `readline' library (-lreadline). */ -#undef HAVE_LIBREADLINE - -/* Name of package */ -#undef PACKAGE - -/* Version number of package */ -#undef VERSION diff --git a/lustre/include/ioctl.h b/lustre/include/ioctl.h deleted file mode 100644 index a4ec8a5..0000000 --- a/lustre/include/ioctl.h +++ /dev/null @@ -1,64 +0,0 @@ -#ifndef _ASMI386_IOCTL_H -#define _ASMI386_IOCTL_H - -/* ioctl command encoding: 32 bits total, command in lower 16 bits, - * size of the parameter structure in the lower 14 bits of the - * upper 16 bits. - * Encoding the size of the parameter structure in the ioctl request - * The highest 2 bits are reserved for indicating the ``access mode''. - * NOTE: This limits the max parameter size to 16kB -1 ! - */ - -/* - * The following is for compatibility across the various Linux - * platforms. The i386 ioctl numbering scheme doesn't really enforce - * a type field. 
De facto, however, the top 8 bits of the lower 16 - * bits are indeed used as a type field, so we might just as well make - * this explicit here. Please be sure to use the decoding macros - * below from now on. - */ -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 14 -#define _IOC_DIRBITS 2 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - -#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits. - */ -#define _IOC_NONE 0U -#define _IOC_WRITE 1U -#define _IOC_READ 2U - -#define _IOC(dir,type,nr,size) (((dir) << _IOC_DIRSHIFT) | ((type) << _IOC_TYPESHIFT) | ((nr) << _IOC_NRSHIFT) | ((size) << _IOC_SIZESHIFT)) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode ioctl numbers.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* ...and for the drivers/sound files... 
*/ - -#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) -#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) -#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) -#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) -#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) - -#endif /* _ASMI386_IOCTL_H */ diff --git a/lustre/include/liblustre.h b/lustre/include/liblustre.h deleted file mode 100644 index 202a761..0000000 --- a/lustre/include/liblustre.h +++ /dev/null @@ -1,608 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * User-space Lustre headers. - * - */ -#ifndef LIBLUSTRE_H__ -#define LIBLUSTRE_H__ - -#include -#ifndef __CYGWIN__ -#include -#include -#else -#include -#include "ioctl.h" -#endif -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* definitions for liblustre */ - -#ifdef __CYGWIN__ - -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) -#define loff_t __u64 -#define ERESTART 2001 -typedef unsigned short umode_t; - -#endif - -/* This is because lprocfs_status.h gets included here indirectly. 
It would - * be much better to just avoid lprocfs being included into liblustre entirely - * but that requires more header surgery than I can handle right now. - */ -#ifndef smp_processor_id -#define smp_processor_id() 0 -#endif - -/* always adopt 2.5 definitions */ -#define KERNEL_VERSION(a,b,c) ((a)*100+(b)*10+c) -#define LINUX_VERSION_CODE (2*200+5*10+0) - -static inline void inter_module_put(void *a) -{ - return; -} - -extern ptl_handle_ni_t tcpnal_ni; - -void *inter_module_get(char *arg); - -/* cheats for now */ - -struct work_struct { - void (*ws_task)(void *arg); - void *ws_arg; -}; - -static inline void prepare_work(struct work_struct *q, void (*t)(void *), - void *arg) -{ - q->ws_task = t; - q->ws_arg = arg; - return; -} - -static inline void schedule_work(struct work_struct *q) -{ - q->ws_task(q->ws_arg); -} - - -#define strnlen(a,b) strlen(a) -static inline void *kmalloc(int size, int prot) -{ - return malloc(size); -} -#define vmalloc malloc -#define vfree free -#define kfree(a) free(a) -#define GFP_KERNEL 1 -#define GFP_HIGHUSER 1 -#define IS_ERR(a) (((a) && abs((int)(a)) < 500) ? 
1 : 0) -#define PTR_ERR(a) ((int)(a)) - -#define capable(foo) 1 -#define CAP_SYS_ADMIN 1 - -typedef struct { - void *cwd; - -}mm_segment_t; - -typedef void *read_proc_t; -typedef void *write_proc_t; - - -/* byteorder */ -#define __swab16(x) \ -({ \ - __u16 __x = (x); \ - ((__u16)( \ - (((__u16)(__x) & (__u16)0x00ffU) << 8) | \ - (((__u16)(__x) & (__u16)0xff00U) >> 8) )); \ -}) - -#define __swab32(x) \ -({ \ - __u32 __x = (x); \ - ((__u32)( \ - (((__u32)(__x) & (__u32)0x000000ffUL) << 24) | \ - (((__u32)(__x) & (__u32)0x0000ff00UL) << 8) | \ - (((__u32)(__x) & (__u32)0x00ff0000UL) >> 8) | \ - (((__u32)(__x) & (__u32)0xff000000UL) >> 24) )); \ -}) - -#define __swab64(x) \ -({ \ - __u64 __x = (x); \ - ((__u64)( \ - (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \ - (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \ - (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \ - (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) << 8) | \ - (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >> 8) | \ - (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \ - (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \ - (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \ -}) - -#define __swab16s(x) __swab16(*(x)) -#define __swab32s(x) __swab32(*(x)) -#define __swab64s(x) __swab64(*(x)) - -#define __LITTLE_ENDIAN__ -#ifdef __LITTLE_ENDIAN__ -# define le16_to_cpu(x) ((__u16)(x)) -# define cpu_to_le16(x) ((__u16)(x)) -# define le32_to_cpu(x) ((__u32)(x)) -# define cpu_to_le32(x) ((__u32)(x)) -# define le64_to_cpu(x) ((__u64)(x)) -# define cpu_to_le64(x) ((__u64)(x)) -#else -# define le16_to_cpu(x) __swab16(x) -# define cpu_to_le16(x) __swab16(x) -# define le32_to_cpu(x) __swab32(x) -# define cpu_to_le32(x) __swab32(x) -# define le64_to_cpu(x) __swab64(x) -# define cpu_to_le64(x) __swab64(x) -# error "do more check here!!!" 
-#endif - -/* bits ops */ -static __inline__ int set_bit(int nr,long * addr) -{ - int mask, retval; - - addr += nr >> 5; - mask = 1 << (nr & 0x1f); - retval = (mask & *addr) != 0; - *addr |= mask; - return retval; -} - -static __inline__ int clear_bit(int nr, long * addr) -{ - int mask, retval; - - addr += nr >> 5; - mask = 1 << (nr & 0x1f); - retval = (mask & *addr) != 0; - *addr &= ~mask; - return retval; -} - -static __inline__ int test_bit(int nr, long * addr) -{ - int mask; - - addr += nr >> 5; - mask = 1 << (nr & 0x1f); - return ((mask & *addr) != 0); -} - -/* modules */ - -struct module { - int count; -}; - -static inline void MODULE_AUTHOR(char *name) -{ - printf("%s\n", name); -} -#define MODULE_DESCRIPTION(name) MODULE_AUTHOR(name) -#define MODULE_LICENSE(name) MODULE_AUTHOR(name) - -#define THIS_MODULE NULL -#define __init -#define __exit - -/* devices */ - -static inline int misc_register(void *foo) -{ - return 0; -} -#define misc_deregister misc_register - -#define __MOD_INC_USE_COUNT(m) do {int a = 1; a++; } while (0) -#define __MOD_DEC_USE_COUNT(m) do {int a = 1; a++; } while (0) -#define MOD_INC_USE_COUNT do {int a = 1; a++; } while (0) -#define MOD_DEC_USE_COUNT do {int a = 1; a++; } while (0) - -/* module initialization */ -extern int init_obdclass(void); -extern int ptlrpc_init(void); -extern int ldlm_init(void); -extern int osc_init(void); -extern int lov_init(void); -extern int mdc_init(void); -extern int echo_client_init(void); - - - -/* general stuff */ -#define jiffies 0 - -#define EXPORT_SYMBOL(S) - -typedef int spinlock_t; -typedef __u64 kdev_t; - -#define SPIN_LOCK_UNLOCKED 0 -#define spin_lock(l) do {int a = 1; a++; } while (0) -#define spin_unlock(l) do {int a= 1; a++; } while (0) -#define spin_lock_init(l) do {int a= 1; a++; } while (0) -static inline void spin_lock_bh(spinlock_t *l) -{ - return; -} -static inline void spin_unlock_bh(spinlock_t *l) -{ - return; -} -static inline void spin_unlock_irqrestore(spinlock_t *a, long b) -{ - 
return; -} -static inline void spin_lock_irqsave(spinlock_t *a, long b) -{ - return; -} - -#define barrier() do {int a= 1; a++; } while (0) - -#define min(x,y) ((x)<(y) ? (x) : (y)) -#define max(x,y) ((x)>(y) ? (x) : (y)) - -/* registering symbols */ - -#define ERESTARTSYS ERESTART -#define HZ 1 - -/* random */ - -static inline void get_random_bytes(void *ptr, int size) -{ - int *p = (int *)ptr; - int i, count = size/sizeof(int); - - for (i = 0; i< count; i++) - *p++ = rand(); -} - -/* memory */ - -/* FIXME */ -#define num_physpages (16 * 1024) - -static inline int copy_from_user(void *a,void *b, int c) -{ - memcpy(a,b,c); - return 0; -} - -static inline int copy_to_user(void *a,void *b, int c) -{ - memcpy(a,b,c); - return 0; -} - - -/* slabs */ -typedef struct { - int size; -} kmem_cache_t; -#define SLAB_HWCACHE_ALIGN 0 -static inline kmem_cache_t * -kmem_cache_create(const char *name, size_t objsize, size_t cdum, - unsigned long d, - void (*e)(void *, kmem_cache_t *, unsigned long), - void (*f)(void *, kmem_cache_t *, unsigned long)) -{ - kmem_cache_t *c; - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->size = objsize; - CDEBUG(D_MALLOC, "alloc slab cache %s at %p, objsize %d\n", - name, c, (int)objsize); - return c; -}; - -static inline int kmem_cache_destroy(kmem_cache_t *a) -{ - CDEBUG(D_MALLOC, "destroy slab cache %p, objsize %u\n", a, a->size); - free(a); - return 0; -} -#define kmem_cache_validate(a,b) 1 -#define kmem_cache_alloc(cache, prio) malloc(cache->size) -#define kmem_cache_free(cache, obj) free(obj) - -#define PAGE_CACHE_SIZE PAGE_SIZE -#define PAGE_CACHE_SHIFT 12 -#define PAGE_CACHE_MASK PAGE_MASK - -struct page { - void *addr; - int index; -}; - -#define kmap(page) (page)->addr -#define kunmap(a) do { int foo = 1; foo++; } while (0) - -static inline struct page *alloc_pages(int mask, unsigned long foo) -{ - struct page *pg = malloc(sizeof(*pg)); - - if (!pg) - return NULL; -#ifdef MAP_ANONYMOUS - pg->addr = mmap(0, PAGE_SIZE, PROT_WRITE, 
MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); -#else - pg->addr = malloc(PAGE_SIZE); -#endif - - if (!pg->addr) { - free(pg); - return NULL; - } - return pg; -} - -static inline void __free_pages(struct page *pg, int what) -{ -#ifdef MAP_ANONYMOUS - munmap(pg->addr, PAGE_SIZE); -#else - free(pg->addr); -#endif - free(pg); -} - -static inline struct page* __grab_cache_page(int index) -{ - struct page *pg = alloc_pages(0, 0); - - if (pg) - pg->index = index; - return pg; -} - -#define grab_cache_page(index) __grab_cache_page(index) -#define page_cache_release(page) __free_pages(page, 0) - -/* arithmetic */ -#define do_div(a,b) \ - ({ \ - unsigned long ret; \ - ret = (a)%(b); \ - (a) = (a)/(b); \ - (ret); \ - }) - -/* VFS stuff */ -#define ATTR_MODE 1 -#define ATTR_UID 2 -#define ATTR_GID 4 -#define ATTR_SIZE 8 -#define ATTR_ATIME 16 -#define ATTR_MTIME 32 -#define ATTR_CTIME 64 -#define ATTR_ATIME_SET 128 -#define ATTR_MTIME_SET 256 -#define ATTR_FORCE 512 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 1024 -#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ -#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ - -struct iattr { - unsigned int ia_valid; - umode_t ia_mode; - uid_t ia_uid; - gid_t ia_gid; - loff_t ia_size; - time_t ia_atime; - time_t ia_mtime; - time_t ia_ctime; - unsigned int ia_attr_flags; -}; - -/* copy from kernel header */ -#define IT_OPEN (1) -#define IT_CREAT (1<<1) -#define IT_READDIR (1<<2) -#define IT_GETATTR (1<<3) -#define IT_LOOKUP (1<<4) -#define IT_UNLINK (1<<5) - -struct lookup_intent { - int it_op; - int it_mode; - int it_flags; - int it_disposition; - int it_status; - struct iattr *it_iattr; - __u64 it_lock_handle[2]; - int it_lock_mode; - void *it_data; -}; - -struct dentry { - int d_count; -}; - -struct vfsmount { - void *pwd; -}; - -#define cpu_to_le32(x) ((__u32)(x)) - -/* semaphores */ -struct semaphore { - int count; -}; - -#define down(a) do {(a)->count++;} while (0) -#define up(a) do 
{(a)->count--;} while (0) -#define sema_init(a,b) do { (a)->count = b; } while (0) - -typedef struct { - struct list_head sleepers; -} wait_queue_head_t; - -typedef struct { - struct list_head sleeping; - void *process; -} wait_queue_t; - -struct signal { - int signal; -}; - -struct fs_struct { - int umask; -}; - -struct task_struct { - struct fs_struct *fs; - int state; - struct signal pending; - char comm[32]; - int pid; - int fsuid; - int fsgid; - __u32 cap_effective; -}; - -extern struct task_struct *current; - -#define in_group_p(a) 0 /* FIXME */ - -#define set_current_state(foo) do { current->state = foo; } while (0) - -#define init_waitqueue_entry(q,p) do { (q)->process = p; } while (0) -#define add_wait_queue(q,p) do { list_add(&(q)->sleepers, &(p)->sleeping); } while (0) -#define del_wait_queue(p) do { list_del(&(p)->sleeping); } while (0) -#define remove_wait_queue(q,p) do { list_del(&(p)->sleeping); } while (0) - -#define init_waitqueue_head(l) INIT_LIST_HEAD(&(l)->sleepers) -#define wake_up(l) do { int a; a++; } while (0) -#define TASK_INTERRUPTIBLE 0 -#define TASK_UNINTERRUPTIBLE 1 -#define TASK_RUNNING 2 - -#define in_interrupt() (0) - -#define schedule() do { int a; a++; } while (0) -static inline int schedule_timeout(signed long t) -{ - return 0; -} - -#define lock_kernel() do { int a; a++; } while (0) -#define daemonize(l) do { int a; a++; } while (0) -#define sigfillset(l) do { int a; a++; } while (0) -#define recalc_sigpending(l) do { int a; a++; } while (0) -#define kernel_thread(l,m,n) - -static inline int call_usermodehelper(char *prog, char **argv, char **evnp, int unknown) -{ - return 0; -} - - - -#define KERN_INFO - - - -struct timer_list { - struct list_head tl_list; - void (*function)(unsigned long unused); - void *data; - int expires; -}; - -static inline int timer_pending(struct timer_list *l) -{ - if (l->expires > jiffies) - return 1; - else - return 0; -} - -static inline int init_timer(struct timer_list *l) -{ - 
INIT_LIST_HEAD(&l->tl_list); - return 0; -} - -static inline void mod_timer(struct timer_list *l, int thetime) -{ - l->expires = thetime; -} - -static inline void del_timer(struct timer_list *l) -{ - free(l); -} - -typedef struct { volatile int counter; } atomic_t; - -#define atomic_read(a) ((a)->counter) -#define atomic_set(a,b) do {(a)->counter = b; } while (0) -#define atomic_dec_and_test(a) ((--((a)->counter)) == 0) -#define atomic_inc(a) (((a)->counter)++) -#define atomic_dec(a) do { (a)->counter--; } while (0) -#define atomic_add(b,a) do {(a)->counter += b;} while (0) -#define atomic_sub(b,a) do {(a)->counter -= b;} while (0) - -#define LBUG() \ - do { \ - printf("!!!LBUG at %s:%d\n", __FILE__, __LINE__); \ - sleep(1000000); \ - } while (0) - -#include -#include -#include -#include -#include -#include - - -#endif - diff --git a/lustre/include/linux/.cvsignore b/lustre/include/linux/.cvsignore deleted file mode 100644 index b731c89..0000000 --- a/lustre/include/linux/.cvsignore +++ /dev/null @@ -1,15 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS -extN_fs.h -extN_fs_i.h -extN_fs_sb.h -extN_jbd.h -extN_xattr.h -xattr.h -lustre_build_version.h diff --git a/lustre/include/linux/Makefile b/lustre/include/linux/Makefile deleted file mode 100644 index c263b40..0000000 --- a/lustre/include/linux/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -all .DEFAULT: - $(MAKE) -C ../.. $@ diff --git a/lustre/include/linux/lprocfs_status.h b/lustre/include/linux/lprocfs_status.h deleted file mode 100644 index fb96bde..0000000 --- a/lustre/include/linux/lprocfs_status.h +++ /dev/null @@ -1,354 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Top level header file for LProc SNMP - * Author: Hariharan Thantry thantry@users.sourceforge.net - */ -#ifndef _LPROCFS_SNMP_H -#define _LPROCFS_SNMP_H - -#ifdef __KERNEL__ -#include -#include -#include -#include -#endif - -#include - -#ifndef LPROCFS -#ifdef CONFIG_PROC_FS /* Ensure that /proc is configured */ -#define LPROCFS -#endif -#endif - -struct lprocfs_vars { - const char *name; - read_proc_t *read_fptr; - write_proc_t *write_fptr; - void *data; -}; - -struct lprocfs_static_vars { - struct lprocfs_vars *module_vars; - struct lprocfs_vars *obd_vars; -}; - -/* An lprocfs counter can be configured using the enum bit masks below. - * - * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already - * protects this counter from concurrent updates. If not specified, - * lprocfs an internal per-counter lock variable. External locks are - * not used to protect counter increments, but are used to protect - * counter readout and resets. - * - * LPROCFS_CNTR_AVGMINMAX indicates a multi-valued counter samples, - * (i.e. counter can be incremented by more than "1"). When specified, - * the counter maintains min, max and sum in addition to a simple - * invocation count. This allows averages to be be computed. - * If not specified, the counter is an increment-by-1 counter. 
- * min, max, sum, etc. are not maintained. - * - * LPROCFS_CNTR_STDDEV indicates that the counter should track sum of - * squares (for multi-valued counter samples only). This allows - * external computation of standard deviation, but involves a 64-bit - * multiply per counter increment. - */ - -enum { - LPROCFS_CNTR_EXTERNALLOCK = 0x0001, - LPROCFS_CNTR_AVGMINMAX = 0x0002, - LPROCFS_CNTR_STDDEV = 0x0004, - - /* counter data type */ - LPROCFS_TYPE_REGS = 0x0100, - LPROCFS_TYPE_BYTES = 0x0200, - LPROCFS_TYPE_PAGES = 0x0400, - LPROCFS_TYPE_CYCLE = 0x0800, -}; - -struct lprocfs_atomic { - atomic_t la_entry; - atomic_t la_exit; -}; - -struct lprocfs_counter { - struct lprocfs_atomic lc_cntl; /* may need to move to per set */ - unsigned int lc_config; - __u64 lc_count; - __u64 lc_sum; - __u64 lc_min; - __u64 lc_max; - __u64 lc_sumsquare; - const char *lc_name; /* must be static */ - const char *lc_units; /* must be static */ -}; - -struct lprocfs_percpu { - struct lprocfs_counter lp_cntr[0]; -}; - - -struct lprocfs_stats { - unsigned int ls_num; /* # of counters */ - unsigned int ls_percpu_size; - struct lprocfs_percpu *ls_percpu[0]; -}; - - -/* class_obd.c */ -extern struct proc_dir_entry *proc_lustre_root; - -/* lproc_lov.c */ -extern struct file_operations ll_proc_target_fops; -struct obd_device; - -#ifdef LPROCFS - -/* Two optimized LPROCFS counter increment functions are provided: - * lprocfs_counter_incr(cntr, value) - optimized for by-one counters - * lprocfs_counter_add(cntr) - use for multi-valued counters - * Counter data layout allows config flag, counter lock and the - * count itself to reside within a single cache line. 
- */ - -static inline void lprocfs_counter_add(struct lprocfs_stats *stats, int idx, - long amount) -{ - struct lprocfs_counter *percpu_cntr; - - LASSERT(stats != NULL); - percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); - atomic_inc(&percpu_cntr->lc_cntl.la_entry); - percpu_cntr->lc_count++; - - if (percpu_cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) { - percpu_cntr->lc_sum += amount; - if (percpu_cntr->lc_config & LPROCFS_CNTR_STDDEV) - percpu_cntr->lc_sumsquare += (__u64)amount * amount; - if (amount < percpu_cntr->lc_min) - percpu_cntr->lc_min = amount; - if (amount > percpu_cntr->lc_max) - percpu_cntr->lc_max = amount; - } - atomic_inc(&percpu_cntr->lc_cntl.la_exit); -} - -static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, int idx) -{ - struct lprocfs_counter *percpu_cntr; - - LASSERT(stats != NULL); - percpu_cntr = &(stats->ls_percpu[smp_processor_id()]->lp_cntr[idx]); - atomic_inc(&percpu_cntr->lc_cntl.la_entry); - percpu_cntr->lc_count++; - atomic_inc(&percpu_cntr->lc_cntl.la_exit); -} - -extern struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num); -extern void lprocfs_free_stats(struct lprocfs_stats *stats); -extern int lprocfs_alloc_obd_stats(struct obd_device *obddev, - unsigned int num_private_stats); -extern void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned conf, const char *name, - const char *units); -extern void lprocfs_free_obd_stats(struct obd_device *obddev); -extern int lprocfs_register_stats(struct proc_dir_entry *root, const char *name, - struct lprocfs_stats *stats); - -#define LPROCFS_INIT_MULTI_VARS(array, size) \ -void lprocfs_init_multi_vars(unsigned int idx, \ - struct lprocfs_static_vars *x) \ -{ \ - struct lprocfs_static_vars *glob = (struct lprocfs_static_vars*)array; \ - LASSERT(glob != 0); \ - LASSERT(idx < (unsigned int)(size)); \ - x->module_vars = glob[idx].module_vars; \ - x->obd_vars = glob[idx].obd_vars; \ -} \ - -#define LPROCFS_INIT_VARS(vclass, vinstance) 
\ -void lprocfs_init_vars(struct lprocfs_static_vars *x) \ -{ \ - x->module_vars = vclass; \ - x->obd_vars = vinstance; \ -} \ - -extern void lprocfs_init_vars(struct lprocfs_static_vars *var); -extern void lprocfs_init_multi_vars(unsigned int idx, - struct lprocfs_static_vars *var); -/* lprocfs_status.c */ -extern int lprocfs_add_vars(struct proc_dir_entry *root, - struct lprocfs_vars *var, - void *data); - -extern struct proc_dir_entry *lprocfs_register(const char *name, - struct proc_dir_entry *parent, - struct lprocfs_vars *list, - void *data); - -extern void lprocfs_remove(struct proc_dir_entry *root); - -extern struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *root, - const char *name); - -extern int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list); -extern int lprocfs_obd_detach(struct obd_device *dev); - -/* Generic callbacks */ - -extern int lprocfs_rd_u64(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_name(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_server_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_rd_numrefs(char *page, char **start, off_t off, - int count, int *eof, void *data); - -/* Statfs helpers */ -struct statfs; -extern int lprocfs_rd_blksize(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); -extern int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); -extern int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); -extern int lprocfs_rd_filestotal(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); 
-extern int lprocfs_rd_filesfree(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); -extern int lprocfs_rd_filegroups(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs); - -/* lprocfs_status.c: counter read/write functions */ -struct file; -extern int lprocfs_counter_read(char *page, char **start, off_t off, - int count, int *eof, void *data); -extern int lprocfs_counter_write(struct file *file, const char *buffer, - unsigned long count, void *data); - -#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \ -int fct_name(char *page, char **start, off_t off, \ - int count, int *eof, void *data) \ -{ \ - struct statfs sfs; \ - int rc = get_statfs_fct((struct obd_device*)data, &sfs); \ - return (rc == 0 ? \ - lprocfs_##fct_name (page, start, off, count, eof, &sfs) : \ - rc); \ -} - -#else -/* LPROCFS is not defined */ -static inline void lprocfs_counter_add(struct lprocfs_stats *stats, - int index, long amount) { return; } -static inline void lprocfs_counter_incr(struct lprocfs_stats *stats, - int index) { return; } -static inline void lprocfs_counter_init(struct lprocfs_stats *stats, - int index, unsigned conf, - const char *name, const char *units) -{ return; } - -static inline struct lprocfs_stats* lprocfs_alloc_stats(unsigned int num) -{ return NULL; } -static inline void lprocfs_free_stats(struct lprocfs_stats *stats) -{ return; } - -static inline int lprocfs_register_stats(struct proc_dir_entry *root, - const char *name, - struct lprocfs_stats *stats) -{ return 0; } -static inline int lprocfs_alloc_obd_stats(struct obd_device *obddev, - unsigned int num_private_stats) -{ return 0; } -static inline void lprocfs_free_obd_stats(struct obd_device *obddev) -{ return; } - -static inline struct proc_dir_entry * -lprocfs_register(const char *name, struct proc_dir_entry *parent, - struct lprocfs_vars *list, void *data) { return NULL; } -#define LPROCFS_INIT_MULTI_VARS(array, size) -static inline void 
lprocfs_init_multi_vars(unsigned int idx, - struct lprocfs_static_vars *x) { return; } -#define LPROCFS_INIT_VARS(vclass, vinstance) -static inline void lprocfs_init_vars(struct lprocfs_static_vars *x) { return; } -static inline int lprocfs_add_vars(struct proc_dir_entry *root, - struct lprocfs_vars *var, - void *data) { return 0; } -static inline void lprocfs_remove(struct proc_dir_entry *root) {}; -static inline struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head, - const char *name) {return 0;} -struct obd_device; -static inline int lprocfs_obd_attach(struct obd_device *dev, - struct lprocfs_vars *list) { return 0; } -static inline int lprocfs_obd_detach(struct obd_device *dev) { return 0; } -static inline int lprocfs_rd_u64(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_name(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_server_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -static inline int lprocfs_rd_numrefs(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } - -/* Statfs helpers */ -struct statfs; -static inline -int lprocfs_rd_blksize(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } -static inline -int lprocfs_rd_kbytestotal(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } -static inline -int lprocfs_rd_kbytesfree(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } -static inline -int lprocfs_rd_filestotal(char *page, char **start, off_t off, - int count, int 
*eof, struct statfs *sfs) { return 0; } -static inline -int lprocfs_rd_filesfree(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } -static inline -int lprocfs_rd_filegroups(char *page, char **start, off_t off, - int count, int *eof, struct statfs *sfs) { return 0; } -static inline -int lprocfs_counter_read(char *page, char **start, off_t off, - int count, int *eof, void *data) { return 0; } -struct file; -static inline -int lprocfs_counter_write(struct file *file, const char *buffer, - unsigned long count, void *data) { return 0; } - -#define DEFINE_LPROCFS_STATFS_FCT(fct_name, get_statfs_fct) \ -int fct_name(char *page, char **start, off_t off, \ - int count, int *eof, void *data) { *eof = 1; return 0; } - -#endif /* LPROCFS */ - -#endif /* LPROCFS_SNMP_H */ diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h deleted file mode 100644 index 4275a10..0000000 --- a/lustre/include/linux/lustre_compat25.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef _COMPAT25_H -#define _COMPAT25_H - -#include - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -#define KDEVT_VAL(dev, val) dev.value = 0 -#else -#define KDEVT_VAL(dev, val) dev = 0 -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define PGCACHE_WRLOCK(mapping) write_lock(&mapping->page_lock) -# define PGCACHE_WRUNLOCK(mapping) write_unlock(&mapping->page_lock) -#else -# define PGCACHE_WRLOCK(mapping) spin_lock(&pagecache_lock) -# define PGCACHE_WRUNLOCK(mapping) spin_unlock(&pagecache_lock) -#endif - - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define filemap_fdatasync(mapping) filemap_fdatawrite(mapping) -#endif - - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define TryLockPage(page) TestSetPageLocked(page) -#endif - - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define Page_Uptodate(page) PageUptodate(page) -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp, 0) -#else -# define USERMODEHELPER(path, argv, envp) call_usermodehelper(path, argv, envp) -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) -# define LL_CHECK_DIRTY(sb) do { }while(0) -#else -# define LL_CHECK_DIRTY(sb) ll_check_dirty(sb) -#endif - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#define rb_node_s rb_node -#define rb_root_s rb_root -typedef struct rb_root_s rb_root_t; -typedef struct rb_node_s rb_node_t; -#endif - -#endif /* _COMPAT25_H */ diff --git a/lustre/include/linux/lustre_debug.h b/lustre/include/linux/lustre_debug.h deleted file mode 100644 index 756d32e..0000000 --- a/lustre/include/linux/lustre_debug.h +++ /dev/null @@ -1,53 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef _LUSTRE_DEBUG_H -#define _LUSTRE_DEBUG_H - -#include - -#define ASSERT_MAX_SIZE_MB 60000ULL -#define ASSERT_PAGE_INDEX(index, OP) \ -do { if (index > ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)) { \ - CERROR("bad page index %lu > %Lu\n", index, \ - ASSERT_MAX_SIZE_MB << (20 - PAGE_SHIFT)); \ - portal_debug = ~0UL; \ - OP; \ -}} while(0) - -#define ASSERT_FILE_OFFSET(offset, OP) \ -do { if (offset > ASSERT_MAX_SIZE_MB << 20) { \ - CERROR("bad file offset %Lu > %Lu\n", offset, \ - ASSERT_MAX_SIZE_MB << 20); \ - portal_debug = ~0UL; \ - OP; \ -}} while(0) - -/* lib/debug.c */ -int dump_lniobuf(struct niobuf_local *lnb); -int dump_rniobuf(struct niobuf_remote *rnb); -int dump_ioo(struct obd_ioobj *nb); -int dump_req(struct ptlrpc_request *req); -int dump_obdo(struct obdo *oa); -int page_debug_setup(void *addr, int len, __u64 off, __u64 id); -int page_debug_check(char *who, void *addr, int len, __u64 off, __u64 id); -#endif diff --git a/lustre/include/linux/lustre_dlm.h b/lustre/include/linux/lustre_dlm.h deleted file mode 100644 index 2db4196..0000000 --- a/lustre/include/linux/lustre_dlm.h +++ /dev/null @@ -1,467 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * (visit-tags-table FILE) - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#ifndef _LUSTRE_DLM_H__ -#define _LUSTRE_DLM_H__ - -#ifdef __KERNEL__ -# 
include -#endif - -#include -#include -#include -#include -#include /* for obd_export, for LDLM_DEBUG */ - -struct obd_ops; -struct obd_device; - -#define OBD_LDLM_DEVICENAME "ldlm" - -typedef enum { - ELDLM_OK = 0, - - ELDLM_LOCK_CHANGED = 300, - ELDLM_LOCK_ABORTED = 301, - ELDLM_LOCK_REPLACED = 302, - - ELDLM_NAMESPACE_EXISTS = 400, - ELDLM_BAD_NAMESPACE = 401 -} ldlm_error_t; - -#define LDLM_NAMESPACE_SERVER 0 -#define LDLM_NAMESPACE_CLIENT 1 - -#define LDLM_FL_LOCK_CHANGED (1 << 0) /* extent, mode, or resource changed */ - -/* If the server returns one of these flags, then the lock was put on that list. - * If the client sends one of these flags (during recovery ONLY!), it wants the - * lock added to the specified list, no questions asked. -p */ -#define LDLM_FL_BLOCK_GRANTED (1 << 1) -#define LDLM_FL_BLOCK_CONV (1 << 2) -#define LDLM_FL_BLOCK_WAIT (1 << 3) - -#define LDLM_FL_CBPENDING (1 << 4) // this lock is being destroyed -#define LDLM_FL_AST_SENT (1 << 5) // blocking or cancel packet was sent -#define LDLM_FL_WAIT_NOREPROC (1 << 6)// not a real lock flag,not saved in lock -#define LDLM_FL_CANCEL (1 << 7) // cancellation callback already run - -/* Lock is being replayed. This could probably be implied by the fact that one - * of BLOCK_{GRANTED,CONV,WAIT} is set, but that is pretty dangerous. 
*/ -#define LDLM_FL_REPLAY (1 << 8) - -#define LDLM_FL_INTENT_ONLY (1 << 9) /* don't grant lock, just do intent */ -#define LDLM_FL_LOCAL_ONLY (1 << 10) /* see ldlm_cli_cancel_unused */ - -/* don't run the cancel callback under ldlm_cli_cancel_unused */ -#define LDLM_FL_NO_CALLBACK (1 << 11) - -#define LDLM_FL_HAS_INTENT (1 << 12) /* lock request has intent */ -#define LDLM_FL_CANCELING (1 << 13) /* lock cancel has already been sent */ -#define LDLM_FL_LOCAL (1 << 14) // a local lock (ie, no srv/cli split) -#define LDLM_FL_WARN (1 << 15) /* see ldlm_cli_cancel_unused */ -#define LDLM_FL_MATCH_DATA (1 << 16) /* see ldlm_lock_match */ - -/* The blocking callback is overloaded to perform two functions. These flags - * indicate which operation should be performed. */ -#define LDLM_CB_BLOCKING 1 -#define LDLM_CB_CANCELING 2 - -#define L2B(c) (1 << c) - -/* compatibility matrix */ -#define LCK_COMPAT_EX L2B(LCK_NL) -#define LCK_COMPAT_PW (LCK_COMPAT_EX | L2B(LCK_CR)) -#define LCK_COMPAT_PR (LCK_COMPAT_PW | L2B(LCK_PR)) -#define LCK_COMPAT_CW (LCK_COMPAT_PW | L2B(LCK_CW)) -#define LCK_COMPAT_CR (LCK_COMPAT_CW | L2B(LCK_PR) | L2B(LCK_PW)) -#define LCK_COMPAT_NL (LCK_COMPAT_CR | L2B(LCK_EX)) - -static ldlm_mode_t lck_compat_array[] = { - [LCK_EX] LCK_COMPAT_EX, - [LCK_PW] LCK_COMPAT_PW, - [LCK_PR] LCK_COMPAT_PR, - [LCK_CW] LCK_COMPAT_CW, - [LCK_CR] LCK_COMPAT_CR, - [LCK_NL] LCK_COMPAT_NL -}; - -static inline int lockmode_compat(ldlm_mode_t exist, ldlm_mode_t new) -{ - if (exist < LCK_EX || exist > LCK_NL) - LBUG(); - if (new < LCK_EX || new > LCK_NL) - LBUG(); - - return (lck_compat_array[exist] & L2B(new)); -} - -/* - * - * cluster name spaces - * - */ - -#define DLM_OST_NAMESPACE 1 -#define DLM_MDS_NAMESPACE 2 - -/* XXX - - do we just separate this by security domains and use a prefix for - multiple namespaces in the same domain? - - -*/ - -struct ldlm_namespace { - char *ns_name; - __u32 ns_client; /* is this a client-side lock tree? 
*/ - struct list_head *ns_hash; /* hash table for ns */ - __u32 ns_refcount; /* count of resources in the hash */ - struct list_head ns_root_list; /* all root resources in ns */ - struct lustre_lock ns_lock; /* protects hash, refcount, list */ - struct list_head ns_list_chain; /* position in global NS list */ - /* - struct proc_dir_entry *ns_proc_dir; - */ - - struct list_head ns_unused_list; /* all root resources in ns */ - unsigned int ns_nr_unused; - unsigned int ns_max_unused; - - spinlock_t ns_counter_lock; - __u64 ns_locks; - __u64 ns_resources; -}; - -/* - * - * Resource hash table - * - */ - -#define RES_HASH_BITS 10 -#define RES_HASH_SIZE (1UL << RES_HASH_BITS) -#define RES_HASH_MASK (RES_HASH_SIZE - 1) - -struct ldlm_lock; - -typedef int (*ldlm_blocking_callback)(struct ldlm_lock *lock, - struct ldlm_lock_desc *new, void *data, - int flag); -typedef int (*ldlm_completion_callback)(struct ldlm_lock *lock, int flags, - void *data); - -struct ldlm_lock { - struct portals_handle l_handle; // must be first in the structure - atomic_t l_refc; - struct ldlm_resource *l_resource; - struct ldlm_lock *l_parent; - struct list_head l_children; - struct list_head l_childof; - struct list_head l_lru; - struct list_head l_res_link; // position in one of three res lists - struct list_head l_export_chain; // per-export chain of locks - struct list_head l_pending_chain; // locks with callbacks pending - unsigned long l_callback_timeout; - - ldlm_mode_t l_req_mode; - ldlm_mode_t l_granted_mode; - - ldlm_completion_callback l_completion_ast; - ldlm_blocking_callback l_blocking_ast; - - struct obd_export *l_export; - struct lustre_handle *l_connh; - __u32 l_flags; - struct lustre_handle l_remote_handle; - void *l_data; - struct ldlm_extent l_extent; - __u32 l_version[RES_VERSION_SIZE]; - - __u32 l_readers; - __u32 l_writers; - __u8 l_destroyed; - - /* If the lock is granted, a process sleeps on this waitq to learn when - * it's no longer in use. 
If the lock is not granted, a process sleeps - * on this waitq to learn when it becomes granted. */ - wait_queue_head_t l_waitq; -}; - -typedef int (*ldlm_res_compat)(struct ldlm_lock *child, struct ldlm_lock *new); -typedef int (*ldlm_res_policy)(struct ldlm_namespace *, struct ldlm_lock **, - void *req_cookie, ldlm_mode_t mode, int flags, - void *data); - -#define LDLM_PLAIN 10 -#define LDLM_EXTENT 11 - -#define LDLM_MIN_TYPE 10 -#define LDLM_MAX_TYPE 11 - -struct ldlm_resource { - struct ldlm_namespace *lr_namespace; - struct list_head lr_hash; - struct ldlm_resource *lr_parent; /* 0 for a root resource */ - struct list_head lr_children; /* list head for child resources */ - struct list_head lr_childof; /* part of ns_root_list if root res, - * part of lr_children if child */ - - struct list_head lr_granted; - struct list_head lr_converting; - struct list_head lr_waiting; - ldlm_mode_t lr_most_restr; - __u32 lr_type; /* LDLM_PLAIN or LDLM_EXTENT */ - struct ldlm_resource *lr_root; - struct ldlm_res_id lr_name; - __u32 lr_version[RES_VERSION_SIZE]; - atomic_t lr_refcount; - - /* lr_tmp holds a list head temporarily, during the building of a work - * queue. see ldlm_add_ast_work_item and ldlm_run_ast_work */ - void *lr_tmp; -}; - -struct ldlm_ast_work { - struct ldlm_lock *w_lock; - int w_blocking; - struct ldlm_lock_desc w_desc; - struct list_head w_list; - int w_flags; - void *w_data; - int w_datalen; -}; - -extern struct obd_ops ldlm_obd_ops; - -extern char *ldlm_lockname[]; -extern char *ldlm_typename[]; -extern char *ldlm_it2str(int it); - -#define __LDLM_DEBUG(level, lock, format, a...) \ -do { \ - if (lock->l_resource == NULL) { \ - CDEBUG(level, "### " format \ - " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "\ - "res: \?\? rrc=\?\? type: \?\?\? 
remote: " \ - LPX64"\n" , ## a, lock, lock->l_handle.h_cookie, \ - atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_remote_handle.cookie); \ - break; \ - } \ - if (lock->l_resource->lr_type == LDLM_EXTENT) { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64\ - "] remote: "LPX64"\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, lock, \ - lock->l_handle.h_cookie, atomic_read(&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_extent.start, lock->l_extent.end, \ - lock->l_remote_handle.cookie); \ - break; \ - } \ - { \ - CDEBUG(level, "### " format \ - " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s " \ - "res: "LPU64"/"LPU64" rrc: %d type: %s remote: "LPX64 \ - "\n" , ## a, \ - lock->l_resource->lr_namespace->ns_name, \ - lock, lock->l_handle.h_cookie, \ - atomic_read (&lock->l_refc), \ - lock->l_readers, lock->l_writers, \ - ldlm_lockname[lock->l_granted_mode], \ - ldlm_lockname[lock->l_req_mode], \ - lock->l_resource->lr_name.name[0], \ - lock->l_resource->lr_name.name[1], \ - atomic_read(&lock->l_resource->lr_refcount), \ - ldlm_typename[lock->l_resource->lr_type], \ - lock->l_remote_handle.cookie); \ - } \ -} while (0) - -#define LDLM_DEBUG(lock, format, a...) __LDLM_DEBUG(D_DLMTRACE, lock, \ - format, ## a) -#define LDLM_ERROR(lock, format, a...) __LDLM_DEBUG(D_ERROR, lock, format, ## a) - -#define LDLM_DEBUG_NOLOCK(format, a...) \ - CDEBUG(D_DLMTRACE, "### " format "\n" , ## a) - -/* - * Iterators. 
- */ - -#define LDLM_ITER_CONTINUE 1 /* keep iterating */ -#define LDLM_ITER_STOP 0 /* stop iterating */ - -typedef int (*ldlm_iterator_t)(struct ldlm_lock *, void *); -typedef int (*ldlm_res_iterator_t)(struct ldlm_resource *, void *); - -int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter, - void *closure); -int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter, - void *closure); -int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, - ldlm_res_iterator_t iter, void *closure); - -int ldlm_replay_locks(struct obd_import *imp); - -/* ldlm_extent.c */ -int ldlm_extent_compat(struct ldlm_lock *, struct ldlm_lock *); -int ldlm_extent_policy(struct ldlm_namespace *, struct ldlm_lock **, void *, - ldlm_mode_t, int flags, void *); - -/* ldlm_lockd.c */ -int ldlm_server_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *, - void *data, int flag); -int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data); -int ldlm_handle_enqueue(struct ptlrpc_request *req, ldlm_completion_callback, - ldlm_blocking_callback); -int ldlm_handle_convert(struct ptlrpc_request *req); -int ldlm_handle_cancel(struct ptlrpc_request *req); -int ldlm_del_waiting_lock(struct ldlm_lock *lock); - -/* ldlm_lock.c */ -void ldlm_register_intent(ldlm_res_policy arg); -void ldlm_unregister_intent(void); -void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh); -struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *, int flags); -void ldlm_cancel_callback(struct ldlm_lock *); -int ldlm_lock_set_data(struct lustre_handle *, void *data); -void ldlm_lock_remove_from_lru(struct ldlm_lock *); -struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *, - struct lustre_handle *); - -static inline struct ldlm_lock *ldlm_handle2lock(struct lustre_handle *h) -{ - return __ldlm_handle2lock(h, 0); -} - -#define LDLM_LOCK_PUT(lock) \ -do { \ - /*LDLM_DEBUG((lock), "put");*/ \ - ldlm_lock_put(lock); \ -} while (0) - 
-#define LDLM_LOCK_GET(lock) \ -({ \ - ldlm_lock_get(lock); \ - /*LDLM_DEBUG((lock), "get");*/ \ - lock; \ -}) - -struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); -void ldlm_lock_put(struct ldlm_lock *lock); -void ldlm_lock_destroy(struct ldlm_lock *lock); -void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc); -void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_addref_internal(struct ldlm_lock *, __u32 mode); -void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode); -void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode); -void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen); -int ldlm_lock_match(struct ldlm_namespace *ns, int flags, struct ldlm_res_id *, - __u32 type, void *cookie, int cookielen, ldlm_mode_t mode, - void *data, struct lustre_handle *); -struct ldlm_lock * -ldlm_lock_create(struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, struct ldlm_res_id, - __u32 type, ldlm_mode_t, ldlm_blocking_callback, - void *data); -ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *, struct ldlm_lock **, - void *cookie, int cookie_len, int *flags, - ldlm_completion_callback completion); -struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, - int *flags); -void ldlm_lock_cancel(struct ldlm_lock *lock); -void ldlm_cancel_locks_for_export(struct obd_export *export); -int ldlm_run_ast_work(struct list_head *rpc_list); -void ldlm_reprocess_all(struct ldlm_resource *res); -void ldlm_reprocess_all_ns(struct ldlm_namespace *ns); -void ldlm_lock_dump(int level, struct ldlm_lock *lock); -void ldlm_lock_dump_handle(int level, struct lustre_handle *); - -/* ldlm_test.c */ -int ldlm_test(struct obd_device *device, struct lustre_handle *connh); -int ldlm_regression_start(struct obd_device *obddev, - struct lustre_handle *connh, - unsigned int threads, unsigned int max_locks_in, - unsigned int num_resources_in, - unsigned int 
num_extents_in); -int ldlm_regression_stop(void); - - -/* resource.c */ -struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 local); -int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int local_only); -int ldlm_namespace_free(struct ldlm_namespace *ns); -int ldlm_proc_setup(struct obd_device *obd); -void ldlm_proc_cleanup(struct obd_device *obd); - -/* resource.c - internal */ -struct ldlm_resource *ldlm_resource_get(struct ldlm_namespace *ns, - struct ldlm_resource *parent, - struct ldlm_res_id, __u32 type, - int create); -struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res); -int ldlm_resource_putref(struct ldlm_resource *res); -void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, - struct ldlm_lock *lock); -void ldlm_resource_unlink_lock(struct ldlm_lock *lock); -void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc); -void ldlm_dump_all_namespaces(void); -void ldlm_namespace_dump(struct ldlm_namespace *); -void ldlm_resource_dump(struct ldlm_resource *); -int ldlm_lock_change_resource(struct ldlm_namespace *, struct ldlm_lock *, - struct ldlm_res_id); - -/* ldlm_request.c */ -int ldlm_expired_completion_wait(void *data); -int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data); -int ldlm_cli_enqueue(struct lustre_handle *conn, - struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, - struct ldlm_res_id, - __u32 type, - void *cookie, int cookielen, - ldlm_mode_t mode, - int *flags, - ldlm_completion_callback completion, - ldlm_blocking_callback callback, - void *data, - struct lustre_handle *lockh); -int ldlm_server_ast(struct lustre_handle *lockh, struct ldlm_lock_desc *new, - void *data, __u32 data_len); -int ldlm_cli_convert(struct lustre_handle *, int new_mode, int *flags); -int ldlm_cli_cancel(struct lustre_handle *lockh); -int ldlm_cli_cancel_unused(struct ldlm_namespace *, struct ldlm_res_id *, - int flags, void 
*opaque); - -/* mds/handler.c */ -/* This has to be here because recurisve inclusion sucks. */ -int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag); - - -/* ioctls for trying requests */ -#define IOC_LDLM_TYPE 'f' -#define IOC_LDLM_MIN_NR 40 - -#define IOC_LDLM_TEST _IOWR('f', 40, long) -#define IOC_LDLM_DUMP _IOWR('f', 41, long) -#define IOC_LDLM_REGRESS_START _IOWR('f', 42, long) -#define IOC_LDLM_REGRESS_STOP _IOWR('f', 43, long) -#define IOC_LDLM_MAX_NR 43 - -#endif diff --git a/lustre/include/linux/lustre_export.h b/lustre/include/linux/lustre_export.h deleted file mode 100644 index 6939a95..0000000 --- a/lustre/include/linux/lustre_export.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ - -#ifndef __EXPORT_H -#define __EXPORT_H - -#include -#include - -struct mds_client_data; - -struct mds_export_data { - struct list_head med_open_head; - spinlock_t med_open_lock; - struct mds_client_data *med_mcd; - int med_off; -}; - -struct ldlm_export_data { - struct list_head led_held_locks; /* protected by namespace lock */ - struct obd_import *led_import; -}; - -struct lov_export_data { - spinlock_t led_lock; - struct list_head led_open_head; -}; - -struct ec_export_data { /* echo client */ - struct list_head eced_open_head; - struct list_head eced_locks; -}; - -struct obd_export { - struct portals_handle exp_handle; - atomic_t exp_refcount; - struct obd_uuid exp_client_uuid; - struct list_head exp_obd_chain; - struct obd_device *exp_obd; - struct ptlrpc_connection *exp_connection; - struct ldlm_export_data exp_ldlm_data; - struct ptlrpc_request *exp_outstanding_reply; - time_t exp_last_request_time; - spinlock_t exp_lock; /* protects flags int below */ - int 
exp_failed:1, exp_failover:1; - union { - struct mds_export_data eu_mds_data; - struct filter_export_data eu_filter_data; - struct lov_export_data eu_lov_data; - struct ec_export_data eu_ec_data; - } u; -}; - -#define exp_mds_data u.eu_mds_data -#define exp_lov_data u.eu_lov_data -#define exp_filter_data u.eu_filter_data -#define exp_ec_data u.eu_ec_data - -extern struct obd_export *class_conn2export(struct lustre_handle *conn); -extern struct obd_device *class_conn2obd(struct lustre_handle *conn); - -#endif /* __EXPORT_H */ diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h deleted file mode 100644 index fc00fe1..0000000 --- a/lustre/include/linux/lustre_fsfilt.h +++ /dev/null @@ -1,177 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Filesystem interface helper. 
- * - */ - -#ifndef _LUSTRE_FSFILT_H -#define _LUSTRE_FSFILT_H - -#ifdef __KERNEL__ - -#include -#include - -typedef void (*fsfilt_cb_t)(struct obd_device *obd, __u64 last_rcvd, int error); - -struct fsfilt_objinfo { - struct dentry *fso_dentry; - int fso_bufcnt; -}; - -struct fsfilt_operations { - struct list_head fs_list; - struct module *fs_owner; - char *fs_type; - void *(* fs_start)(struct inode *inode, int op); - void *(* fs_brw_start)(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb); - int (* fs_commit)(struct inode *inode, void *handle,int force_sync); - int (* fs_setattr)(struct dentry *dentry, void *handle, - struct iattr *iattr, int do_trunc); - int (* fs_set_md)(struct inode *inode, void *handle, void *md, - int size); - int (* fs_get_md)(struct inode *inode, void *md, int size); - ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, - loff_t *offset); - int (* fs_journal_data)(struct file *file); - int (* fs_set_last_rcvd)(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func); - int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); - int (* fs_sync)(struct super_block *sb); - int (* fs_prep_san_write)(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); -}; - -extern int fsfilt_register_ops(struct fsfilt_operations *fs_ops); -extern void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops); -extern struct fsfilt_operations *fsfilt_get_ops(char *type); -extern void fsfilt_put_ops(struct fsfilt_operations *fs_ops); - -#define FSFILT_OP_UNLINK 1 -#define FSFILT_OP_RMDIR 2 -#define FSFILT_OP_RENAME 3 -#define FSFILT_OP_CREATE 4 -#define FSFILT_OP_MKDIR 5 -#define FSFILT_OP_SYMLINK 6 -#define FSFILT_OP_MKNOD 7 -#define FSFILT_OP_SETATTR 8 -#define FSFILT_OP_LINK 9 - -static inline void *fsfilt_start(struct obd_device *obd, - struct inode *inode, int op) -{ - unsigned long now = jiffies; - void *handle = obd->obd_fsops->fs_start(inode, op); - CDEBUG(D_HA, 
"started handle %p\n", handle); - if (time_after(jiffies, now + 15*HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); - return handle; -} - -static inline void *fsfilt_brw_start(struct obd_device *obd, int objcount, - struct fsfilt_objinfo *fso, int niocount, - struct niobuf_remote *nb) -{ - unsigned long now = jiffies; - void *handle = obd->obd_fsops->fs_brw_start(objcount, fso, niocount,nb); - CDEBUG(D_HA, "started handle %p\n", handle); - if (time_after(jiffies, now + 15*HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); - return handle; -} - -static inline int fsfilt_commit(struct obd_device *obd, struct inode *inode, - void *handle, int force_sync) -{ - unsigned long now = jiffies; - int rc = obd->obd_fsops->fs_commit(inode, handle, force_sync); - CDEBUG(D_HA, "committing handle %p\n", handle); - if (time_after(jiffies, now + 15*HZ)) - CERROR("long journal start time %lus\n", (jiffies - now) / HZ); - return rc; -} - -static inline int fsfilt_setattr(struct obd_device *obd, struct dentry *dentry, - void *handle, struct iattr *iattr,int do_trunc) -{ - unsigned long now = jiffies; - int rc; - rc = obd->obd_fsops->fs_setattr(dentry, handle, iattr, do_trunc); - if (time_after(jiffies, now + 15*HZ)) - CERROR("long setattr time %lus\n", (jiffies - now) / HZ); - - return rc; -} - -static inline int fsfilt_set_md(struct obd_device *obd, struct inode *inode, - void *handle, void *md, int size) -{ - return obd->obd_fsops->fs_set_md(inode, handle, md, size); -} - -static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode, - void *md, int size) -{ - return obd->obd_fsops->fs_get_md(inode, md, size); -} - -static inline ssize_t fsfilt_readpage(struct obd_device *obd, - struct file *file, char *buf, - size_t count, loff_t *offset) -{ - return obd->obd_fsops->fs_readpage(file, buf, count, offset); -} - -static inline int fsfilt_journal_data(struct obd_device *obd, struct file *file) -{ - return 
obd->obd_fsops->fs_journal_data(file); -} - -static inline int fsfilt_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) -{ - return obd->obd_fsops->fs_set_last_rcvd(obd, last_rcvd,handle,cb_func); -} - -static inline int fsfilt_statfs(struct obd_device *obd, struct super_block *fs, - struct obd_statfs *osfs) -{ - return obd->obd_fsops->fs_statfs(fs, osfs); -} - -static inline int fsfilt_sync(struct obd_device *obd, struct super_block *fs) -{ - return obd->obd_fsops->fs_sync(fs); -} - -static inline int fs_prep_san_write(struct obd_device *obd, - struct inode *inode, - long *blocks, - int nblocks, - loff_t newsize) -{ - return obd->obd_fsops->fs_prep_san_write(inode, blocks, - nblocks, newsize); -} -#endif /* __KERNEL__ */ - -#endif diff --git a/lustre/include/linux/lustre_ha.h b/lustre/include/linux/lustre_ha.h deleted file mode 100644 index 8493f91..0000000 --- a/lustre/include/linux/lustre_ha.h +++ /dev/null @@ -1,24 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#ifndef _LUSTRE_HA_H -#define _LUSTRE_HA_H - -struct obd_import; -struct obd_export; -struct obd_device; -struct ptlrpc_request; - -void ptlrpc_run_failed_import_upcall(struct obd_import *imp); -void ptlrpc_run_recovery_over_upcall(struct obd_device *obd); -int ptlrpc_reconnect_import(struct obd_import *imp); -int ptlrpc_replay(struct obd_import *imp); -int ptlrpc_resend(struct obd_import *imp); -void ptlrpc_free_committed(struct obd_import *imp); -void ptlrpc_wake_delayed(struct obd_import *imp); -int ptlrpc_recover_import(struct obd_import *imp, char *new_uuid); -int ptlrpc_set_import_active(struct obd_import *imp, int active); -void ptlrpc_fail_import(struct obd_import *imp, int generation); -void ptlrpc_fail_export(struct obd_export *exp); -#endif diff --git a/lustre/include/linux/lustre_handles.h b/lustre/include/linux/lustre_handles.h deleted file mode 100644 index f644cf1..0000000 
--- a/lustre/include/linux/lustre_handles.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __LINUX_HANDLES_H_ -#define __LINUX_HANDLES_H_ - -#ifdef __KERNEL__ -#include -#include -#include -#endif - -typedef void (*portals_handle_addref_cb)(void *object); - -/* These handles are most easily used by having them appear at the very top of - * whatever object that you want to make handles for. ie: - * - * struct ldlm_lock { - * struct portals_handle handle; - * ... - * }; - * - * Now you're able to assign the results of cookie2handle directly to an - * ldlm_lock. If it's not at the top, you'll want to hack up a macro that - * uses some offsetof() magic. */ - -struct portals_handle { - struct list_head h_link; - __u64 h_cookie; - portals_handle_addref_cb h_addref; -}; - -/* handles.c */ - -/* Add a handle to the hash table */ -void class_handle_hash(struct portals_handle *, portals_handle_addref_cb); -void class_handle_unhash(struct portals_handle *); -void *class_handle2object(__u64 cookie); -int class_handle_init(void); -void class_handle_cleanup(void); - -#endif diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h deleted file mode 100644 index f4a5f2d..0000000 --- a/lustre/include/linux/lustre_idl.h +++ /dev/null @@ -1,728 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * (Un)packing of OST requests - * - * We assume all nodes are either little-endian or big-endian, and we - * always send messages in the sender's native format. The receiver - * detects the message format by checking the 'magic' field of the message - * (see lustre_msg_swabbed() below). - * - * Each wire type has corresponding 'lustre_swab_xxxtypexxx()' routines, - * implemented either here, inline (trivial implementations) or in - * ptlrpc/pack_generic.c. These 'swabbers' convert the type from "other" - * endian, in-place in the message buffer. - * - * A swabber takes a single pointer argument. The caller must already have - * verified that the length of the message buffer >= sizeof (type). - * - * For variable length types, a second 'lustre_swab_v_xxxtypexxx()' routine - * may be defined that swabs just the variable part, after the caller has - * verified that the message buffer is large enough. 
- */ - -#ifndef _LUSTRE_IDL_H_ -#define _LUSTRE_IDL_H_ - -#ifdef __KERNEL__ -# include -# include -# include -# include -# include /* for strncpy, below */ -# include -#else -#ifdef __CYGWIN__ -# include -#else -# include -# include -#endif -# include -#endif -/* - * this file contains all data structures used in Lustre interfaces: - * - obdo and obd_request records - * - mds_request records - * - ldlm data - * - ioctl's - */ - -/* - * GENERAL STUFF - */ -struct obd_uuid { - __u8 uuid[37]; -}; - -static inline int obd_uuid_equals(struct obd_uuid *u1, struct obd_uuid *u2) -{ - return strcmp(u1->uuid, u2->uuid) == 0; -} - -static inline void obd_str2uuid(struct obd_uuid *uuid, char *tmp) -{ - strncpy(uuid->uuid, tmp, sizeof(*uuid)); - uuid->uuid[sizeof(*uuid) - 1] = '\0'; -} - -extern struct obd_uuid lctl_fake_uuid; - -/* FOO_REQUEST_PORTAL is for incoming requests on the FOO - * FOO_REPLY_PORTAL is for incoming replies on the FOO - * FOO_BULK_PORTAL is for incoming bulk on the FOO - */ - -#define CONNMGR_REQUEST_PORTAL 1 -#define CONNMGR_REPLY_PORTAL 2 -//#define OSC_REQUEST_PORTAL 3 -#define OSC_REPLY_PORTAL 4 -//#define OSC_BULK_PORTAL 5 -#define OST_REQUEST_PORTAL 6 -//#define OST_REPLY_PORTAL 7 -#define OST_BULK_PORTAL 8 -//#define MDC_REQUEST_PORTAL 9 -#define MDC_REPLY_PORTAL 10 -//#define MDC_BULK_PORTAL 11 -#define MDS_REQUEST_PORTAL 12 -//#define MDS_REPLY_PORTAL 13 -#define MDS_BULK_PORTAL 14 -#define LDLM_CB_REQUEST_PORTAL 15 -#define LDLM_CB_REPLY_PORTAL 16 -#define LDLM_CANCEL_REQUEST_PORTAL 17 -#define LDLM_CANCEL_REPLY_PORTAL 18 -#define PTLBD_REQUEST_PORTAL 19 -#define PTLBD_REPLY_PORTAL 20 -#define PTLBD_BULK_PORTAL 21 -#define MDS_SETATTR_PORTAL 22 -#define MDS_READPAGE_PORTAL 23 - -#define SVC_KILLED 1 -#define SVC_EVENT 2 -#define SVC_SIGNAL 4 -#define SVC_RUNNING 8 -#define SVC_STOPPING 16 -#define SVC_STOPPED 32 - -#define LUSTRE_CONN_NEW 1 -#define LUSTRE_CONN_CON 2 -#define LUSTRE_CONN_NOTCONN 3 -#define LUSTRE_CONN_RECOVER 4 -#define 
LUSTRE_CONN_FULL 5 - -/* packet types */ -#define PTL_RPC_MSG_REQUEST 4711 -#define PTL_RPC_MSG_ERR 4712 -#define PTL_RPC_MSG_REPLY 4713 - -#define PTLRPC_MSG_MAGIC 0x0BD00BD0 -#define PTLRPC_MSG_VERSION 0x00040002 - -struct lustre_handle { - __u64 cookie; -}; -#define DEAD_HANDLE_MAGIC 0xdeadbeefcafebabe - -/* we depend on this structure to be 8-byte aligned */ -/* this type is only endian-adjusted in lustre_unpack_msg() */ -struct lustre_msg { - struct lustre_handle handle; - __u32 magic; - __u32 type; - __u32 version; - __u32 opc; - __u64 last_xid; - __u64 last_committed; - __u64 transno; - __u32 status; - __u32 flags; - __u32 bufcount; - __u32 buflens[0]; -}; - -static inline int lustre_msg_swabbed (struct lustre_msg *msg) -{ - return (msg->magic == __swab32 (PTLRPC_MSG_MAGIC)); -} - -/* Flags that are operation-specific go in the top 16 bits. */ -#define MSG_OP_FLAG_MASK 0xffff0000 -#define MSG_OP_FLAG_SHIFT 16 - -/* Flags that apply to all requests are in the bottom 16 bits */ -#define MSG_GEN_FLAG_MASK 0x0000ffff -#define MSG_LAST_REPLAY 1 -#define MSG_RESENT 2 - -static inline int lustre_msg_get_flags(struct lustre_msg *msg) -{ - return (msg->flags & MSG_GEN_FLAG_MASK); -} - -static inline void lustre_msg_add_flags(struct lustre_msg *msg, int flags) -{ - msg->flags |= MSG_GEN_FLAG_MASK & flags; -} - -static inline void lustre_msg_set_flags(struct lustre_msg *msg, int flags) -{ - msg->flags &= ~MSG_GEN_FLAG_MASK; - lustre_msg_add_flags(msg, flags); -} - -static inline int lustre_msg_get_op_flags(struct lustre_msg *msg) -{ - return (msg->flags >> MSG_OP_FLAG_SHIFT); -} - -static inline void lustre_msg_add_op_flags(struct lustre_msg *msg, int flags) -{ - msg->flags |= ((flags & MSG_GEN_FLAG_MASK) << MSG_OP_FLAG_SHIFT); -} - -static inline void lustre_msg_set_op_flags(struct lustre_msg *msg, int flags) -{ - msg->flags &= ~MSG_OP_FLAG_MASK; - lustre_msg_add_op_flags(msg, flags); -} - -/* - * Flags for all connect opcodes (MDS_CONNECT, OST_CONNECT) - */ - 
-#define MSG_CONNECT_RECOVERING 0x1 -#define MSG_CONNECT_RECONNECT 0x2 -#define MSG_CONNECT_REPLAYABLE 0x4 - -/* - * OST requests: OBDO & OBD request records - */ - -/* opcodes */ -typedef enum { - OST_REPLY = 0, /* reply ? */ - OST_GETATTR = 1, - OST_SETATTR = 2, - OST_READ = 3, - OST_WRITE = 4, - OST_CREATE = 5, - OST_DESTROY = 6, - OST_GET_INFO = 7, - OST_CONNECT = 8, - OST_DISCONNECT = 9, - OST_PUNCH = 10, - OST_OPEN = 11, - OST_CLOSE = 12, - OST_STATFS = 13, - OST_SAN_READ = 14, - OST_SAN_WRITE = 15, - OST_SYNCFS = 16, - OST_LAST_OPC -} ost_cmd_t; -#define OST_FIRST_OPC OST_REPLY -/* When adding OST RPC opcodes, please update - * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ - - -typedef uint64_t obd_id; -typedef uint64_t obd_gr; -typedef uint64_t obd_time; -typedef uint64_t obd_size; -typedef uint64_t obd_off; -typedef uint64_t obd_blocks; -typedef uint32_t obd_blksize; -typedef uint32_t obd_mode; -typedef uint32_t obd_uid; -typedef uint32_t obd_gid; -typedef uint64_t obd_rdev; -typedef uint32_t obd_flag; -typedef uint32_t obd_count; - -#define OBD_FL_INLINEDATA (0x00000001) -#define OBD_FL_OBDMDEXISTS (0x00000002) - -#define OBD_INLINESZ 60 -#define FD_OSTDATA_SIZE sizeof(struct obd_client_handle) - -/* Note: 64-bit types are 64-bit aligned in structure */ -struct obdo { - obd_id o_id; - obd_gr o_gr; - obd_time o_atime; - obd_time o_mtime; - obd_time o_ctime; - obd_size o_size; - obd_blocks o_blocks; /* brw: clients sent cached bytes */ - obd_rdev o_rdev; /* brw: clients/servers sent grant */ - obd_blksize o_blksize; /* optimal IO blocksize */ - obd_mode o_mode; - obd_uid o_uid; - obd_gid o_gid; - obd_flag o_flags; - obd_count o_nlink; /* brw: checksum */ - obd_count o_generation; - obd_flag o_valid; /* hot fields in this obdo */ - obd_flag o_obdflags; - __u32 o_easize; - char o_inline[OBD_INLINESZ]; -}; - -extern void lustre_swab_obdo (struct obdo *o); - -struct lov_object_id { /* per-child structure */ - __u64 l_object_id; -}; - -#define LOV_MAGIC 
0x0BD00BD0 - -struct lov_mds_md { - __u32 lmm_magic; - __u64 lmm_object_id; /* lov object id */ - __u32 lmm_stripe_size; /* size of the stripe */ - __u32 lmm_stripe_offset; /* starting stripe offset in lmm_objects */ - __u16 lmm_stripe_count; /* number of stipes in use for this object */ - __u16 lmm_ost_count; /* how many OST idx are in this LOV md */ - struct lov_object_id lmm_objects[0]; -}; - -#define OBD_MD_FLALL (0xffffffff) -#define OBD_MD_FLID (0x00000001) /* object ID */ -#define OBD_MD_FLATIME (0x00000002) /* access time */ -#define OBD_MD_FLMTIME (0x00000004) /* data modification time */ -#define OBD_MD_FLCTIME (0x00000008) /* change time */ -#define OBD_MD_FLSIZE (0x00000010) /* size */ -#define OBD_MD_FLBLOCKS (0x00000020) /* allocated blocks count */ -#define OBD_MD_FLBLKSZ (0x00000040) /* block size */ -#define OBD_MD_FLMODE (0x00000080) /* access bits (mode & ~S_IFMT) */ -#define OBD_MD_FLTYPE (0x00000100) /* object type (mode & S_IFMT) */ -#define OBD_MD_FLUID (0x00000200) /* user ID */ -#define OBD_MD_FLGID (0x00000400) /* group ID */ -#define OBD_MD_FLFLAGS (0x00000800) /* flags word */ -#define OBD_MD_FLOBDFLG (0x00001000) -#define OBD_MD_FLNLINK (0x00002000) /* link count */ -#define OBD_MD_FLGENER (0x00004000) /* generation number */ -#define OBD_MD_FLINLINE (0x00008000) /* inline data */ -#define OBD_MD_FLRDEV (0x00010000) /* device number */ -#define OBD_MD_FLEASIZE (0x00020000) /* extended attribute data */ -#define OBD_MD_LINKNAME (0x00040000) /* symbolic link target */ -#define OBD_MD_FLHANDLE (0x00080000) /* file handle */ -#define OBD_MD_FLCKSUM (0x00100000) /* bulk data checksum */ -#define OBD_MD_FLNOTOBD (~(OBD_MD_FLOBDFLG | OBD_MD_FLBLOCKS | OBD_MD_LINKNAME|\ - OBD_MD_FLEASIZE | OBD_MD_FLHANDLE | OBD_MD_FLCKSUM)) - -struct obd_statfs { - __u64 os_type; - __u64 os_blocks; - __u64 os_bfree; - __u64 os_bavail; - __u64 os_files; - __u64 os_ffree; - __u8 os_fsid[40]; - __u32 os_bsize; - __u32 os_namelen; - __u64 os_maxbytes; - __u32 
os_spare[10]; -}; - -extern void lustre_swab_obd_statfs (struct obd_statfs *os); - -/* ost_body.data values for OST_BRW */ - -#define OBD_BRW_READ 0x01 -#define OBD_BRW_WRITE 0x02 -#define OBD_BRW_RWMASK (OBD_BRW_READ | OBD_BRW_WRITE) -#define OBD_BRW_CREATE 0x04 -#define OBD_BRW_SYNC 0x08 -#define OBD_BRW_CHECK 0x10 -#define OBD_BRW_FROM_GRANT 0x20 - -#define OBD_OBJECT_EOF 0xffffffffffffffffULL - -struct obd_ioobj { - obd_id ioo_id; - obd_gr ioo_gr; - __u32 ioo_type; - __u32 ioo_bufcnt; -} __attribute__((packed)); - -extern void lustre_swab_obd_ioobj (struct obd_ioobj *ioo); - -/* multiple of 8 bytes => can array */ -struct niobuf_remote { - __u64 offset; - __u32 len; - __u32 flags; -} __attribute__((packed)); - -extern void lustre_swab_niobuf_remote (struct niobuf_remote *nbr); - -/* request structure for OST's */ - -#define OST_REQ_HAS_OA1 0x1 - -struct ost_body { - struct obdo oa; -}; - -extern void lustre_swab_ost_body (struct ost_body *b); - -/* - * MDS REQ RECORDS - */ - -/* opcodes */ -typedef enum { - MDS_GETATTR = 33, - MDS_GETATTR_NAME = 34, - MDS_CLOSE = 35, - MDS_REINT = 36, - MDS_READPAGE = 37, - MDS_CONNECT = 38, - MDS_DISCONNECT = 39, - MDS_GETSTATUS = 40, - MDS_STATFS = 41, - MDS_GETLOVINFO = 42, - MDS_LAST_OPC -} mds_cmd_t; -#define MDS_FIRST_OPC MDS_GETATTR -/* When adding MDS RPC opcodes, please update - * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ - -/* - * Do not exceed 63 - */ - -#define REINT_SETATTR 1 -#define REINT_CREATE 2 -#define REINT_LINK 3 -#define REINT_UNLINK 4 -#define REINT_RENAME 5 -#define REINT_OPEN 6 -#define REINT_MAX 6 - -#define IT_INTENT_EXEC 1 -#define IT_OPEN_LOOKUP (1 << 1) -#define IT_OPEN_NEG (1 << 2) -#define IT_OPEN_POS (1 << 3) -#define IT_OPEN_CREATE (1 << 4) -#define IT_OPEN_OPEN (1 << 5) - -struct ll_fid { - __u64 id; - __u32 generation; - __u32 f_type; -}; - -extern void lustre_swab_ll_fid (struct ll_fid *fid); - -#define MDS_STATUS_CONN 1 -#define MDS_STATUS_LOV 2 - -struct mds_status_req { - 
__u32 flags; - __u32 repbuf; -}; - -extern void lustre_swab_mds_status_req (struct mds_status_req *r); - -struct mds_fileh_body { - struct ll_fid f_fid; - struct lustre_handle f_handle; -}; - -extern void lustre_swab_mds_fileh_body (struct mds_fileh_body *f); - -struct mds_body { - struct ll_fid fid1; - struct ll_fid fid2; - struct lustre_handle handle; - __u64 size; /* Offset, in the case of MDS_READPAGE */ - __u64 blocks; /* XID, in the case of MDS_READPAGE */ - __u32 ino; /* make this a __u64 */ - __u32 valid; - __u32 fsuid; - __u32 fsgid; - __u32 capability; - __u32 mode; - __u32 uid; - __u32 gid; - __u32 mtime; - __u32 ctime; - __u32 atime; - __u32 flags; - __u32 rdev; - __u32 nlink; /* #bytes to read in the case of MDS_READPAGE */ - __u32 generation; - __u32 suppgid; - __u32 eadatasize; -}; - -extern void lustre_swab_mds_body (struct mds_body *b); - -/* This is probably redundant with OBD_MD_FLEASIZE, but we need an audit */ -#define MDS_OPEN_HAS_EA 1 /* this open has an EA, for a delayed create*/ - -/* MDS update records */ - -//struct mds_update_record_hdr { -// __u32 ur_opcode; -//}; - -struct mds_rec_setattr { - __u32 sa_opcode; - __u32 sa_fsuid; - __u32 sa_fsgid; - __u32 sa_cap; - __u32 sa_reserved; - __u32 sa_valid; - struct ll_fid sa_fid; - __u32 sa_mode; - __u32 sa_uid; - __u32 sa_gid; - __u32 sa_attr_flags; - __u64 sa_size; - __u64 sa_atime; - __u64 sa_mtime; - __u64 sa_ctime; - __u32 sa_suppgid; -}; - -extern void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa); - -struct mds_rec_create { - __u32 cr_opcode; - __u32 cr_fsuid; - __u32 cr_fsgid; - __u32 cr_cap; - __u32 cr_flags; /* for use with open */ - __u32 cr_mode; - struct ll_fid cr_fid; - struct ll_fid cr_replayfid; - __u32 cr_uid; - __u32 cr_gid; - __u64 cr_time; - __u64 cr_rdev; - __u32 cr_suppgid; -}; - -extern void lustre_swab_mds_rec_create (struct mds_rec_create *cr); - -struct mds_rec_link { - __u32 lk_opcode; - __u32 lk_fsuid; - __u32 lk_fsgid; - __u32 lk_cap; - __u32 
lk_suppgid1; - __u32 lk_suppgid2; - struct ll_fid lk_fid1; - struct ll_fid lk_fid2; -}; - -extern void lustre_swab_mds_rec_link (struct mds_rec_link *lk); - -struct mds_rec_unlink { - __u32 ul_opcode; - __u32 ul_fsuid; - __u32 ul_fsgid; - __u32 ul_cap; - __u32 ul_reserved; - __u32 ul_mode; - __u32 ul_suppgid; - struct ll_fid ul_fid1; - struct ll_fid ul_fid2; -}; - -extern void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul); - -struct mds_rec_rename { - __u32 rn_opcode; - __u32 rn_fsuid; - __u32 rn_fsgid; - __u32 rn_cap; - __u32 rn_suppgid1; - __u32 rn_suppgid2; - struct ll_fid rn_fid1; - struct ll_fid rn_fid2; -}; - -extern void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn); - -/* - * LOV data structures - */ - -#define LOV_RAID0 0 -#define LOV_RAIDRR 1 - -#define LOV_MAX_UUID_BUFFER_SIZE 8192 -/* The size of the buffer the lov/mdc reserves for the - * array of UUIDs returned by the MDS. With the current - * protocol, this will limit the max number of OSTs per LOV */ - -struct lov_desc { - __u32 ld_tgt_count; /* how many OBD's */ - __u32 ld_active_tgt_count; /* how many active */ - __u32 ld_default_stripe_count; /* how many objects are used */ - __u64 ld_default_stripe_size; /* in bytes */ - __u64 ld_default_stripe_offset; /* in bytes */ - __u32 ld_pattern; /* RAID 0,1 etc */ - struct obd_uuid ld_uuid; -}; - -extern void lustre_swab_lov_desc (struct lov_desc *ld); - -/* - * LDLM requests: - */ -/* opcodes -- MUST be distinct from OST/MDS opcodes */ -typedef enum { - LDLM_ENQUEUE = 101, - LDLM_CONVERT = 102, - LDLM_CANCEL = 103, - LDLM_BL_CALLBACK = 104, - LDLM_CP_CALLBACK = 105, - LDLM_LAST_OPC -} ldlm_cmd_t; -#define LDLM_FIRST_OPC LDLM_ENQUEUE -/* When adding LDLM RPC opcodes, please update - * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ - -#define RES_NAME_SIZE 3 -#define RES_VERSION_SIZE 4 - -struct ldlm_res_id { - __u64 name[RES_NAME_SIZE]; -}; - -extern void lustre_swab_ldlm_res_id (struct ldlm_res_id *id); - -/* lock types */ 
-typedef enum { - LCK_EX = 1, - LCK_PW, - LCK_PR, - LCK_CW, - LCK_CR, - LCK_NL -} ldlm_mode_t; - -struct ldlm_extent { - __u64 start; - __u64 end; -}; - -extern void lustre_swab_ldlm_extent (struct ldlm_extent *e); - -struct ldlm_intent { - __u64 opc; -}; - -extern void lustre_swab_ldlm_intent (struct ldlm_intent *i); - -/* Note this unaligned structure; as long as it's only used in ldlm_request - * below, we're probably fine. */ -struct ldlm_resource_desc { - __u32 lr_type; - struct ldlm_res_id lr_name; - __u32 lr_version[RES_VERSION_SIZE]; -}; - -extern void lustre_swab_ldlm_resource_desc (struct ldlm_resource_desc *r); - -struct ldlm_lock_desc { - struct ldlm_resource_desc l_resource; - ldlm_mode_t l_req_mode; - ldlm_mode_t l_granted_mode; - struct ldlm_extent l_extent; - __u32 l_version[RES_VERSION_SIZE]; -}; - -extern void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l); - -struct ldlm_request { - __u32 lock_flags; - struct ldlm_lock_desc lock_desc; - struct lustre_handle lock_handle1; - struct lustre_handle lock_handle2; -}; - -extern void lustre_swab_ldlm_request (struct ldlm_request *rq); - -struct ldlm_reply { - __u32 lock_flags; - __u32 lock_mode; - struct ldlm_res_id lock_resource_name; - struct lustre_handle lock_handle; - struct ldlm_extent lock_extent; /* XXX make this policy 1 &2 */ - __u64 lock_policy_res1; - __u64 lock_policy_res2; -}; - -extern void lustre_swab_ldlm_reply (struct ldlm_reply *r); - -/* - * ptlbd, portal block device requests - */ -typedef enum { - PTLBD_QUERY = 200, - PTLBD_READ = 201, - PTLBD_WRITE = 202, - PTLBD_FLUSH = 203, - PTLBD_CONNECT = 204, - PTLBD_DISCONNECT = 205, - PTLBD_LAST_OPC -} ptlbd_cmd_t; -#define PTLBD_FIRST_OPC PTLBD_QUERY -/* When adding PTLBD RPC opcodes, please update - * LAST/FIRST macros used in ptlrpc/ptlrpc_internals.h */ - -struct ptlbd_op { - __u16 op_cmd; - __u16 op_lun; - __u16 op_niob_cnt; - __u16 op__padding; - __u32 op_block_cnt; -}; - -extern void lustre_swab_ptlbd_op (struct ptlbd_op *op); 
- -struct ptlbd_niob { - __u64 n_xid; - __u64 n_block_nr; - __u32 n_offset; - __u32 n_length; -}; - -extern void lustre_swab_ptlbd_niob (struct ptlbd_niob *n); - -struct ptlbd_rsp { - __u16 r_status; - __u16 r_error_cnt; -}; - -extern void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r); - -/* - * Opcodes for multiple servers. - */ - -#define OBD_PING 400 - -#endif diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h deleted file mode 100644 index 9dc0a92..0000000 --- a/lustre/include/linux/lustre_import.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ - -#ifndef __IMPORT_H -#define __IMPORT_H - -#include -#include - -struct obd_import { - struct portals_handle imp_handle; - atomic_t imp_refcount; - struct lustre_handle imp_dlm_handle; /* client's ldlm export */ - struct ptlrpc_connection *imp_connection; - struct ptlrpc_client *imp_client; - struct list_head imp_observers; - struct list_head imp_pinger_chain; - - /* Lists of requests that are retained for replay, waiting for a reply, - * or waiting for recovery to complete, respectively. 
- */ - struct list_head imp_replay_list; - struct list_head imp_sending_list; - struct list_head imp_delayed_list; - - struct obd_device *imp_obd; - int imp_level; - int imp_generation; - __u64 imp_max_transno; - __u64 imp_peer_committed_transno; - struct obd_uuid imp_target_uuid; /* XXX -> lustre_name */ - struct lustre_handle imp_remote_handle; - unsigned long imp_next_ping; - - /* Protects flags, level, generation, *_list */ - spinlock_t imp_lock; - - /* flags */ - int imp_invalid:1, imp_replayable:1, - imp_dlm_fake:1; - __u32 imp_connect_op; -}; - -typedef void (*obd_import_callback)(struct obd_import *imp, void *closure, - int event, void *event_arg, void *cb_data); - -struct obd_import_observer { - struct list_head oio_chain; - obd_import_callback oio_cb; - void *oio_cb_data; -}; - -void class_observe_import(struct obd_import *imp, obd_import_callback cb, - void *cb_data); -void class_unobserve_import(struct obd_import *imp, obd_import_callback cb, - void *cb_data); -void class_notify_import_observers(struct obd_import *imp, int event, - void *event_arg); - -#define IMP_EVENT_ACTIVE 1 -#define IMP_EVENT_INACTIVE 2 - -/* genops.c */ -extern struct obd_import *class_conn2cliimp(struct lustre_handle *); -extern struct obd_import *class_conn2ldlmimp(struct lustre_handle *); - -#endif /* __IMPORT_H */ diff --git a/lustre/include/linux/lustre_lib.h b/lustre/include/linux/lustre_lib.h deleted file mode 100644 index b18e2d2..0000000 --- a/lustre/include/linux/lustre_lib.h +++ /dev/null @@ -1,706 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic Lustre library routines. - * - */ - -#ifndef _LUSTRE_LIB_H -#define _LUSTRE_LIB_H - -#include - -#ifndef __KERNEL__ -# include -# include -#else -# include -# include -# include -# include -#endif -#include -#include /* XXX just for LASSERT! */ -#include - -#ifndef LPU64 -#if BITS_PER_LONG > 32 -#define LPU64 "%lu" -#define LPD64 "%ld" -#define LPX64 "%#lx" -#else -#define LPU64 "%Lu" -#define LPD64 "%Ld" -#define LPX64 "%#Lx" -#endif -#endif - -/* target.c */ -struct ptlrpc_request; -struct recovd_data; -struct recovd_obd; -struct obd_export; -#include -#include -#include - -int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler); -int target_handle_disconnect(struct ptlrpc_request *req); -int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, - struct obd_uuid *cluuid); -int target_handle_ping(struct ptlrpc_request *req); -void target_cancel_recovery_timer(struct obd_device *obd); - -#define OBD_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ -void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler); -void target_abort_recovery(void *data); -int target_queue_recovery_request(struct ptlrpc_request *req, - struct obd_device *obd); -int target_queue_final_reply(struct ptlrpc_request *req, int rc); -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id); - -/* client.c */ - -int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf); -int client_sanobd_setup(struct obd_device *obddev, obd_count len, void 
*buf); -int client_obd_cleanup(struct obd_device * obddev, int force, int failover); -struct client_obd *client_conn2cli(struct lustre_handle *conn); -struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid); - -/* It is important that och_fh remain the first item in this structure: that - * way, we don't have to re-pack the obdo's inline data before we send it to - * the server, we can just send the whole struct unaltered. */ -struct obd_client_handle { - struct lustre_handle och_fh; - struct ptlrpc_request *och_req; - __u32 och_magic; -}; -#define OBD_CLIENT_HANDLE_MAGIC 0xd15ea5ed - -/* statfs_pack.c */ -int obd_self_statfs(struct obd_device *dev, struct statfs *sfs); - -/* l_lock.c */ -struct lustre_lock { - int l_depth; - struct task_struct *l_owner; - struct semaphore l_sem; - spinlock_t l_spin; -}; - -void l_lock_init(struct lustre_lock *); -void l_lock(struct lustre_lock *); -void l_unlock(struct lustre_lock *); -int l_has_lock(struct lustre_lock *); - -/* simple.c */ -struct obd_ucred { - __u32 ouc_fsuid; - __u32 ouc_fsgid; - __u32 ouc_cap; - __u32 ouc_suppgid1; - __u32 ouc_suppgid2; -}; - -#define OBD_RUN_CTXT_MAGIC 0xC0FFEEAA -#define OBD_CTXT_DEBUG /* development-only debugging */ -struct obd_run_ctxt { - struct vfsmount *pwdmnt; - struct dentry *pwd; - mm_segment_t fs; - struct obd_ucred ouc; - int ngroups; -#ifdef OBD_CTXT_DEBUG - __u32 magic; -#endif -}; - - -#ifdef OBD_CTXT_DEBUG -#define OBD_SET_CTXT_MAGIC(ctxt) (ctxt)->magic = OBD_RUN_CTXT_MAGIC -#else -#define OBD_SET_CTXT_MAGIC(ctxt) do {} while(0) -#endif - -#ifdef __KERNEL__ - -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *cred); -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *cred); -struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode); -struct dentry *simple_mknod(struct dentry *dir, char *name, int mode); -int lustre_fread(struct file *file, void *buf, int len, loff_t *off); 
-int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off); -int lustre_fsync(struct file *file); - -static inline void l_dput(struct dentry *de) -{ - if (!de || IS_ERR(de)) - return; - //shrink_dcache_parent(de); - LASSERT(atomic_read(&de->d_count) > 0); - dput(de); -} - -/* We need to hold the inode semaphore over the dcache lookup itself, or we - * run the risk of entering the filesystem lookup path concurrently on SMP - * systems, and instantiating two inodes for the same entry. We still - * protect against concurrent addition/removal races with the DLM locking. - */ -static inline struct dentry *ll_lookup_one_len(char *fid_name, - struct dentry *dparent, - int fid_namelen) -{ - struct dentry *dchild; - - down(&dparent->d_inode->i_sem); - dchild = lookup_one_len(fid_name, dparent, fid_namelen); - up(&dparent->d_inode->i_sem); - - return dchild; -} - -static inline void ll_sleep(int t) -{ - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(t * HZ); - set_current_state(TASK_RUNNING); -} -#endif - -#define LL_FID_NAMELEN (16 + 1 + 8 + 1) -static inline int ll_fid2str(char *str, __u64 id, __u32 generation) -{ - return sprintf(str, "%llx:%08x", (unsigned long long)id, generation); -} - -#include - -/* - * OBD IOCTLS - */ -#define OBD_IOCTL_VERSION 0x00010003 - -struct obd_ioctl_data { - uint32_t ioc_len; - uint32_t ioc_version; - - uint64_t ioc_cookie; - uint32_t ioc_conn1; - uint32_t ioc_conn2; - - struct obdo ioc_obdo1; - struct obdo ioc_obdo2; - - obd_size ioc_count; - obd_off ioc_offset; - uint32_t ioc_dev; - uint32_t ioc_command; - - uint64_t ioc_nid; - uint32_t ioc_nal; - - /* buffers the kernel will treat as user pointers */ - uint32_t ioc_plen1; - char *ioc_pbuf1; - uint32_t ioc_plen2; - char *ioc_pbuf2; - - /* inline buffers for various arguments */ - uint32_t ioc_inllen1; - char *ioc_inlbuf1; - uint32_t ioc_inllen2; - char *ioc_inlbuf2; - uint32_t ioc_inllen3; - char *ioc_inlbuf3; - uint32_t ioc_inllen4; - char *ioc_inlbuf4; - - 
char ioc_bulk[0]; -}; - -struct obd_ioctl_hdr { - uint32_t ioc_len; - uint32_t ioc_version; -}; - -static inline int obd_ioctl_packlen(struct obd_ioctl_data *data) -{ - int len = size_round(sizeof(struct obd_ioctl_data)); - len += size_round(data->ioc_inllen1); - len += size_round(data->ioc_inllen2); - len += size_round(data->ioc_inllen3); - len += size_round(data->ioc_inllen4); - return len; -} - - -static inline int obd_ioctl_is_invalid(struct obd_ioctl_data *data) -{ - if (data->ioc_len > (1<<30)) { - printk("OBD ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - printk("OBD ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - printk("OBD ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen3 > (1<<30)) { - printk("OBD ioctl: ioc_inllen3 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen4 > (1<<30)) { - printk("OBD ioctl: ioc_inllen4 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - printk("OBD ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - printk("OBD ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf3 && !data->ioc_inllen3) { - printk("OBD ioctl: inlbuf3 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf4 && !data->ioc_inllen4) { - printk("OBD ioctl: inlbuf4 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - printk("OBD ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - printk("OBD ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - printk("OBD ioctl: plen1 set but NULL pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - printk("OBD ioctl: plen2 set but NULL pointer\n"); - return 1; - } - if (obd_ioctl_packlen(data) != data->ioc_len) { - 
printk("OBD ioctl: packlen exceeds ioc_len (%d != %d)\n", - obd_ioctl_packlen(data), data->ioc_len); - return 1; - } - return 0; -} - -#ifndef __KERNEL__ -static inline int obd_ioctl_pack(struct obd_ioctl_data *data, char **pbuf, - int max) -{ - char *ptr; - struct obd_ioctl_data *overlay; - data->ioc_len = obd_ioctl_packlen(data); - data->ioc_version = OBD_IOCTL_VERSION; - - if (*pbuf && data->ioc_len > max) - return 1; - if (*pbuf == NULL) { - *pbuf = malloc(data->ioc_len); - } - if (!*pbuf) - return 1; - overlay = (struct obd_ioctl_data *)*pbuf; - memcpy(*pbuf, data, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (data->ioc_inlbuf3) - LOGL(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - if (data->ioc_inlbuf4) - LOGL(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - if (obd_ioctl_is_invalid(overlay)) - return 1; - - return 0; -} - -static inline int obd_ioctl_unpack(struct obd_ioctl_data *data, char *pbuf, - int max) -{ - char *ptr; - struct obd_ioctl_data *overlay; - - if (!pbuf) - return 1; - overlay = (struct obd_ioctl_data *)pbuf; - - /* Preserve the caller's buffer pointers */ - overlay->ioc_inlbuf1 = data->ioc_inlbuf1; - overlay->ioc_inlbuf2 = data->ioc_inlbuf2; - overlay->ioc_inlbuf3 = data->ioc_inlbuf3; - overlay->ioc_inlbuf4 = data->ioc_inlbuf4; - - memcpy(data, pbuf, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGU(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGU(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (data->ioc_inlbuf3) - LOGU(data->ioc_inlbuf3, data->ioc_inllen3, ptr); - if (data->ioc_inlbuf4) - LOGU(data->ioc_inlbuf4, data->ioc_inllen4, ptr); - - return 0; -} -#endif - -#include - -/* buffer MUST be at least the size of obd_ioctl_hdr */ -static inline int obd_ioctl_getdata(char **buf, int *len, void *arg) -{ - struct obd_ioctl_hdr hdr; - 
struct obd_ioctl_data *data; - int err; - ENTRY; - - err = copy_from_user(&hdr, (void *)arg, sizeof(hdr)); - if ( err ) { - EXIT; - return err; - } - - if (hdr.ioc_version != OBD_IOCTL_VERSION) { - CERROR("Version mismatch kernel vs application\n"); - return -EINVAL; - } - - if (hdr.ioc_len > OBD_MAX_IOCTL_BUFFER) { - CERROR("User buffer len %d exceeds %d max buffer\n", - hdr.ioc_len, OBD_MAX_IOCTL_BUFFER); - return -EINVAL; - } - - if (hdr.ioc_len < sizeof(struct obd_ioctl_data)) { - printk("OBD: user buffer too small for ioctl\n"); - return -EINVAL; - } - - /* XXX allocate this more intelligently, using kmalloc when - * appropriate */ - OBD_VMALLOC(*buf, hdr.ioc_len); - if (*buf == NULL) { - CERROR("Cannot allocate control buffer of len %d\n", - hdr.ioc_len); - RETURN(-EINVAL); - } - *len = hdr.ioc_len; - data = (struct obd_ioctl_data *)*buf; - - err = copy_from_user(*buf, (void *)arg, hdr.ioc_len); - if ( err ) { - EXIT; - return err; - } - - if (obd_ioctl_is_invalid(data)) { - CERROR("ioctl not correctly formatted\n"); - return -EINVAL; - } - - if (data->ioc_inllen1) { - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - } - - if (data->ioc_inllen2) { - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); - } - - if (data->ioc_inllen3) { - data->ioc_inlbuf3 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1) + - size_round(data->ioc_inllen2); - } - - if (data->ioc_inllen4) { - data->ioc_inlbuf4 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1) + - size_round(data->ioc_inllen2) + - size_round(data->ioc_inllen3) ; - } - - EXIT; - return 0; -} - -static inline void obd_ioctl_freedata(char *buf, int len) -{ - ENTRY; - - OBD_VFREE(buf, len); - EXIT; - return; -} - -#define OBD_IOC_CREATE _IOR ('f', 101, long) -#define OBD_IOC_SETUP _IOW ('f', 102, long) -#define OBD_IOC_CLEANUP _IO ('f', 103 ) -#define OBD_IOC_DESTROY _IOW ('f', 104, long) -#define OBD_IOC_PREALLOCATE _IOWR('f', 105, long) - -#define OBD_IOC_SETATTR _IOW ('f', 107, long) -#define 
OBD_IOC_GETATTR _IOR ('f', 108, long) -#define OBD_IOC_READ _IOWR('f', 109, long) -#define OBD_IOC_WRITE _IOWR('f', 110, long) -#define OBD_IOC_CONNECT _IOR ('f', 111, long) -#define OBD_IOC_DISCONNECT _IOW ('f', 112, long) -#define OBD_IOC_STATFS _IOWR('f', 113, long) -#define OBD_IOC_SYNC _IOR ('f', 114, long) -#define OBD_IOC_READ2 _IOWR('f', 115, long) -#define OBD_IOC_FORMAT _IOWR('f', 116, long) -#define OBD_IOC_PARTITION _IOWR('f', 117, long) -#define OBD_IOC_ATTACH _IOWR('f', 118, long) -#define OBD_IOC_DETACH _IOWR('f', 119, long) -#define OBD_IOC_COPY _IOWR('f', 120, long) -#define OBD_IOC_MIGR _IOWR('f', 121, long) -#define OBD_IOC_PUNCH _IOWR('f', 122, long) -#define OBD_IOC_DEVICE _IOWR('f', 123, long) -#define OBD_IOC_MODULE_DEBUG _IOWR('f', 124, long) -#define OBD_IOC_BRW_READ _IOWR('f', 125, long) -#define OBD_IOC_BRW_WRITE _IOWR('f', 126, long) -#define OBD_IOC_NAME2DEV _IOWR('f', 127, long) -#define OBD_IOC_NEWDEV _IOWR('f', 128, long) -#define OBD_IOC_LIST _IOWR('f', 129, long) -#define OBD_IOC_UUID2DEV _IOWR('f', 130, long) - -#define OBD_IOC_LOV_SET_CONFIG _IOWR('f', 131, long) -#define OBD_IOC_LOV_GET_CONFIG _IOWR('f', 132, long) -#define OBD_IOC_LOV_CONFIG OBD_IOC_LOV_SET_CONFIG -#define OBD_IOC_CLIENT_RECOVER _IOW ('f', 133, long) - -#define OBD_IOC_OPEN _IOWR('f', 134, long) -#define OBD_IOC_CLOSE _IOWR('f', 135, long) - -#define OBD_IOC_DEC_FS_USE_COUNT _IO ('f', 139 ) -#define OBD_IOC_NO_TRANSNO _IOW ('f', 140, long) -#define OBD_IOC_SET_READONLY _IOW ('f', 141, long) -#define OBD_IOC_ABORT_RECOVERY _IOR ('f', 142, long) - -#define OBD_GET_VERSION _IOWR ('f', 144, long) - -#define OBD_IOC_ADD_UUID _IOWR ('f', 145, long) -#define OBD_IOC_DEL_UUID _IOWR ('f', 146, long) -#define OBD_IOC_CLOSE_UUID _IOWR ('f', 147, long) - -#define OBD_IOC_MOUNTOPT _IOWR('f', 170, long) - -#define ECHO_IOC_GET_STRIPE _IOWR('f', 200, long) -#define ECHO_IOC_SET_STRIPE _IOWR('f', 201, long) -#define ECHO_IOC_ENQUEUE _IOWR('f', 202, long) -#define 
ECHO_IOC_CANCEL _IOWR('f', 203, long) - -/* XXX _IOWR('f', 250, long) has been defined in - * portals/include/linux/kp30.h for debug, don't use it - */ - -/* Until such time as we get_info the per-stripe maximum from the OST, - * we define this to be 2T - 4k, which is the ext3 maxbytes. */ -#define LUSTRE_STRIPE_MAXBYTES 0x1fffffff000ULL - -#define CHECKSUM_BULK 0 - -#if CHECKSUM_BULK -static inline void ost_checksum(obd_count *cksum, void *addr, int len) -{ - unsigned char *ptr = (unsigned char *)addr; - obd_count sum = 0; - - /* very stupid, but means I don't have to think about byte order */ - while (len-- > 0) - sum += *ptr++; - - *cksum = (*cksum << 2) + sum; -} -#endif - -/* - * l_wait_event is a flexible sleeping function, permitting simple caller - * configuration of interrupt and timeout sensitivity along with actions to - * be performed in the event of either exception. - * - * Common usage looks like this: - * - * struct l_wait_info lwi = LWI_TIMEOUT_INTR(timeout, timeout_handler, - * intr_handler, callback_data); - * rc = l_wait_event(waitq, condition, &lwi); - * - * (LWI_TIMEOUT and LWI_INTR macros are available for timeout- and - * interrupt-only variants, respectively.) - * - * If a timeout is specified, the timeout_handler will be invoked in the event - * that the timeout expires before the process is awakened. (Note that any - * waking of the process will restart the timeout, even if the condition is - * not satisfied and the process immediately returns to sleep. This might be - * considered a bug.) If the timeout_handler returns non-zero, l_wait_event - * will return -ETIMEDOUT and the caller will continue. If the handler returns - * zero instead, the process will go back to sleep until it is awakened by the - * waitq or some similar mechanism, or an interrupt occurs (if the caller has - * asked for interrupts to be detected). 
The timeout will only fire once, so - * callers should take care that a timeout_handler which returns zero will take - * future steps to awaken the process. N.B. that these steps must include - * making the provided condition become true. - * - * If the interrupt flag (lwi_signals) is non-zero, then the process will be - * interruptible, and will be awakened by any "killable" signal (SIGTERM, - * SIGKILL or SIGINT). If a timeout is also specified, then the process will - * only become interruptible _after_ the timeout has expired, though it can be - * awakened by a signal that was delivered before the timeout and is still - * pending when the timeout expires. If a timeout is not specified, the process - * will be interruptible at all times during l_wait_event. - */ - -struct l_wait_info { - long lwi_timeout; - int (*lwi_on_timeout)(void *); - long lwi_signals; - void (*lwi_on_signal)(void *); - void *lwi_cb_data; -}; - -#define LWI_TIMEOUT(time, cb, data) \ -((struct l_wait_info) { \ - lwi_timeout: time, \ - lwi_on_timeout: cb, \ - lwi_cb_data: data \ -}) - -#define LWI_INTR(cb, data) \ -((struct l_wait_info) { \ - lwi_signals: 1, \ - lwi_on_signal: cb, \ - lwi_cb_data: data \ -}) - -#define LWI_TIMEOUT_INTR(time, time_cb, sig_cb, data) \ -((struct l_wait_info) { \ - lwi_timeout: time, \ - lwi_on_timeout: time_cb, \ - lwi_signals: 1, \ - lwi_on_signal: sig_cb, \ - lwi_cb_data: data \ -}) - -#define LUSTRE_FATAL_SIGS (sigmask(SIGKILL) | sigmask(SIGINT) | \ - sigmask(SIGTERM) | sigmask(SIGQUIT)) - -#ifdef __KERNEL__ -static inline sigset_t l_w_e_set_sigs(int sigs) -{ - sigset_t old; - unsigned long irqflags; - - SIGNAL_MASK_LOCK(current, irqflags); - old = current->blocked; - siginitsetinv(¤t->blocked, sigs); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, irqflags); - - return old; -} - -#define __l_wait_event(wq, condition, info, ret) \ -do { \ - wait_queue_t __wait; \ - int __timed_out = 0; \ - unsigned long irqflags; \ - sigset_t blocked; \ - \ - 
init_waitqueue_entry(&__wait, current); \ - add_wait_queue(&wq, &__wait); \ - \ - /* Block all signals (just the non-fatal ones if no timeout). */ \ - if (info->lwi_signals && !info->lwi_timeout) \ - blocked = l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ - else \ - blocked = l_w_e_set_sigs(0); \ - \ - for (;;) { \ - set_current_state(TASK_INTERRUPTIBLE); \ - if (condition) \ - break; \ - if (signal_pending(current)) { \ - if (info->lwi_on_signal) \ - info->lwi_on_signal(info->lwi_cb_data); \ - ret = -EINTR; \ - break; \ - } \ - if (info->lwi_timeout && !__timed_out) { \ - if (schedule_timeout(info->lwi_timeout) == 0) { \ - __timed_out = 1; \ - if (!info->lwi_on_timeout || \ - info->lwi_on_timeout(info->lwi_cb_data)) { \ - ret = -ETIMEDOUT; \ - break; \ - } \ - /* We'll take signals after a timeout. */ \ - if (info->lwi_signals) \ - (void)l_w_e_set_sigs(LUSTRE_FATAL_SIGS); \ - } \ - } else { \ - schedule(); \ - } \ - } \ - \ - SIGNAL_MASK_LOCK(current, irqflags); \ - current->blocked = blocked; \ - RECALC_SIGPENDING; \ - SIGNAL_MASK_UNLOCK(current, irqflags); \ - \ - current->state = TASK_RUNNING; \ - remove_wait_queue(&wq, &__wait); \ -} while(0) - -#define l_wait_event(wq, condition, info) \ -({ \ - int __ret = 0; \ - struct l_wait_info *__info = (info); \ - if (!(condition)) \ - __l_wait_event(wq, condition, __info, __ret); \ - __ret; \ -}) -#else -#define l_wait_event(wq, condition, info) \ -({ \ - 0; \ -}) -#endif /* __KERNEL__ */ - -#endif /* _LUSTRE_LIB_H */ diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h deleted file mode 100644 index 81184e7..0000000 --- a/lustre/include/linux/lustre_lite.h +++ /dev/null @@ -1,380 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lustre lite cluster file system - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- */ - - - -#ifndef _LL_H -#define _LL_H - -#ifdef __KERNEL__ - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -/* careful, this is easy to screw up */ -#define PAGE_CACHE_MAXBYTES ((__u64)(~0UL) << PAGE_CACHE_SHIFT) - -extern kmem_cache_t *ll_file_data_slab; -struct ll_file_data { - struct obd_client_handle fd_mds_och; - struct obd_client_handle fd_ost_och; - __u32 fd_flags; -}; - -struct lustre_intent_data { - __u64 it_lock_handle[2]; - __u32 it_disposition; - __u32 it_status; - __u32 it_lock_mode; -}; - -struct ll_dentry_data { - struct semaphore lld_it_sem; -}; - -#define ll_d2d(dentry) ((struct ll_dentry_data*) dentry->d_fsdata) - -extern struct file_operations ll_pgcache_seq_fops; - -struct ll_inode_info { - struct lov_stripe_md *lli_smd; - char *lli_symlink_name; - struct semaphore lli_open_sem; - struct list_head lli_read_extents; - loff_t lli_maxbytes; - spinlock_t lli_read_extent_lock; - unsigned long lli_flags; -#define LLI_F_HAVE_SIZE_LOCK 0 - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - struct inode lli_vfs_inode; -#endif -}; - -/* - * this lets ll_file_read tell ll_readpages how far ahead it can read - * and still be covered by ll_file_read's lock. 2.5 won't need this, but - * we have the other problem of other readpage callers making sure that - * they're covered by a lock.. 
- */ -struct ll_read_extent { - struct list_head re_lli_item; - struct task_struct *re_task; - struct ldlm_extent re_extent; -}; - -int ll_check_dirty( struct super_block *sb ); -int ll_batch_writepage( struct inode *inode, struct page *page ); - -/* interpet return codes from intent lookup */ -#define LL_LOOKUP_POSITIVE 1 -#define LL_LOOKUP_NEGATIVE 2 - -#define LL_SUPER_MAGIC 0x0BD00BD0 - -#define LL_COMMITCBD_STOPPING 0x1 -#define LL_COMMITCBD_STOPPED 0x2 -#define LL_COMMITCBD_RUNNING 0x4 - -#define LL_SBI_NOLCK 0x1 - -struct ll_sb_info { - struct obd_uuid ll_sb_uuid; - struct lustre_handle ll_mdc_conn; - struct lustre_handle ll_osc_conn; - struct proc_dir_entry* ll_proc_root; - obd_id ll_rootino; /* number of root inode */ - - int ll_flags; - wait_queue_head_t ll_commitcbd_waitq; - wait_queue_head_t ll_commitcbd_ctl_waitq; - int ll_commitcbd_flags; - struct task_struct *ll_commitcbd_thread; - time_t ll_commitcbd_waketime; - time_t ll_commitcbd_timeout; - spinlock_t ll_commitcbd_lock; - struct list_head ll_conn_chain; /* per-conn chain of SBs */ - - struct list_head ll_orphan_dentry_list; /*please don't ask -p*/ - - struct lprocfs_stats *ll_stats; /* lprocfs stats counter */ -}; - -static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - return (struct ll_sb_info *)(sb->s_fs_info); -#else - return (struct ll_sb_info *)(sb->u.generic_sbp); -#endif -} - -static inline struct lustre_handle *ll_s2obdconn(struct super_block *sb) -{ - return &(ll_s2sbi(sb))->ll_osc_conn; -} - -static inline struct client_obd *sbi2mdc(struct ll_sb_info *sbi) -{ - struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn); - if (obd == NULL) - LBUG(); - return &obd->u.cli; -} - -// FIXME: replace the name of this with LL_SB to conform to kernel stuff -static inline struct ll_sb_info *ll_i2sbi(struct inode *inode) -{ - return ll_s2sbi(inode->i_sb); -} - -static inline void d_unhash_aliases(struct inode *inode) -{ - struct 
dentry *dentry = NULL; - struct list_head *tmp; - struct ll_sb_info *sbi = ll_i2sbi(inode); - ENTRY; - - CDEBUG(D_INODE, "marking dentries for ino %lx/%x invalid\n", - inode->i_ino, inode->i_generation); - - spin_lock(&dcache_lock); - list_for_each(tmp, &inode->i_dentry) { - dentry = list_entry(tmp, struct dentry, d_alias); - - list_del_init(&dentry->d_hash); - dentry->d_flags |= DCACHE_LUSTRE_INVALID; - list_add(&dentry->d_hash, &sbi->ll_orphan_dentry_list); - } - - spin_unlock(&dcache_lock); - EXIT; -} - -// FIXME: replace the name of this with LL_I to conform to kernel stuff -// static inline struct ll_inode_info *LL_I(struct inode *inode) -static inline struct ll_inode_info *ll_i2info(struct inode *inode) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - return container_of(inode, struct ll_inode_info, lli_vfs_inode); -#else - return (struct ll_inode_info *)&(inode->u.generic_ip); -#endif -} - -static inline struct lustre_handle *ll_i2obdconn(struct inode *inode) -{ - return ll_s2obdconn(inode->i_sb); -} - -static inline void ll_ino2fid(struct ll_fid *fid, obd_id ino, __u32 generation, - int type); - -static inline void ll_inode2fid(struct ll_fid *fid, struct inode *inode) -{ - ll_ino2fid(fid, inode->i_ino, inode->i_generation, - inode->i_mode & S_IFMT); -} - -static inline int ll_mds_max_easize(struct super_block *sb) -{ - return sbi2mdc(ll_s2sbi(sb))->cl_max_mds_easize; -} - -static inline loff_t ll_file_maxbytes(struct inode *inode) -{ - return ll_i2info(inode)->lli_maxbytes; -} - -/* namei.c */ -int ll_lock(struct inode *dir, struct dentry *dentry, - struct lookup_intent *it, struct lustre_handle *lockh); -int ll_unlock(__u32 mode, struct lustre_handle *lockh); - -typedef int (*intent_finish_cb)(int flag, struct ptlrpc_request *, - struct inode *parent, struct dentry **, - struct lookup_intent *, int offset, obd_id ino); -int ll_intent_lock(struct inode *parent, struct dentry **, - struct lookup_intent *, intent_finish_cb); -int 
ll_mdc_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag); -void ll_mdc_lock_set_inode(struct lustre_handle *lock, struct inode *inode); -void ll_prepare_mdc_op_data(struct mdc_op_data *data, - struct inode *i1, struct inode *i2, - const char *name, int namelen, int mode); - -/* dcache.c */ -void ll_intent_release(struct dentry *, struct lookup_intent *); - -/**** - -I originally implmented these as functions, then realized a macro -would be more helpful for debugging, so the CDEBUG messages show -the current calling function. The orignal functions are in llite/dcache.c - -int ll_save_intent(struct dentry * de, struct lookup_intent * it); -struct lookup_intent * ll_get_intent(struct dentry * de); -****/ - -#define IT_RELEASED_MAGIC 0xDEADCAFE - -#define LL_SAVE_INTENT(de, it) \ -do { \ - LASSERT(ll_d2d(de) != NULL); \ - \ - down(&ll_d2d(de)->lld_it_sem); \ - LASSERT(de->d_it == NULL); \ - de->d_it = it; \ - CDEBUG(D_DENTRY, \ - "D_IT DOWN dentry %p fsdata %p intent: %p %s sem %d\n", \ - de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op), \ - atomic_read(&(ll_d2d(de)->lld_it_sem.count))); \ -} while(0) - -#define LL_GET_INTENT(de, it) \ -do { \ - it = de->d_it; \ - \ - LASSERT(ll_d2d(de) != NULL); \ - LASSERT(it); \ - LASSERT(it->it_op != IT_RELEASED_MAGIC); \ - \ - CDEBUG(D_DENTRY, "D_IT UP dentry %p fsdata %p intent: %p %s\n", \ - de, ll_d2d(de), de->d_it, ldlm_it2str(de->d_it->it_op)); \ - de->d_it = NULL; \ - it->it_op = IT_RELEASED_MAGIC; \ - up(&ll_d2d(de)->lld_it_sem); \ -} while(0) - -#define LL_IT2STR(it) ((it) ? 
ldlm_it2str((it)->it_op) : "0") - -enum { - LPROC_LL_DIRTY_HITS = 0, - LPROC_LL_DIRTY_MISSES, - LPROC_LL_WB_WRITEPAGE, - LPROC_LL_WB_PRESSURE, - LPROC_LL_WB_OK, - LPROC_LL_WB_FAIL, - LPROC_LL_READ_BYTES, - LPROC_LL_WRITE_BYTES, - LPROC_LL_BRW_READ, - LPROC_LL_BRW_WRITE, - LPROC_LL_IOCTL, - LPROC_LL_OPEN, - LPROC_LL_RELEASE, - LPROC_LL_MAP, - LPROC_LL_LLSEEK, - LPROC_LL_FSYNC, - LPROC_LL_SETATTR_RAW, - LPROC_LL_SETATTR, - LPROC_LL_TRUNC, - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - LPROC_LL_GETATTR, -#else - LPROC_LL_REVALIDATE, -#endif - LPROC_LL_STAFS, - LPROC_LL_ALLOC_INODE, - - LPROC_LL_DIRECT_READ, - LPROC_LL_DIRECT_WRITE, - LPROC_LL_FILE_OPCODES -}; -/* dcache.c */ -int ll_have_md_lock(struct dentry *de); - -/* dir.c */ -extern struct file_operations ll_dir_operations; -extern struct inode_operations ll_dir_inode_operations; - -/* file.c */ -extern struct file_operations ll_file_operations; -extern struct inode_operations ll_file_inode_operations; -extern struct inode_operations ll_special_inode_operations; -struct ldlm_lock; -int ll_extent_lock_callback(struct ldlm_lock *, struct ldlm_lock_desc *, - void *data, int flag); -int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct ldlm_extent *extent, struct lustre_handle *lockh); -int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct ldlm_extent *extent, struct lustre_handle *lockh); -int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct lustre_handle *lockh); -int ll_create_objects(struct super_block *sb, obd_id id, uid_t uid, - gid_t gid, struct lov_stripe_md **lsmp); -int ll_file_open(struct inode *inode, struct file *file); -int ll_file_release(struct inode *inode, struct file *file); - - -/* rw.c */ -struct page *ll_getpage(struct inode *inode, unsigned long offset, - int create, int locked); -void 
ll_truncate(struct inode *inode); - -/* super.c */ -void ll_update_inode(struct inode *, struct mds_body *, struct lov_stripe_md *); -int ll_setattr_raw(struct inode *inode, struct iattr *attr); - -/* symlink.c */ -extern struct inode_operations ll_fast_symlink_inode_operations; -extern struct inode_operations ll_symlink_inode_operations; - -/* sysctl.c */ -void ll_sysctl_init(void); -void ll_sysctl_clean(void); - -#else -#include -#endif /* __KERNEL__ */ - -static inline void ll_ino2fid(struct ll_fid *fid, - obd_id ino, - __u32 generation, - int type) -{ - fid->id = ino; - fid->generation = generation; - fid->f_type = type; -} - -struct ll_read_inode2_cookie { - struct mds_body *lic_body; - struct lov_stripe_md *lic_lsm; -}; - -#include - -#define LL_IOC_GETFLAGS _IOR ('f', 151, long) -#define LL_IOC_SETFLAGS _IOW ('f', 152, long) -#define LL_IOC_CLRFLAGS _IOW ('f', 153, long) -#define LL_IOC_LOV_SETSTRIPE _IOW ('f', 154, long) -#define LL_IOC_LOV_GETSTRIPE _IOW ('f', 155, long) - -#define O_LOV_DELAY_CREATE 0100000000 /* hopefully this does not conflict */ - -#define LL_FILE_IGNORE_LOCK 0x00000001 - -#endif diff --git a/lustre/include/linux/lustre_mds.h b/lustre/include/linux/lustre_mds.h deleted file mode 100644 index 683d78d..0000000 --- a/lustre/include/linux/lustre_mds.h +++ /dev/null @@ -1,305 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * MDS data structures. - * See also lustre_idl.h for wire formats of requests. - * - */ - -#ifndef _LUSTRE_MDS_H -#define _LUSTRE_MDS_H - -#ifdef __KERNEL__ -# include -# include -#endif -#include -#include -#include -#include -#include - -struct ldlm_lock_desc; -struct mds_obd; -struct ptlrpc_connection; -struct ptlrpc_client; -struct obd_export; -struct ptlrpc_request; -struct obd_device; -struct ll_file_data; - -#define LUSTRE_MDS_NAME "mds" -#define LUSTRE_MDT_NAME "mdt" -#define LUSTRE_MDC_NAME "mdc" - -struct mdc_rpc_lock { - struct semaphore rpcl_sem; - struct lookup_intent *rpcl_it; -}; -extern struct mdc_rpc_lock mdc_rpc_lock; -extern struct mdc_rpc_lock mdc_setattr_lock; - -static inline void mdc_init_rpc_lock(struct mdc_rpc_lock *lck) -{ - sema_init(&lck->rpcl_sem, 1); - lck->rpcl_it = NULL; -} - -#ifdef __KERNEL__ -/* Compat code for kernel patch v18 users, can be removed when everyone has - * upgraded --phik 02 June 2003 */ -#ifdef IT_FL_LOCKED -static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - down(&lck->rpcl_sem); - if (it) { - lck->rpcl_it = it; - it->it_int_flags |= IT_FL_LOCKED; - } -} - -static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - if (it == NULL) { - LASSERT(it == lck->rpcl_it); - up(&lck->rpcl_sem); - return; - } - if (it != NULL && (it->it_int_flags & IT_FL_LOCKED)) { - it->it_int_flags &= ~IT_FL_LOCKED; - LASSERT(it == lck->rpcl_it); - lck->rpcl_it = NULL; - up(&lck->rpcl_sem); - } -} -#else -static inline void mdc_get_rpc_lock(struct mdc_rpc_lock *lck, - struct lookup_intent *it) -{ - down(&lck->rpcl_sem); - if (it) { - lck->rpcl_it = it; - it->it_iattr = (void *)1; - } -} - -static inline void mdc_put_rpc_lock(struct mdc_rpc_lock *lck, - struct 
lookup_intent *it) -{ - if (it == NULL) { - LASSERT(it == lck->rpcl_it); - up(&lck->rpcl_sem); - return; - } - if (it && it->it_iattr) { - it->it_iattr = NULL; - LASSERT(it == lck->rpcl_it); - lck->rpcl_it = NULL; - up(&lck->rpcl_sem); - } -} -#endif -#endif - -struct mdc_op_data { - __u64 ino1; - __u32 gen1; - __u32 typ1; - __u32 gid1; - __u64 ino2; - __u32 gen2; - __u32 typ2; - __u32 gid2; - const char *name; - int namelen; - int mode; -}; - -struct mds_update_record { - __u32 ur_opcode; - struct ll_fid *ur_fid1; - struct ll_fid *ur_fid2; - int ur_namelen; - char *ur_name; - int ur_tgtlen; - char *ur_tgt; - int ur_eadatalen; - void *ur_eadata; - struct iattr ur_iattr; - struct obd_ucred ur_uc; - __u64 ur_rdev; - __u32 ur_mode; - __u32 ur_uid; - __u32 ur_gid; - __u64 ur_time; - __u32 ur_flags; -}; - -#define ur_fsuid ur_uc.ouc_fsuid -#define ur_fsgid ur_uc.ouc_fsgid -#define ur_cap ur_uc.ouc_cap -#define ur_suppgid1 ur_uc.ouc_suppgid1 -#define ur_suppgid2 ur_uc.ouc_suppgid2 - -#define MDS_LR_CLIENT 8192 -#define MDS_LR_SIZE 128 - -#define MDS_CLIENT_SLOTS 17 - -#define MDS_MOUNT_RECOV 2 - -/* Data stored per server at the head of the last_rcvd file. In le32 order. */ -struct mds_server_data { - __u8 msd_uuid[37]; /* server UUID */ - __u8 uuid_padding[3]; /* unused */ - __u64 msd_last_transno; /* last completed transaction ID */ - __u64 msd_mount_count; /* MDS incarnation number */ - __u8 padding[512 - 56]; -}; - -/* Data stored per client in the last_rcvd file. In le32 order. */ -struct mds_client_data { - __u8 mcd_uuid[37]; /* client UUID */ - __u8 uuid_padding[3]; /* unused */ - __u64 mcd_mount_count; /* MDS incarnation number */ - __u64 mcd_last_transno; /* last completed transaction ID */ - __u64 mcd_last_xid; /* xid for the last transaction */ - __u32 mcd_last_result; /* result from last RPC */ - __u32 mcd_last_data; /* per-op data (disposition for open &c.) 
*/ - __u8 padding[MDS_LR_SIZE - 74]; -}; - -/* file data for open files on MDS */ -struct mds_file_data { - struct portals_handle mfd_handle; /* must be first */ - atomic_t mfd_refcount; - struct list_head mfd_list; - __u64 mfd_xid; - int mfd_mode; - struct dentry *mfd_dentry; -}; - -/* mds/mds_reint.c */ -int mds_reint_rec(struct mds_update_record *r, int offset, - struct ptlrpc_request *req, struct lustre_handle *); - -/* mds/mds_open.c */ -int mds_open(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, struct lustre_handle *); - -/* mds/handler.c */ -#ifdef __KERNEL__ -struct dentry *mds_name2locked_dentry(struct obd_device *, struct dentry *dir, - struct vfsmount **mnt, char *name, - int namelen, int lock_mode, - struct lustre_handle *lockh, - int dir_lock_mode); -struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid, - struct vfsmount **mnt, int lock_mode, - struct lustre_handle *lockh); -struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, - struct vfsmount **mnt); -int mds_reint(struct ptlrpc_request *req, int offset, struct lustre_handle *); -int mds_pack_md(struct obd_device *mds, struct lustre_msg *msg, - int offset, struct mds_body *body, struct inode *inode); -void mds_steal_ack_locks(struct obd_export *exp, - struct ptlrpc_request *req); - -/* mds/mds_fs.c */ -int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt); -int mds_fs_cleanup(struct obd_device *obddev, int failover); -#endif - -/* mdc/mdc_request.c */ -int mdc_enqueue(struct lustre_handle *conn, int lock_type, - struct lookup_intent *it, int lock_mode, - struct mdc_op_data *enq_data, - struct lustre_handle *lockh, char *tgt, int tgtlen, - ldlm_completion_callback cb_completion, - ldlm_blocking_callback cb_blocking, - void *cb_data); -int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh, - struct ptlrpc_request **request); -int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid); 
-int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid, - unsigned long valid, unsigned int ea_size, - struct ptlrpc_request **request); -int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid, - char *filename, int namelen, unsigned long valid, - unsigned int ea_size, struct ptlrpc_request **request); -int mdc_setattr(struct lustre_handle *conn, - struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen, - struct ptlrpc_request **request); -int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags, - struct lov_mds_md *lmm, int lmm_size, struct lustre_handle *fh, - struct ptlrpc_request **); -struct obd_client_handle; -void mdc_set_open_replay_data(struct obd_client_handle *och); -int mdc_close(struct lustre_handle *conn, obd_id ino, int type, - struct lustre_handle *fh, struct ptlrpc_request **req); -int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset, - struct page *, struct ptlrpc_request **); -int mdc_create(struct lustre_handle *conn, struct mdc_op_data *op_data, - const void *data, int datalen, int mode, __u32 uid, __u32 gid, - __u64 time, __u64 rdev, struct ptlrpc_request **request); -int mdc_unlink(struct lustre_handle *conn, struct mdc_op_data *data, - struct ptlrpc_request **request); -int mdc_link(struct lustre_handle *conn, struct mdc_op_data *data, - struct ptlrpc_request **); -int mdc_rename(struct lustre_handle *conn, struct mdc_op_data *data, - const char *old, int oldlen, const char *new, int newlen, - struct ptlrpc_request **request); -int mdc_create_client(struct obd_uuid uuid, struct ptlrpc_client *cl); - -/* Store the generation of a newly-created inode in |req| for replay. 
*/ -void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, - int repoff); - - -/* ioctls for trying requests */ -#define IOC_REQUEST_TYPE 'f' -#define IOC_REQUEST_MIN_NR 30 - -#define IOC_REQUEST_GETATTR _IOWR('f', 30, long) -#define IOC_REQUEST_READPAGE _IOWR('f', 31, long) -#define IOC_REQUEST_SETATTR _IOWR('f', 32, long) -#define IOC_REQUEST_CREATE _IOWR('f', 33, long) -#define IOC_REQUEST_OPEN _IOWR('f', 34, long) -#define IOC_REQUEST_CLOSE _IOWR('f', 35, long) -#define IOC_REQUEST_MAX_NR 35 - -#define MDS_CHECK_RESENT(req, reconstruct) \ -{ \ - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT) { \ - struct mds_client_data *mcd = \ - req->rq_export->exp_mds_data.med_mcd; \ - if (mcd->mcd_last_xid == req->rq_xid) { \ - reconstruct; \ - RETURN(0); \ - } \ - DEBUG_REQ(D_HA, req, "no reply for RESENT req (have "LPD64")", \ - mcd->mcd_last_xid); \ - } \ -} - -#endif diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h deleted file mode 100644 index ac87d7f..0000000 --- a/lustre/include/linux/lustre_net.h +++ /dev/null @@ -1,541 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef _LUSTRE_NET_H -#define _LUSTRE_NET_H - -#ifdef __KERNEL__ -#include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include -#else -#include -#endif -#endif - -#include -// #include -#include -#include -#include -#include -#include - -/* The following constants determine how much memory is devoted to - * buffering in the lustre services. - * - * ?_NEVENTS # event queue entries - * - * ?_NBUFS # request buffers - * ?_BUFSIZE # bytes in a single request buffer - * total memory = ?_NBUFS * ?_BUFSIZE - * - * ?_MAXREQSIZE # maximum request service will receive - * larger messages will get dropped. - * request buffers are auto-unlinked when less than ?_MAXREQSIZE - * is left in them. - */ - -#define LDLM_NUM_THREADS 4 -#define LDLM_NEVENT_MAX 8192UL -#define LDLM_NEVENTS min(num_physpages / 64, LDLM_NEVENT_MAX) -#define LDLM_NBUF_MAX 256UL -#define LDLM_NBUFS min(LDLM_NEVENTS / 16, LDLM_NBUF_MAX) -#define LDLM_BUFSIZE (8 * 1024) -#define LDLM_MAXREQSIZE 1024 - -#define MDT_MAX_THREADS 32UL -#define MDT_NUM_THREADS min(num_physpages / 8192, MDT_MAX_THREADS) -#define MDS_NEVENT_MAX 8192UL -#define MDS_NEVENTS min(num_physpages / 64, MDS_NEVENT_MAX) -#define MDS_NBUF_MAX 512UL -#define MDS_NBUFS min(MDS_NEVENTS / 16, MDS_NBUF_MAX) -#define MDS_BUFSIZE (8 * 1024) -/* Assume file name length = FNAME_MAX = 256 (true for extN). - * path name length = PATH_MAX = 4096 - * LOV MD size max = EA_MAX = 4000 - * symlink: FNAME_MAX + PATH_MAX <- largest - * link: FNAME_MAX + PATH_MAX (mds_rec_link < mds_rec_create) - * rename: FNAME_MAX + FNAME_MAX - * open: FNAME_MAX + EA_MAX - * - * MDS_MAXREQSIZE ~= 4736 bytes = - * lustre_msg + ldlm_request + mds_body + mds_rec_create + FNAME_MAX + PATH_MAX - * - * Realistic size is about 512 bytes (20 character name + 128 char symlink), - * except in the open case where there are a large number of OSTs in a LOV. 
- */ -#define MDS_MAXREQSIZE (5 * 1024) - -#define OST_MAX_THREADS 36UL -#define OST_NUM_THREADS min(num_physpages / 8192, OST_MAX_THREADS) -#define OST_NEVENT_MAX 32768UL -#define OST_NEVENTS min(num_physpages / 16, OST_NEVENT_MAX) -#define OST_NBUF_MAX 1280UL -#define OST_NBUFS min(OST_NEVENTS / 64, OST_NBUF_MAX) -#define OST_BUFSIZE (8 * 1024) -/* OST_MAXREQSIZE ~= 1640 bytes = - * lustre_msg + obdo + 16 * obd_ioobj + 64 * niobuf_remote - * - * single object with 16 pages is 512 bytes - */ -#define OST_MAXREQSIZE (2 * 1024) - -#define PTLBD_NUM_THREADS 4 -#define PTLBD_NEVENTS 1024 -#define PTLBD_NBUFS 20 -#define PTLBD_BUFSIZE (32 * 1024) -#define PTLBD_MAXREQSIZE 1024 - -#define CONN_INVALID 1 - -struct ptlrpc_peer { - ptl_nid_t peer_nid; - struct ptlrpc_ni *peer_ni; -}; - -struct ptlrpc_connection { - struct list_head c_link; - struct ptlrpc_peer c_peer; - struct obd_uuid c_local_uuid; /* XXX do we need this? */ - struct obd_uuid c_remote_uuid; - - __u32 c_generation; /* changes upon new connection */ - __u32 c_epoch; /* changes when peer changes */ - __u32 c_bootcount; /* peer's boot count */ - - spinlock_t c_lock; - - atomic_t c_refcount; - __u64 c_token; - __u64 c_remote_conn; - __u64 c_remote_token; - - __u32 c_flags; // can we indicate INVALID elsewhere? -}; - -struct ptlrpc_client { - __u32 cli_request_portal; - __u32 cli_reply_portal; - - __u32 cli_target_devno; - - void *cli_data; - char *cli_name; -}; - -/* state flags of requests */ -/* XXX only ones left are those used by the bulk descs as well! 
*/ -#define PTL_RPC_FL_INTR (1 << 0) /* reply wait was interrupted by user */ -#define PTL_RPC_FL_TIMEOUT (1 << 7) /* request timed out waiting for reply */ - -#define REQ_MAX_ACK_LOCKS 4 - -#define SWAB_PARANOIA 1 -#if SWAB_PARANOIA -/* unpacking: assert idx not unpacked already */ -#define LASSERT_REQSWAB(rq, idx) \ -do { \ - LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8); \ - LASSERT (((rq)->rq_req_swab_mask & (1 << (idx))) == 0); \ - (rq)->rq_req_swab_mask |= (1 << (idx)); \ -} while (0) - -#define LASSERT_REPSWAB(rq, idx) \ -do { \ - LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8); \ - LASSERT (((rq)->rq_rep_swab_mask & (1 << (idx))) == 0); \ - (rq)->rq_rep_swab_mask |= (1 << (idx)); \ -} while (0) - -/* just looking: assert idx already unpacked */ -#define LASSERT_REQSWABBED(rq, idx) \ -LASSERT ((idx) < sizeof ((rq)->rq_req_swab_mask) * 8 && \ - ((rq)->rq_req_swab_mask & (1 << (idx))) != 0) - -#define LASSERT_REPSWABBED(rq, idx) \ -LASSERT ((idx) < sizeof ((rq)->rq_rep_swab_mask) * 8 && \ - ((rq)->rq_rep_swab_mask & (1 << (idx))) != 0) -#else -#define LASSERT_REQSWAB(rq, idx) -#define LASSERT_REPSWAB(rq, idx) -#define LASSERT_REQSWABBED(rq, idx) -#define LASSERT_REPSWABBED(rq, idx) -#endif - -union ptlrpc_async_args { - /* Scratchpad for passing args to completion interpreter. Users - * cast to the struct of their choosing, and LASSERT that this is - * big enough. For _tons_ of context, OBD_ALLOC a struct and store - * a pointer to it here. The pointer_arg ensures this struct is at - * least big enough for that. 
*/ - void *pointer_arg[4]; - __u64 space[4]; -}; - -struct ptlrpc_request_set { - int set_remaining; /* # uncompleted requests */ - wait_queue_head_t set_waitq; - struct list_head set_requests; - void *set_interpret; /* completion callback */ - union ptlrpc_async_args set_args; /* completion context */ -}; - -struct ptlrpc_bulk_desc; - -struct ptlrpc_request { - int rq_type; /* one of PTL_RPC_MSG_* */ - struct list_head rq_list; - struct obd_device *rq_obd; - int rq_status; - spinlock_t rq_lock; - unsigned int rq_intr:1, rq_replied:1, rq_want_ack:1, rq_err:1, - rq_timedout:1, rq_resend:1, rq_restart:1, rq_replay:1, - rq_no_resend:1, rq_resent:1, rq_no_recov:1, rq_waiting:1, - rq_receiving_reply:1; - int rq_phase; - - atomic_t rq_refcount; - - int rq_request_portal; /* XXX FIXME bug 249 */ - int rq_reply_portal; /* XXX FIXME bug 249 */ - - int rq_reqlen; - struct lustre_msg *rq_reqmsg; - - int rq_timeout; - int rq_replen; - struct lustre_msg *rq_repmsg; - __u64 rq_transno; - __u64 rq_xid; - -#if SWAB_PARANOIA - __u32 rq_req_swab_mask; - __u32 rq_rep_swab_mask; -#endif - - int rq_import_generation; - int rq_level; - wait_queue_head_t rq_wait_for_rep; /* XXX also _for_ack */ - - /* incoming reply */ - ptl_md_t rq_reply_md; - ptl_handle_md_t rq_reply_md_h; - - /* outgoing req/rep */ - ptl_md_t rq_req_md; - - struct ptlrpc_peer rq_peer; /* XXX see service.c can this be factored away? 
*/ - struct obd_export *rq_export; - struct ptlrpc_connection *rq_connection; - struct obd_import *rq_import; - struct ptlrpc_service *rq_svc; - - void (*rq_replay_cb)(struct ptlrpc_request *); - void *rq_replay_data; - - struct ptlrpc_bulk_desc *rq_bulk; /* client side bulk */ - time_t rq_sent; /* when the request was sent */ - - /* Multi-rpc bits */ - struct list_head rq_set_chain; - struct ptlrpc_request_set *rq_set; - void *rq_interpret_reply; /* Async completion handler */ - union ptlrpc_async_args rq_async_args; /* Async completion context */ - - /* Only used on the server side for tracking acks. */ - struct ptlrpc_req_ack_lock { - struct lustre_handle lock; - __u32 mode; - } rq_ack_locks[REQ_MAX_ACK_LOCKS]; -}; - -#define RQ_PHASE_NEW 0xebc0de00 -#define RQ_PHASE_RPC 0xebc0de01 -#define RQ_PHASE_BULK 0xebc0de02 -#define RQ_PHASE_INTERPRET 0xebc0de03 -#define RQ_PHASE_COMPLETE 0xebc0de04 - -/* Spare the preprocessor, spoil the bugs. */ -#define FLAG(field, str) (field ? str : "") - -#define DEBUG_REQ_FLAGS(req) \ - ((req->rq_phase == RQ_PHASE_NEW) ? "New" : \ - (req->rq_phase == RQ_PHASE_RPC) ? "RPC" : \ - (req->rq_phase == RQ_PHASE_INTERPRET) ? "Interpret" : \ - (req->rq_phase == RQ_PHASE_COMPLETE) ? "Complete" : \ - (req->rq_phase == RQ_PHASE_BULK) ? "Bulk" : "?phase?"), \ - FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \ - FLAG(req->rq_want_ack, "A"), FLAG(req->rq_err, "E"), \ - FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \ - FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \ - FLAG(req->rq_no_resend, "N"), FLAG(req->rq_resent, "s"), \ - FLAG(req->rq_no_recov, "n"), FLAG(req->rq_waiting, "W") - -#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s" - -#define DEBUG_REQ(level, req, fmt, args...) \ -do { \ -CDEBUG(level, "@@@ " fmt \ - " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl " \ - REQ_FLAGS_FMT"/%x/%x rc %x\n" , ## args, req, req->rq_xid, \ - req->rq_reqmsg ? 
req->rq_reqmsg->transno : -1, \ - req->rq_reqmsg ? req->rq_reqmsg->opc : -1, \ - req->rq_import ? (char *)req->rq_import->imp_target_uuid.uuid : "", \ - req->rq_connection ? \ - (char *)req->rq_connection->c_remote_uuid.uuid : "", \ - (req->rq_import && req->rq_import->imp_client) ? \ - req->rq_import->imp_client->cli_request_portal : -1, \ - req->rq_reqlen, req->rq_replen, \ - atomic_read(&req->rq_refcount), \ - DEBUG_REQ_FLAGS(req), \ - req->rq_reqmsg ? req->rq_reqmsg->flags : 0, \ - req->rq_repmsg ? req->rq_repmsg->flags : 0, \ - req->rq_status); \ -} while (0) - -struct ptlrpc_bulk_page { - struct ptlrpc_bulk_desc *bp_desc; - struct list_head bp_link; - int bp_buflen; - int bp_pageoffset; /* offset within a page */ - struct page *bp_page; -}; - -#define BULK_GET_SOURCE 0 -#define BULK_PUT_SINK 1 -#define BULK_GET_SINK 2 -#define BULK_PUT_SOURCE 3 - -struct ptlrpc_bulk_desc { - unsigned int bd_complete:1; - unsigned int bd_network_rw:1; /* accessible to the network */ - unsigned int bd_type:2; /* {put,get}{source,sink} */ - unsigned int bd_registered:1; /* client side */ - spinlock_t bd_lock; /* serialise with callback */ - int bd_import_generation; - struct obd_export *bd_export; - struct obd_import *bd_import; - __u32 bd_portal; - struct ptlrpc_request *bd_req; /* associated request */ - wait_queue_head_t bd_waitq; /* server side only WQ */ - struct list_head bd_page_list; - __u32 bd_page_count; - __u32 bd_last_xid; - - ptl_md_t bd_md; - ptl_handle_md_t bd_md_h; - ptl_handle_me_t bd_me_h; - - int bd_callback_count; /* server side callbacks */ - -#ifdef __KERNEL__ - ptl_kiov_t bd_iov[16]; /* self-sized pre-allocated iov */ -#else - struct iovec bd_iov[16]; /* self-sized pre-allocated iov */ -#endif -}; - -struct ptlrpc_thread { - struct list_head t_link; - - __u32 t_flags; - wait_queue_head_t t_ctl_waitq; -}; - -struct ptlrpc_request_buffer_desc { - struct list_head rqbd_list; - struct ptlrpc_srv_ni *rqbd_srv_ni; - ptl_handle_me_t rqbd_me_h; - atomic_t 
rqbd_refcount; - char *rqbd_buffer; -}; - -struct ptlrpc_ni { - /* Generic interface state */ - char *pni_name; - int pni_number; - ptl_handle_ni_t pni_ni_h; - ptl_handle_eq_t pni_request_out_eq_h; - ptl_handle_eq_t pni_reply_in_eq_h; - ptl_handle_eq_t pni_reply_out_eq_h; - ptl_handle_eq_t pni_bulk_put_source_eq_h; - ptl_handle_eq_t pni_bulk_put_sink_eq_h; - ptl_handle_eq_t pni_bulk_get_source_eq_h; - ptl_handle_eq_t pni_bulk_get_sink_eq_h; -}; - -struct ptlrpc_srv_ni { - /* Interface-specific service state */ - struct ptlrpc_service *sni_service; /* owning service */ - struct ptlrpc_ni *sni_ni; /* network interface */ - ptl_handle_eq_t sni_eq_h; /* event queue handle */ - struct list_head sni_rqbds; /* all the request buffer descriptors */ - __u32 sni_nrqbds; /* # request buffers */ - atomic_t sni_nrqbds_receiving; /* # request buffers posted */ -}; - -struct ptlrpc_service { - time_t srv_time; - time_t srv_timeout; - - struct list_head srv_ni_list; /* list of interfaces */ - __u32 srv_max_req_size; /* biggest request to receive */ - __u32 srv_buf_size; /* # bytes in a request buffer */ - - __u32 srv_req_portal; - __u32 srv_rep_portal; - - __u32 srv_xid; - - wait_queue_head_t srv_waitq; /* all threads sleep on this */ - - spinlock_t srv_lock; - struct list_head srv_threads; - int (*srv_handler)(struct ptlrpc_request *req); - char *srv_name; /* only statically allocated strings here; we don't clean them */ - struct proc_dir_entry *svc_procroot; - struct lprocfs_stats *svc_stats; - - int srv_interface_rover; - struct ptlrpc_srv_ni srv_interfaces[0]; -}; - -typedef int (*svc_handler_t)(struct ptlrpc_request *req); - -/* ptlrpc/events.c */ -extern struct ptlrpc_ni ptlrpc_interfaces[]; -extern int ptlrpc_ninterfaces; -extern int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, struct ptlrpc_peer *peer); - -/* ptlrpc/connection.c */ -void ptlrpc_dump_connections(void); -void ptlrpc_readdress_connection(struct ptlrpc_connection *, struct obd_uuid *); -struct ptlrpc_connection 
*ptlrpc_get_connection(struct ptlrpc_peer *peer, - struct obd_uuid *uuid); -int ptlrpc_put_connection(struct ptlrpc_connection *c); -struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *); -void ptlrpc_init_connection(void); -void ptlrpc_cleanup_connection(void); - -/* ptlrpc/niobuf.c */ -int ptlrpc_bulk_put(struct ptlrpc_bulk_desc *); -int ptlrpc_bulk_get(struct ptlrpc_bulk_desc *); -void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *bulk); -int ptlrpc_register_bulk(struct ptlrpc_request *req); -void ptlrpc_unregister_bulk (struct ptlrpc_request *req); - -static inline int ptlrpc_bulk_complete (struct ptlrpc_bulk_desc *desc) -{ - unsigned long flags; - int rc; - - spin_lock_irqsave (&desc->bd_lock, flags); - rc = desc->bd_complete; - spin_unlock_irqrestore (&desc->bd_lock, flags); - return (rc); -} - -int ptlrpc_reply(struct ptlrpc_request *req); -int ptlrpc_error(struct ptlrpc_request *req); -void ptlrpc_resend_req(struct ptlrpc_request *request); -int ptl_send_rpc(struct ptlrpc_request *request); -void ptlrpc_link_svc_me(struct ptlrpc_request_buffer_desc *rqbd); - -/* ptlrpc/client.c */ -void ptlrpc_init_client(int req_portal, int rep_portal, char *name, - struct ptlrpc_client *); -void ptlrpc_cleanup_client(struct obd_import *imp); -struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req); -struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid); - -int ptlrpc_queue_wait(struct ptlrpc_request *req); -int ptlrpc_replay_req(struct ptlrpc_request *req); -void ptlrpc_unregister_reply(struct ptlrpc_request *req); -void ptlrpc_restart_req(struct ptlrpc_request *req); -void ptlrpc_abort_inflight(struct obd_import *imp); - -struct ptlrpc_request_set *ptlrpc_prep_set(void); -int ptlrpc_set_wait(struct ptlrpc_request_set *); -void ptlrpc_set_destroy(struct ptlrpc_request_set *); -void ptlrpc_set_add_req(struct ptlrpc_request_set *, struct ptlrpc_request *); - -struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, 
int opcode, - int count, int *lengths, char **bufs); -void ptlrpc_free_req(struct ptlrpc_request *request); -void ptlrpc_req_finished(struct ptlrpc_request *request); -struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req); -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, - int type, int portal); -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp(struct ptlrpc_request *req, - int type, int portal); -void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *bulk); -int ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len); -void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *page); -void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, - struct obd_import *imp); -__u64 ptlrpc_next_xid(void); - -/* ptlrpc/ptlrpc_module.c */ -void ptlrpc_put_ldlm_hooks(void); -int ptlrpc_ldlm_hooks_referenced(void); - -/* ptlrpc/service.c */ -struct ptlrpc_service * -ptlrpc_init_svc(__u32 nevents, __u32 nbufs, __u32 bufsize, __u32 max_req_size, - int req_portal, int rep_portal, svc_handler_t, char *name, - struct obd_device *dev); -void ptlrpc_stop_all_threads(struct ptlrpc_service *svc); -int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, - char *name); -int ptlrpc_unregister_service(struct ptlrpc_service *service); - -struct ptlrpc_svc_data { - char *name; - struct ptlrpc_service *svc; - struct ptlrpc_thread *thread; - struct obd_device *dev; -}; - -/* ptlrpc/pack_generic.c */ -int lustre_pack_msg(int count, int *lens, char **bufs, int *len, - struct lustre_msg **msg); -int lustre_msg_size(int count, int *lengths); -int lustre_unpack_msg(struct lustre_msg *m, int len); -void *lustre_msg_buf(struct lustre_msg *m, int n, int minlen); -char *lustre_msg_string (struct lustre_msg *m, int n, int max_len); -void *lustre_swab_reqbuf (struct ptlrpc_request *req, int n, int minlen, - void *swabber); -void *lustre_swab_repbuf (struct ptlrpc_request *req, int n, int minlen, - void 
*swabber); - -/* ldlm/ldlm_lib.c */ -int client_import_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid); -int client_import_disconnect(struct lustre_handle *conn, int failover); - -/* ptlrpc/pinger.c */ -int ptlrpc_pinger_add_import(struct obd_import *imp); -int ptlrpc_pinger_del_import(struct obd_import *imp); - -#endif diff --git a/lustre/include/linux/lustre_otree.h b/lustre/include/linux/lustre_otree.h deleted file mode 100644 index 3d8d510..0000000 --- a/lustre/include/linux/lustre_otree.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _LUSTRE_OTREE_H -#define _LUSTRE_OTREE_H - -/* XXX ok, I can't make sense of our header nest right now.. */ -#ifdef __KERNEL__ -#include -#include - -struct otree { - rb_root_t ot_root; - spinlock_t ot_lock; - unsigned long ot_num_marked; -}; -#else -struct otree { - unsigned long lalala; -}; -#endif - -int ot_mark_offset(struct otree *ot, unsigned long offset); -int ot_clear_extent(struct otree *ot, unsigned long start, unsigned long end); -int ot_find_marked_extent(struct otree *ot, unsigned long *start, - unsigned long *end); -int ot_last_marked(struct otree *ot, unsigned long *last); -unsigned long ot_num_marked(struct otree *ot); -void ot_init(struct otree *ot); - -#endif diff --git a/lustre/include/linux/obd.h b/lustre/include/linux/obd.h deleted file mode 100644 index ba848a9..0000000 --- a/lustre/include/linux/obd.h +++ /dev/null @@ -1,432 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. 
- * See the file COPYING in this distribution - */ - -#ifndef __OBD_H -#define __OBD_H - -#include - -struct lov_oinfo { /* per-child structure */ - __u64 loi_id; /* object ID on the target OST */ - struct lustre_handle *loi_handle; /* open file handle for obj on OST */ - int loi_ost_idx; /* OST stripe index in lmd_objects array */ - /* tracking offsets per file, per stripe.. */ - struct otree *loi_dirty_ot; /* lets lov stack on osc */ - struct otree loi_dirty_ot_inline; -}; - -struct lov_stripe_md { - /* Public members. */ - __u64 lsm_object_id; /* lov object id */ - __u64 lsm_maxbytes; - - /* LOV-private members start here -- only for use in lov/. */ - __u32 lsm_magic; - __u32 lsm_stripe_size; /* size of the stripe */ - unsigned lsm_stripe_offset; /* offset of first stripe in lmd_objects */ - unsigned lsm_stripe_count; /* how many objects are being striped on */ - struct lov_oinfo lsm_oinfo[0]; -}; - -#define IOC_OSC_TYPE 'h' -#define IOC_OSC_MIN_NR 20 -#define IOC_OSC_REGISTER_LOV _IOWR(IOC_OSC_TYPE, 20, struct obd_device *) -#define IOC_OSC_SET_ACTIVE _IOWR(IOC_OSC_TYPE, 21, struct obd_device *) -#define IOC_OSC_MAX_NR 50 - -#define IOC_MDC_TYPE 'i' -#define IOC_MDC_MIN_NR 20 -#define IOC_MDC_LOOKUP _IOWR(IOC_MDC_TYPE, 20, struct obd_device *) -#define IOC_MDC_MAX_NR 50 - -#ifdef __KERNEL__ -# include -# include -# include /* for struct task_struct, for current.h */ -# include /* for smp_lock.h */ -# include -# include - -# include -# include -# include -# include -#endif - -struct obd_type { - struct list_head typ_chain; - struct obd_ops *typ_ops; - struct proc_dir_entry *typ_procroot; - char *typ_name; - int typ_refcnt; -}; - -struct brw_page { - obd_off off; - struct page *pg; - int count; - obd_flag flag; -}; - -/* Individual type definitions */ - -struct ost_server_data; - -struct filter_obd { - const char *fo_fstype; - char *fo_nspath; - struct super_block *fo_sb; - struct vfsmount *fo_vfsmnt; - struct obd_run_ctxt fo_ctxt; - struct dentry *fo_dentry_O; - 
struct dentry *fo_dentry_O_mode[16]; - struct dentry **fo_dentry_O_sub; - spinlock_t fo_objidlock; /* protect fo_lastobjid increment */ - spinlock_t fo_translock; /* protect fsd_last_rcvd increment */ - struct file *fo_rcvd_filp; - struct filter_server_data *fo_fsd; - unsigned long *fo_last_rcvd_slots; - - struct file_operations *fo_fop; - struct inode_operations *fo_iop; - struct address_space_operations *fo_aops; - - struct list_head fo_export_list; - spinlock_t fo_fddlock; /* protect setting dentry->d_fsdata */ - int fo_subdir_count; - spinlock_t fo_grant_lock; /* protects tot_granted */ - obd_size fo_tot_granted; - obd_size fo_tot_cached; -}; - -struct mds_server_data; - -struct client_obd { - struct obd_import *cl_import; - struct semaphore cl_sem; - int cl_conn_count; - /* max_mds_easize is purely a performance thing so we don't have to - * call obd_size_wiremd() all the time. */ - int cl_max_mds_easize; - struct obd_device *cl_containing_lov; - kdev_t cl_sandev; - struct semaphore cl_dirty_sem; - obd_size cl_dirty; /* both in bytes */ - obd_size cl_dirty_granted; - /* this is just to keep existing infinitely caching behaviour between - * clients and OSTs that don't have the grant code in yet.. 
it can - * be yanked once everything speaks grants */ - char cl_ost_can_grant; -}; - -struct mds_obd { - struct ptlrpc_service *mds_service; - struct ptlrpc_service *mds_setattr_service; - struct ptlrpc_service *mds_readpage_service; - - struct super_block *mds_sb; - struct vfsmount *mds_vfsmnt; - struct dentry *mds_fid_de; - struct obd_run_ctxt mds_ctxt; - struct file_operations *mds_fop; - struct inode_operations *mds_iop; - struct address_space_operations *mds_aops; - - int mds_max_mdsize; - struct file *mds_rcvd_filp; - spinlock_t mds_transno_lock; - __u64 mds_last_transno; - __u64 mds_mount_count; - struct ll_fid mds_rootfid; - struct mds_server_data *mds_server_data; - - int mds_has_lov_desc; - struct lov_desc mds_lov_desc; - unsigned long *mds_client_bitmap; -}; - -struct ldlm_obd { - struct ptlrpc_service *ldlm_cb_service; - struct ptlrpc_service *ldlm_cancel_service; - struct ptlrpc_client *ldlm_client; - struct ptlrpc_connection *ldlm_server_conn; -}; - -struct echo_obd { - char *eo_fstype; - struct obdo oa; - spinlock_t eo_lock; - __u64 eo_lastino; - atomic_t eo_getattr; - atomic_t eo_setattr; - atomic_t eo_create; - atomic_t eo_destroy; - atomic_t eo_prep; - atomic_t eo_read; - atomic_t eo_write; -}; - -/* - * this struct does double-duty acting as either a client or - * server instance .. maybe not wise. 
- */ -struct ptlbd_obd { - /* server's */ - struct ptlrpc_service *ptlbd_service; - struct file *filp; - /* client's */ - struct ptlrpc_client bd_client; - struct obd_import *bd_import; - struct obd_uuid bd_server_uuid; - struct lustre_handle bd_connect_handle; - int refcount; /* XXX sigh */ -}; - -struct recovd_obd { - spinlock_t recovd_lock; - struct list_head recovd_managed_items; /* items managed */ - struct list_head recovd_troubled_items; /* items in recovery */ - - wait_queue_head_t recovd_recovery_waitq; - wait_queue_head_t recovd_ctl_waitq; - wait_queue_head_t recovd_waitq; - struct task_struct *recovd_thread; - __u32 recovd_state; -}; - -struct ost_obd { - struct ptlrpc_service *ost_service; -}; - -struct echo_client_obd { - struct lustre_handle ec_conn; /* the local connection to osc/lov */ - spinlock_t ec_lock; - struct list_head ec_objects; - int ec_lsmsize; - int ec_nstripes; - __u64 ec_unique; -}; - -struct cache_obd { - struct lustre_handle cobd_target; /* local connection to target obd */ - struct lustre_handle cobd_cache; /* local connection to cache obd */ -}; - -struct lov_tgt_desc { - struct obd_uuid uuid; - struct lustre_handle conn; - int active; /* is this target available for requests, etc */ -}; - -struct lov_obd { - spinlock_t lov_lock; - struct obd_device *mdcobd; - struct lov_desc desc; - int bufsize; - int refcount; - struct lov_tgt_desc *tgts; -}; - -struct niobuf_local { - __u64 offset; - __u32 len; - __u32 flags; - __u32 rc; - struct page *page; - struct dentry *dentry; - unsigned long start; -}; - -/* Don't conflict with on-wire flags OBD_BRW_WRITE, etc */ -#define N_LOCAL_TEMP_PAGE 0x10000000 - -struct obd_trans_info { - __u64 oti_transno; - /* Only used on the server side for tracking acks. 
*/ - struct oti_req_ack_lock { - struct lustre_handle lock; - __u32 mode; - } oti_ack_locks[4]; -}; - -/* corresponds to one of the obd's */ -struct obd_device { - struct obd_type *obd_type; - - /* common and UUID name of this device */ - char *obd_name; - struct obd_uuid obd_uuid; - - int obd_minor; - int obd_attached:1, obd_set_up:1, obd_recovering:1, - obd_abort_recovery:1, obd_replayable:1, obd_no_transno:1, - obd_no_recov:1, obd_stopping:1; - atomic_t obd_refcount; - wait_queue_head_t obd_refcount_waitq; - struct proc_dir_entry *obd_proc_entry; - struct list_head obd_exports; - int obd_num_exports; - struct list_head obd_imports; - struct ldlm_namespace *obd_namespace; - struct ptlrpc_client obd_ldlm_client; /* XXX OST/MDS only */ - /* a spinlock is OK for what we do now, may need a semaphore later */ - spinlock_t obd_dev_lock; - __u64 obd_last_committed; - struct fsfilt_operations *obd_fsops; - - /* XXX encapsulate all this recovery data into one struct */ - svc_handler_t obd_recovery_handler; - int obd_recoverable_clients; - spinlock_t obd_processing_task_lock; - pid_t obd_processing_task; - __u64 obd_next_recovery_transno; - wait_queue_head_t obd_next_transno_waitq; - wait_queue_head_t obd_commit_waitq; - struct timer_list obd_recovery_timer; - struct list_head obd_recovery_queue; - struct list_head obd_delayed_reply_queue; - - union { - struct filter_obd filter; - struct mds_obd mds; - struct client_obd cli; - struct ost_obd ost; - struct echo_client_obd echo_client;; - struct ldlm_obd ldlm; - struct echo_obd echo; - struct recovd_obd recovd; - struct lov_obd lov; - struct cache_obd cobd; - struct ptlbd_obd ptlbd; - } u; - /* Fields used by LProcFS */ - unsigned int obd_cntr_base; - struct lprocfs_stats *obd_stats; -}; - -struct obd_ops { - struct module *o_owner; - int (*o_iocontrol)(unsigned int cmd, struct lustre_handle *, int len, - void *karg, void *uarg); - int (*o_get_info)(struct lustre_handle *, __u32 keylen, void *key, - __u32 *vallen, void 
*val); - int (*o_set_info)(struct lustre_handle *, __u32 keylen, void *key, - __u32 vallen, void *val); - int (*o_attach)(struct obd_device *dev, obd_count len, void *data); - int (*o_detach)(struct obd_device *dev); - int (*o_setup) (struct obd_device *dev, obd_count len, void *data); - int (*o_cleanup)(struct obd_device *dev, int force, int failover); - int (*o_connect)(struct lustre_handle *conn, struct obd_device *src, - struct obd_uuid *cluuid); - int (*o_disconnect)(struct lustre_handle *conn, int failover); - - int (*o_statfs)(struct obd_export *exp, struct obd_statfs *osfs); - int (*o_syncfs)(struct obd_export *); - int (*o_packmd)(struct lustre_handle *, struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src); - int (*o_unpackmd)(struct lustre_handle *, - struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, int disk_len); - int (*o_preallocate)(struct lustre_handle *, obd_count *req, - obd_id *ids); - int (*o_create)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti); - int (*o_destroy)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti); - int (*o_setattr)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti); - int (*o_getattr)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea); - int (*o_getattr_async)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, - struct ptlrpc_request_set *set); - int (*o_open)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_client_handle *och); - int (*o_close)(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti); - int (*o_brw)(int rw, struct lustre_handle *conn, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pgarr, struct obd_trans_info *oti); - int (*o_brw_async)(int rw, struct 
lustre_handle *conn, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pgarr, struct ptlrpc_request_set *, - struct obd_trans_info *oti); - int (*o_punch)(struct lustre_handle *conn, struct obdo *tgt, - struct lov_stripe_md *ea, obd_size count, - obd_off offset, struct obd_trans_info *oti); - int (*o_sync)(struct lustre_handle *conn, struct obdo *tgt, - obd_size count, obd_off offset); - int (*o_migrate)(struct lustre_handle *conn, struct obdo *dst, - struct obdo *src, obd_size count, obd_off offset); - int (*o_copy)(struct lustre_handle *dstconn, struct obdo *dst, - struct lustre_handle *srconn, struct obdo *src, - obd_size count, obd_off offset, struct obd_trans_info *); - int (*o_iterate)(struct lustre_handle *conn, - int (*)(obd_id, obd_gr, void *), - obd_id *startid, obd_gr group, void *data); - int (*o_preprw)(int cmd, struct obd_export *, struct obdo *obdo, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, void **desc_private, - struct obd_trans_info *oti); - int (*o_commitrw)(int cmd, struct obd_export *, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *local, - void *desc_private, struct obd_trans_info *oti); - int (*o_enqueue)(struct lustre_handle *conn, struct lov_stripe_md *md, - struct lustre_handle *parent_lock, - __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, void *cb, void *data, - struct lustre_handle *lockh); - int (*o_match)(struct lustre_handle *conn, struct lov_stripe_md *md, - __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, void *data, struct lustre_handle *lockh); - int (*o_cancel)(struct lustre_handle *, struct lov_stripe_md *md, - __u32 mode, struct lustre_handle *); - int (*o_cancel_unused)(struct lustre_handle *, struct lov_stripe_md *, - int local_only, void *opaque); - int (*o_san_preprw)(int cmd, struct lustre_handle *conn, - int objcount, struct obd_ioobj *obj, - int niocount, struct 
niobuf_remote *remote); - int (*o_mark_page_dirty)(struct lustre_handle *conn, - struct lov_stripe_md *ea, - unsigned long offset); - int (*o_clear_dirty_pages)(struct lustre_handle *conn, - struct lov_stripe_md *ea, - unsigned long start, - unsigned long end, - unsigned long *cleared); - int (*o_last_dirty_offset)(struct lustre_handle *conn, - struct lov_stripe_md *ea, - unsigned long *offset); - void (*o_destroy_export)(struct obd_export *export); -}; - -static inline void obd_transno_commit_cb(struct obd_device *obd, __u64 transno, - int error) -{ - if (error) { - CDEBUG(D_ERROR, "%s: transno "LPD64" commit error: %d\n", - obd->obd_name, transno, error); - return; - } - CDEBUG(D_HA, "%s: transno "LPD64" committed\n", - obd->obd_name, transno); - if (transno > obd->obd_last_committed) { - obd->obd_last_committed = transno; - wake_up(&obd->obd_commit_waitq); - } -} - -/* When adding a function pointer to struct obd_ops, please update - * function lprocfs_alloc_obd_counters() in obdclass/lprocfs_status.c - * accordingly. */ - -#endif /* __OBD_H */ diff --git a/lustre/include/linux/obd_cache.h b/lustre/include/linux/obd_cache.h deleted file mode 100644 index e75b9f4..0000000 --- a/lustre/include/linux/obd_cache.h +++ /dev/null @@ -1,13 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#ifndef _OBD_CACHE_H__ -#define _OBD_CACHE_H__ - -#ifdef __KERNEL__ - -#define OBD_CACHE_DEVICENAME "cobd" - -#endif -#endif diff --git a/lustre/include/linux/obd_class.h b/lustre/include/linux/obd_class.h deleted file mode 100644 index 0c33ceb..0000000 --- a/lustre/include/linux/obd_class.h +++ /dev/null @@ -1,1159 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef __LINUX_CLASS_OBD_H -#define __LINUX_CLASS_OBD_H - -#ifndef __KERNEL__ -#include -#include -#else -#include -#include -#include -#include -#include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -/* OBD Device Declarations */ -#define MAX_OBD_DEVICES 128 -extern struct obd_device obd_dev[MAX_OBD_DEVICES]; - -/* OBD Operations Declarations */ -extern struct obd_device *class_conn2obd(struct lustre_handle *); - -/* genops.c */ -struct obd_export *class_conn2export(struct lustre_handle *); -int class_register_type(struct obd_ops *ops, struct lprocfs_vars *, char *nm); -int class_unregister_type(char *nm); -int class_name2dev(char *name); -struct obd_device *class_name2obd(char *name); -int class_uuid2dev(struct obd_uuid *uuid); -struct obd_device *class_uuid2obd(struct obd_uuid *uuid); - -struct obd_export *class_export_get(struct obd_export *); -void class_export_put(struct obd_export *); -struct obd_export *class_new_export(struct obd_device *obddev); -void class_unlink_export(struct obd_export *exp); - -struct obd_import *class_import_get(struct obd_import *); -void class_import_put(struct obd_import *); -struct obd_import *class_new_import(void); -void class_destroy_import(struct obd_import *exp); - -struct obd_type *class_get_type(char *name); -void 
class_put_type(struct obd_type *type); -int class_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid); -int class_disconnect(struct lustre_handle *conn, int failover); -void class_disconnect_exports(struct obd_device *obddev, int failover); -/* generic operations shared by various OBD types */ -int class_multi_setup(struct obd_device *obddev, uint32_t len, void *data); -int class_multi_cleanup(struct obd_device *obddev); - -static inline int obd_check_conn(struct lustre_handle *conn) -{ - struct obd_device *obd; - if (!conn) { - CERROR("NULL conn\n"); - RETURN(-ENOTCONN); - } - - obd = class_conn2obd(conn); - if (!obd) { - CERROR("NULL obd\n"); - RETURN(-ENODEV); - } - - if (!obd->obd_attached) { - CERROR("obd %d not attached\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_set_up) { - CERROR("obd %d not setup\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_type) { - CERROR("obd %d not typed\n", obd->obd_minor); - RETURN(-ENODEV); - } - - if (!obd->obd_type->typ_ops) { - CERROR("obd_check_conn: obd %d no operations\n", - obd->obd_minor); - RETURN(-EOPNOTSUPP); - } - return 0; -} - - -#define OBT(dev) (dev)->obd_type -#define OBP(dev, op) (dev)->obd_type->typ_ops->o_ ## op - -/* Ensure obd_setup: used for disconnect which might be called while - an obd is stopping. */ -#define OBD_CHECK_SETUP(conn, exp) \ -do { \ - if (!(conn)) { \ - CERROR("NULL connection\n"); \ - RETURN(-EINVAL); \ - } \ - \ - exp = class_conn2export(conn); \ - if (!(exp)) { \ - CERROR("No export for conn "LPX64"\n", (conn)->cookie); \ - RETURN(-EINVAL); \ - } \ - \ - if (!(exp)->exp_obd->obd_set_up) { \ - CERROR("Device %d not setup\n", \ - (exp)->exp_obd->obd_minor); \ - class_export_put(exp); \ - RETURN(-EINVAL); \ - } \ -} while (0) - -/* Ensure obd_setup and !obd_stopping. 
*/ -#define OBD_CHECK_ACTIVE(conn, exp) \ -do { \ - if (!(conn)) { \ - CERROR("NULL connection\n"); \ - RETURN(-EINVAL); \ - } \ - \ - exp = class_conn2export(conn); \ - if (!(exp)) { \ - CERROR("No export for conn "LPX64"\n", (conn)->cookie); \ - RETURN(-EINVAL); \ - } \ - \ - if (!(exp)->exp_obd->obd_set_up || (exp)->exp_obd->obd_stopping) { \ - CERROR("Device %d not setup\n", \ - (exp)->exp_obd->obd_minor); \ - class_export_put(exp); \ - RETURN(-EINVAL); \ - } \ -} while (0) - -/* Ensure obd_setup: used for cleanup which must be called - while obd is stopping */ -#define OBD_CHECK_DEV_STOPPING(obd) \ -do { \ - if (!(obd)) { \ - CERROR("NULL device\n"); \ - RETURN(-ENODEV); \ - } \ - \ - if (!(obd)->obd_set_up) { \ - CERROR("Device %d not setup\n", \ - (obd)->obd_minor); \ - RETURN(-ENODEV); \ - } \ - \ - if (!(obd)->obd_stopping) { \ - CERROR("Device %d not stopping\n", \ - (obd)->obd_minor); \ - RETURN(-ENODEV); \ - } \ -} while (0) - -/* ensure obd_setup and !obd_stopping */ -#define OBD_CHECK_DEV_ACTIVE(obd) \ -do { \ - if (!(obd)) { \ - CERROR("NULL device\n"); \ - RETURN(-ENODEV); \ - } \ - \ - if (!(obd)->obd_set_up || (obd)->obd_stopping) { \ - CERROR("Device %d not setup\n", \ - (obd)->obd_minor); \ - RETURN(-ENODEV); \ - } \ -} while (0) - - -#ifdef LPROCFS -#define OBD_COUNTER_OFFSET(op) \ - ((offsetof(struct obd_ops, o_ ## op) - \ - offsetof(struct obd_ops, o_iocontrol)) \ - / sizeof(((struct obd_ops *)(0))->o_iocontrol)) - -#define OBD_COUNTER_INCREMENT(obd, op) \ - if ((obd)->obd_stats != NULL) { \ - unsigned int coffset; \ - coffset = (unsigned int)(obd)->obd_cntr_base + \ - OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < obd->obd_stats->ls_num); \ - lprocfs_counter_incr(obd->obd_stats, coffset); \ - } -#else -#define OBD_COUNTER_OFFSET(op) -#define OBD_COUNTER_INCREMENT(obd, op) -#endif - -#define OBD_CHECK_OP(obd, op) \ -do { \ - if (!OBP((obd), op)) { \ - CERROR("obd_" #op ": dev %d no operation\n", \ - obd->obd_minor); \ - RETURN(-EOPNOTSUPP); \ 
- } \ -} while (0) - -static inline int obd_get_info(struct lustre_handle *conn, __u32 keylen, - void *key, __u32 *vallen, void *val) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, get_info); - OBD_COUNTER_INCREMENT(exp->exp_obd, get_info); - - rc = OBP(exp->exp_obd, get_info)(conn, keylen, key, vallen, val); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_set_info(struct lustre_handle *conn, obd_count keylen, - void *key, obd_count vallen, void *val) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, set_info); - OBD_COUNTER_INCREMENT(exp->exp_obd, set_info); - - rc = OBP(exp->exp_obd, set_info)(conn, keylen, key, vallen, val); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_setup(struct obd_device *obd, int datalen, void *data) -{ - int rc; - ENTRY; - - OBD_CHECK_OP(obd, setup); - OBD_COUNTER_INCREMENT(obd, setup); - - rc = OBP(obd, setup)(obd, datalen, data); - RETURN(rc); -} - -static inline int obd_cleanup(struct obd_device *obd, int force, int failover) -{ - int rc; - ENTRY; - - OBD_CHECK_DEV_STOPPING(obd); - OBD_CHECK_OP(obd, cleanup); - OBD_COUNTER_INCREMENT(obd, cleanup); - - rc = OBP(obd, cleanup)(obd, force, failover); - RETURN(rc); -} - -/* Pack an in-memory MD struct for storage on disk. - * Returns +ve size of packed MD (0 for free), or -ve error. - * - * If @disk_tgt == NULL, MD size is returned (max size if @mem_src == NULL). - * If @*disk_tgt != NULL and @mem_src == NULL, @*disk_tgt will be freed. 
- * If @*disk_tgt == NULL, it will be allocated - */ -static inline int obd_packmd(struct lustre_handle *conn, - struct lov_mds_md **disk_tgt, - struct lov_stripe_md *mem_src) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, packmd); - OBD_COUNTER_INCREMENT(exp->exp_obd, packmd); - - rc = OBP(exp->exp_obd, packmd)(conn, disk_tgt, mem_src); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_size_diskmd(struct lustre_handle *conn, - struct lov_stripe_md *mem_src) -{ - return obd_packmd(conn, NULL, mem_src); -} - -/* helper functions */ -static inline int obd_alloc_diskmd(struct lustre_handle *conn, - struct lov_mds_md **disk_tgt) -{ - LASSERT(disk_tgt); - LASSERT(*disk_tgt == NULL); - return obd_packmd(conn, disk_tgt, NULL); -} - -static inline int obd_free_diskmd(struct lustre_handle *conn, - struct lov_mds_md **disk_tgt) -{ - LASSERT(disk_tgt); - LASSERT(*disk_tgt); - return obd_packmd(conn, disk_tgt, NULL); -} - -/* Unpack an MD struct from disk to in-memory format. - * Returns +ve size of unpacked MD (0 for free), or -ve error. - * - * If @mem_tgt == NULL, MD size is returned (max size if @disk_src == NULL). - * If @*mem_tgt != NULL and @disk_src == NULL, @*mem_tgt will be freed. 
- * If @*mem_tgt == NULL, it will be allocated - */ -static inline int obd_unpackmd(struct lustre_handle *conn, - struct lov_stripe_md **mem_tgt, - struct lov_mds_md *disk_src, - int disk_len) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, unpackmd); - OBD_COUNTER_INCREMENT(exp->exp_obd, unpackmd); - - rc = OBP(exp->exp_obd, unpackmd)(conn, mem_tgt, disk_src, disk_len); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_size_memmd(struct lustre_handle *conn, - struct lov_mds_md *disk_src, - int disk_len) -{ - return obd_unpackmd(conn, NULL, disk_src, disk_len); -} - -/* helper functions */ -static inline int obd_alloc_memmd(struct lustre_handle *conn, - struct lov_stripe_md **mem_tgt) -{ - LASSERT(mem_tgt); - LASSERT(*mem_tgt == NULL); - return obd_unpackmd(conn, mem_tgt, NULL, 0); -} - -static inline int obd_free_memmd(struct lustre_handle *conn, - struct lov_stripe_md **mem_tgt) -{ - LASSERT(mem_tgt); - LASSERT(*mem_tgt); - return obd_unpackmd(conn, mem_tgt, NULL, 0); -} - -static inline int obd_create(struct lustre_handle *conn, struct obdo *obdo, - struct lov_stripe_md **ea, - struct obd_trans_info *oti) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, create); - OBD_COUNTER_INCREMENT(exp->exp_obd, create); - - rc = OBP(exp->exp_obd, create)(conn, obdo, ea, oti); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_destroy(struct lustre_handle *conn, struct obdo *obdo, - struct lov_stripe_md *ea, - struct obd_trans_info *oti) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, destroy); - OBD_COUNTER_INCREMENT(exp->exp_obd, destroy); - - rc = OBP(exp->exp_obd, destroy)(conn, obdo, ea, oti); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_getattr(struct lustre_handle *conn, struct obdo *obdo, - struct lov_stripe_md *ea) -{ - struct 
obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, getattr); - OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); - - rc = OBP(exp->exp_obd, getattr)(conn, obdo, ea); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_getattr_async(struct lustre_handle *conn, - struct obdo *obdo, struct lov_stripe_md *ea, - struct ptlrpc_request_set *set) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_SETUP(conn, exp); - OBD_CHECK_OP(exp->exp_obd, getattr); - OBD_COUNTER_INCREMENT(exp->exp_obd, getattr); - - rc = OBP(exp->exp_obd, getattr_async)(conn, obdo, ea, set); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_close(struct lustre_handle *conn, struct obdo *obdo, - struct lov_stripe_md *ea, - struct obd_trans_info *oti) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, close); - OBD_COUNTER_INCREMENT(exp->exp_obd, close); - - rc = OBP(exp->exp_obd, close)(conn, obdo, ea, oti); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_open(struct lustre_handle *conn, struct obdo *obdo, - struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_client_handle *och) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, open); - OBD_COUNTER_INCREMENT(exp->exp_obd, open); - - rc = OBP(exp->exp_obd, open)(conn, obdo, ea, oti, och); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_setattr(struct lustre_handle *conn, struct obdo *obdo, - struct lov_stripe_md *ea, - struct obd_trans_info *oti) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, setattr); - OBD_COUNTER_INCREMENT(exp->exp_obd, setattr); - - rc = OBP(exp->exp_obd, setattr)(conn, obdo, ea, oti); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_connect(struct lustre_handle *conn, - struct obd_device *obd, struct 
obd_uuid *cluuid) -{ - int rc; - ENTRY; - - OBD_CHECK_DEV_ACTIVE(obd); - OBD_CHECK_OP(obd, connect); - OBD_COUNTER_INCREMENT(obd, connect); - - rc = OBP(obd, connect)(conn, obd, cluuid); - RETURN(rc); -} - -static inline int obd_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_SETUP(conn, exp); - OBD_CHECK_OP(exp->exp_obd, disconnect); - OBD_COUNTER_INCREMENT(exp->exp_obd, disconnect); - - rc = OBP(exp->exp_obd, disconnect)(conn, failover); - class_export_put(exp); - RETURN(rc); -} - -static inline void obd_destroy_export(struct obd_export *exp) -{ - ENTRY; - if (OBP(exp->exp_obd, destroy_export)) - OBP(exp->exp_obd, destroy_export)(exp); - EXIT; -} - -static inline int obd_statfs(struct obd_export *exp, struct obd_statfs *osfs) -{ - int rc; - ENTRY; - - OBD_CHECK_OP(exp->exp_obd, statfs); - OBD_COUNTER_INCREMENT(exp->exp_obd, statfs); - - rc = OBP(exp->exp_obd, statfs)(exp, osfs); - RETURN(rc); -} - -static inline int obd_syncfs(struct obd_export *exp) -{ - int rc; - ENTRY; - - OBD_CHECK_OP(exp->exp_obd, syncfs); - OBD_COUNTER_INCREMENT(exp->exp_obd, syncfs); - - rc = OBP(exp->exp_obd, syncfs)(exp); - RETURN(rc); -} - -static inline int obd_punch(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, obd_size start, - obd_size end, struct obd_trans_info *oti) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, punch); - OBD_COUNTER_INCREMENT(exp->exp_obd, punch); - - rc = OBP(exp->exp_obd, punch)(conn, oa, ea, start, end, oti); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pg, struct obd_trans_info *oti) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, brw); - OBD_COUNTER_INCREMENT(exp->exp_obd, brw); - - if (!(cmd & (OBD_BRW_RWMASK | 
OBD_BRW_CHECK))) { - CERROR("obd_brw: cmd must be OBD_BRW_READ, OBD_BRW_WRITE, " - "or OBD_BRW_CHECK\n"); - LBUG(); - } - - rc = OBP(exp->exp_obd, brw)(cmd, conn, ea, oa_bufs, pg, oti); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_brw_async(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *ea, obd_count oa_bufs, - struct brw_page *pg, - struct ptlrpc_request_set *set, - struct obd_trans_info *oti) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, brw_async); - OBD_COUNTER_INCREMENT(exp->exp_obd, brw_async); - - if (!(cmd & OBD_BRW_RWMASK)) { - CERROR("obd_brw: cmd must be OBD_BRW_READ or OBD_BRW_WRITE\n"); - LBUG(); - } - - rc = OBP(exp->exp_obd, brw_async)(cmd, conn, ea, oa_bufs, pg, set, oti); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote, - struct niobuf_local *local, void **desc_private, - struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - OBD_CHECK_OP(exp->exp_obd, preprw); - OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); - - rc = OBP(exp->exp_obd, preprw)(cmd, exp, obdo, objcount, obj, niocount, - remote, local, desc_private, oti); - RETURN(rc); -} - -static inline int obd_commitrw(int cmd, struct obd_export *exp, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *local, - void *desc_private, struct obd_trans_info *oti) -{ - int rc; - ENTRY; - - OBD_CHECK_OP(exp->exp_obd, commitrw); - OBD_COUNTER_INCREMENT(exp->exp_obd, commitrw); - - rc = OBP(exp->exp_obd, commitrw)(cmd, exp, objcount, obj, niocount, - local, desc_private, oti); - RETURN(rc); -} - -static inline int obd_iocontrol(unsigned int cmd, struct lustre_handle *conn, - int len, void *karg, void *uarg) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, iocontrol); - 
OBD_COUNTER_INCREMENT(exp->exp_obd, iocontrol); - - rc = OBP(exp->exp_obd, iocontrol)(cmd, conn, len, karg, uarg); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_enqueue(struct lustre_handle *conn, - struct lov_stripe_md *ea, - struct lustre_handle *parent_lock, - __u32 type, void *cookie, int cookielen, - __u32 mode, int *flags, void *cb, void *data, - struct lustre_handle *lockh) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, enqueue); - OBD_COUNTER_INCREMENT(exp->exp_obd, enqueue); - - rc = OBP(exp->exp_obd, enqueue)(conn, ea, parent_lock, type, - cookie, cookielen, mode, flags, cb, - data, lockh); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_match(struct lustre_handle *conn, - struct lov_stripe_md *ea, __u32 type, void *cookie, - int cookielen, __u32 mode, int *flags, void *data, - struct lustre_handle *lockh) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, match); - OBD_COUNTER_INCREMENT(exp->exp_obd, match); - - rc = OBP(exp->exp_obd, match)(conn, ea, type, cookie, cookielen, mode, - flags, data, lockh); - class_export_put(exp); - RETURN(rc); -} - - -static inline int obd_cancel(struct lustre_handle *conn, - struct lov_stripe_md *ea, __u32 mode, - struct lustre_handle *lockh) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, cancel); - OBD_COUNTER_INCREMENT(exp->exp_obd, cancel); - - rc = OBP(exp->exp_obd, cancel)(conn, ea, mode, lockh); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_cancel_unused(struct lustre_handle *conn, - struct lov_stripe_md *ea, int flags, - void *opaque) -{ - struct obd_export *exp; - int rc; - ENTRY; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, cancel_unused); - OBD_COUNTER_INCREMENT(exp->exp_obd, cancel_unused); - - rc = OBP(exp->exp_obd, cancel_unused)(conn, ea, flags, 
opaque); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_san_preprw(int cmd, struct lustre_handle *conn, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *remote) -{ - struct obd_export *exp; - int rc; - - OBD_CHECK_ACTIVE(conn, exp); - OBD_CHECK_OP(exp->exp_obd, preprw); - OBD_COUNTER_INCREMENT(exp->exp_obd, preprw); - - rc = OBP(exp->exp_obd, san_preprw)(cmd, conn, objcount, obj, - niocount, remote); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_mark_page_dirty(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - unsigned long offset) -{ - struct obd_export *exp; - int rc; - - OBD_CHECK_SETUP(conn, exp); - OBD_CHECK_OP(exp->exp_obd, mark_page_dirty); - - rc = OBP(exp->exp_obd, mark_page_dirty)(conn, lsm, offset); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_clear_dirty_pages(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - unsigned long start, - unsigned long end, - unsigned long *cleared) -{ - struct obd_export *exp; - int rc; - - OBD_CHECK_SETUP(conn, exp); - OBD_CHECK_OP(exp->exp_obd, clear_dirty_pages); - - rc = OBP(exp->exp_obd, clear_dirty_pages)(conn, lsm, start, end, - cleared); - class_export_put(exp); - RETURN(rc); -} - -static inline int obd_last_dirty_offset(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - unsigned long *offset) -{ - struct obd_export *exp; - int rc; - - OBD_CHECK_SETUP(conn, exp); - OBD_CHECK_OP(exp->exp_obd, last_dirty_offset); - - rc = OBP(exp->exp_obd, last_dirty_offset)(conn, lsm, offset); - class_export_put(exp); - RETURN(rc); -} - -/* OBD Metadata Support */ - -extern int obd_init_caches(void); -extern void obd_cleanup_caches(void); - -static inline struct lustre_handle *obdo_handle(struct obdo *oa) -{ - return (struct lustre_handle *)&oa->o_inline; -} - -/* support routines */ -extern kmem_cache_t *obdo_cachep; -static inline struct obdo *obdo_alloc(void) -{ - struct obdo *oa; - - oa = 
kmem_cache_alloc(obdo_cachep, SLAB_KERNEL); - if (oa == NULL) - LBUG(); - memset(oa, 0, sizeof (*oa)); - - return oa; -} - -static inline void obdo_free(struct obdo *oa) -{ - if (!oa) - return; - kmem_cache_free(obdo_cachep, oa); -} - -#if !defined(__KERNEL__) || (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define to_kdev_t(dev) dev -#define kdev_t_to_nr(dev) dev -#endif - -#ifdef __KERNEL__ -static inline void obdo_from_iattr(struct obdo *oa, struct iattr *attr) -{ - unsigned int ia_valid = attr->ia_valid; - - if (ia_valid & ATTR_ATIME) { - oa->o_atime = LTIME_S(attr->ia_atime); - oa->o_valid |= OBD_MD_FLATIME; - } - if (ia_valid & ATTR_MTIME) { - oa->o_mtime = LTIME_S(attr->ia_mtime); - oa->o_valid |= OBD_MD_FLMTIME; - } - if (ia_valid & ATTR_CTIME) { - oa->o_ctime = LTIME_S(attr->ia_ctime); - oa->o_valid |= OBD_MD_FLCTIME; - } - if (ia_valid & ATTR_SIZE) { - oa->o_size = attr->ia_size; - oa->o_valid |= OBD_MD_FLSIZE; - } - if (ia_valid & ATTR_MODE) { - oa->o_mode = attr->ia_mode; - oa->o_valid |= OBD_MD_FLTYPE | OBD_MD_FLMODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - oa->o_mode &= ~S_ISGID; - } - if (ia_valid & ATTR_UID) { - oa->o_uid = attr->ia_uid; - oa->o_valid |= OBD_MD_FLUID; - } - if (ia_valid & ATTR_GID) { - oa->o_gid = attr->ia_gid; - oa->o_valid |= OBD_MD_FLGID; - } -} - - -static inline void iattr_from_obdo(struct iattr *attr, struct obdo *oa, - obd_flag valid) -{ - memset(attr, 0, sizeof(*attr)); - if (valid & OBD_MD_FLATIME) { - LTIME_S(attr->ia_atime) = oa->o_atime; - attr->ia_valid |= ATTR_ATIME; - } - if (valid & OBD_MD_FLMTIME) { - LTIME_S(attr->ia_mtime) = oa->o_mtime; - attr->ia_valid |= ATTR_MTIME; - } - if (valid & OBD_MD_FLCTIME) { - LTIME_S(attr->ia_ctime) = oa->o_ctime; - attr->ia_valid |= ATTR_CTIME; - } - if (valid & OBD_MD_FLSIZE) { - attr->ia_size = oa->o_size; - attr->ia_valid |= ATTR_SIZE; - } - if (valid & OBD_MD_FLTYPE) { - attr->ia_mode = (attr->ia_mode & ~S_IFMT)|(oa->o_mode & S_IFMT); - attr->ia_valid |= 
ATTR_MODE; - } - if (valid & OBD_MD_FLMODE) { - attr->ia_mode = (attr->ia_mode & S_IFMT)|(oa->o_mode & ~S_IFMT); - attr->ia_valid |= ATTR_MODE; - if (!in_group_p(oa->o_gid) && !capable(CAP_FSETID)) - attr->ia_mode &= ~S_ISGID; - } - if (valid & OBD_MD_FLUID) - { - attr->ia_uid = oa->o_uid; - attr->ia_valid |= ATTR_UID; - } - if (valid & OBD_MD_FLGID) { - attr->ia_gid = oa->o_gid; - attr->ia_valid |= ATTR_GID; - } -} - - -/* WARNING: the file systems must take care not to tinker with - attributes they don't manage (such as blocks). */ - - -static inline void obdo_from_inode(struct obdo *dst, struct inode *src, - obd_flag valid) -{ - if (valid & OBD_MD_FLATIME) - dst->o_atime = LTIME_S(src->i_atime); - if (valid & OBD_MD_FLMTIME) - dst->o_mtime = LTIME_S(src->i_mtime); - if (valid & OBD_MD_FLCTIME) - dst->o_ctime = LTIME_S(src->i_ctime); - if (valid & OBD_MD_FLSIZE) - dst->o_size = src->i_size; - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - dst->o_blocks = src->i_blocks; - if (valid & OBD_MD_FLBLKSZ) - dst->o_blksize = src->i_blksize; - if (valid & OBD_MD_FLTYPE) - dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->i_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->o_mode = (dst->o_mode & S_IFMT) | (src->i_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->o_uid = src->i_uid; - if (valid & OBD_MD_FLGID) - dst->o_gid = src->i_gid; - if (valid & OBD_MD_FLFLAGS) - dst->o_flags = src->i_flags; - if (valid & OBD_MD_FLNLINK) - dst->o_nlink = src->i_nlink; - if (valid & OBD_MD_FLGENER) - dst->o_generation = src->i_generation; - if (valid & OBD_MD_FLRDEV) - dst->o_rdev = (__u32)kdev_t_to_nr(src->i_rdev); - - dst->o_valid |= (valid & ~OBD_MD_FLID); -} - -static inline void obdo_refresh_inode(struct inode *dst, struct obdo *src, - obd_flag valid) -{ - valid &= src->o_valid; - - if (valid & OBD_MD_FLATIME && src->o_atime > LTIME_S(dst->i_atime)) - LTIME_S(dst->i_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME && src->o_mtime > LTIME_S(dst->i_mtime)) - 
LTIME_S(dst->i_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) - LTIME_S(dst->i_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE && src->o_size > dst->i_size) - dst->i_size = src->o_size; - /* allocation of space */ - if (valid & OBD_MD_FLBLOCKS && src->o_blocks > dst->i_blocks) - dst->i_blocks = src->o_blocks; -} - -static inline void obdo_to_inode(struct inode *dst, struct obdo *src, - obd_flag valid) -{ - valid &= src->o_valid; - - if (valid & OBD_MD_FLATIME) - LTIME_S(dst->i_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME) - LTIME_S(dst->i_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(dst->i_ctime)) - LTIME_S(dst->i_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - dst->i_size = src->o_size; - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - dst->i_blocks = src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - dst->i_blksize = src->o_blksize; - if (valid & OBD_MD_FLTYPE) - dst->i_mode = (dst->i_mode & ~S_IFMT) | (src->o_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->i_mode = (dst->i_mode & S_IFMT) | (src->o_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->i_uid = src->o_uid; - if (valid & OBD_MD_FLGID) - dst->i_gid = src->o_gid; - if (valid & OBD_MD_FLFLAGS) - dst->i_flags = src->o_flags; - if (valid & OBD_MD_FLNLINK) - dst->i_nlink = src->o_nlink; - if (valid & OBD_MD_FLGENER) - dst->i_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) - dst->i_rdev = to_kdev_t(src->o_rdev); -} -#endif - -static inline void obdo_cpy_md(struct obdo *dst, struct obdo *src, - obd_flag valid) -{ -#ifdef __KERNEL__ - CDEBUG(D_INODE, "src obdo %Ld valid 0x%x, dst obdo %Ld\n", - (unsigned long long)src->o_id, src->o_valid, - (unsigned long long)dst->o_id); -#endif - if (valid & OBD_MD_FLATIME) - dst->o_atime = src->o_atime; - if (valid & OBD_MD_FLMTIME) - dst->o_mtime = src->o_mtime; - if (valid & OBD_MD_FLCTIME) - dst->o_ctime = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - 
dst->o_size = src->o_size; - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - dst->o_blocks = src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - dst->o_blksize = src->o_blksize; - if (valid & OBD_MD_FLTYPE) - dst->o_mode = (dst->o_mode & ~S_IFMT) | (src->o_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->o_mode = (dst->o_mode & S_IFMT) | (src->o_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->o_uid = src->o_uid; - if (valid & OBD_MD_FLGID) - dst->o_gid = src->o_gid; - if (valid & OBD_MD_FLFLAGS) - dst->o_flags = src->o_flags; - /* - if (valid & OBD_MD_FLOBDFLG) - dst->o_obdflags = src->o_obdflags; - */ - if (valid & OBD_MD_FLNLINK) - dst->o_nlink = src->o_nlink; - if (valid & OBD_MD_FLGENER) - dst->o_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) - dst->o_rdev = src->o_rdev; - if (valid & OBD_MD_FLINLINE && - src->o_obdflags & OBD_FL_INLINEDATA) { - memcpy(dst->o_inline, src->o_inline, sizeof(src->o_inline)); - dst->o_obdflags |= OBD_FL_INLINEDATA; - } - - dst->o_valid |= valid; -} - - -/* returns FALSE if comparison (by flags) is same, TRUE if changed */ -static inline int obdo_cmp_md(struct obdo *dst, struct obdo *src, - obd_flag compare) -{ - int res = 0; - - if ( compare & OBD_MD_FLATIME ) - res = (res || (dst->o_atime != src->o_atime)); - if ( compare & OBD_MD_FLMTIME ) - res = (res || (dst->o_mtime != src->o_mtime)); - if ( compare & OBD_MD_FLCTIME ) - res = (res || (dst->o_ctime != src->o_ctime)); - if ( compare & OBD_MD_FLSIZE ) - res = (res || (dst->o_size != src->o_size)); - if ( compare & OBD_MD_FLBLOCKS ) /* allocation of space */ - res = (res || (dst->o_blocks != src->o_blocks)); - if ( compare & OBD_MD_FLBLKSZ ) - res = (res || (dst->o_blksize != src->o_blksize)); - if ( compare & OBD_MD_FLTYPE ) - res = (res || (((dst->o_mode ^ src->o_mode) & S_IFMT) != 0)); - if ( compare & OBD_MD_FLMODE ) - res = (res || (((dst->o_mode ^ src->o_mode) & ~S_IFMT) != 0)); - if ( compare & OBD_MD_FLUID ) - res = (res || (dst->o_uid != 
src->o_uid)); - if ( compare & OBD_MD_FLGID ) - res = (res || (dst->o_gid != src->o_gid)); - if ( compare & OBD_MD_FLFLAGS ) - res = (res || (dst->o_flags != src->o_flags)); - if ( compare & OBD_MD_FLNLINK ) - res = (res || (dst->o_nlink != src->o_nlink)); - if ( compare & OBD_MD_FLGENER ) - res = (res || (dst->o_generation != src->o_generation)); - /* XXX Don't know if thses should be included here - wasn't previously - if ( compare & OBD_MD_FLINLINE ) - res = (res || memcmp(dst->o_inline, src->o_inline)); - */ - return res; -} - -/* I'm as embarrassed about this as you are. - * - * // XXX do not look into _superhack with remaining eye - * // XXX if this were any uglier, I'd get my own show on MTV */ -extern int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); -extern void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp); - -struct obd_statfs; -struct statfs; -void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs); -void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs); - -struct obd_class_user_state { - struct obd_device *ocus_current_obd; - struct list_head ocus_conns; -}; - -struct obd_class_user_conn { - struct list_head ocuc_chain; - struct lustre_handle ocuc_conn; -}; - - -/* sysctl.c */ -extern void obd_sysctl_init (void); -extern void obd_sysctl_clean (void); - -/* uuid.c */ -typedef __u8 class_uuid_t[16]; -//int class_uuid_parse(struct obd_uuid in, class_uuid_t out); -void class_uuid_unparse(class_uuid_t in, struct obd_uuid *out); - -/* lustre_peer.c */ -int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer); -int class_add_uuid(char *uuid, __u64 nid, __u32 nal); -int class_del_uuid (char *uuid); -void class_init_uuidlist(void); -void class_exit_uuidlist(void); - -#endif /* __LINUX_OBD_CLASS_H */ diff --git a/lustre/include/linux/obd_echo.h b/lustre/include/linux/obd_echo.h deleted file mode 100644 index c344d8a..0000000 --- a/lustre/include/linux/obd_echo.h +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ - -#ifndef _OBD_ECHO_H -#define _OBD_ECHO_H - -#define OBD_ECHO_DEVICENAME "obdecho" -#define OBD_ECHO_CLIENT_DEVICENAME "echo_client" - -struct ec_object { - struct list_head eco_obj_chain; - struct obd_device *eco_device; - int eco_refcount; - int eco_deleted; - obd_id eco_id; - struct lov_stripe_md *eco_lsm; -}; - -struct ec_open_object { - struct list_head ecoo_exp_chain; - struct ec_object *ecoo_object; - __u64 ecoo_cookie; - struct obdo ecoo_oa; - struct obd_client_handle ecoo_och; -}; - -struct ec_lock { - struct list_head ecl_exp_chain; - struct ec_object *ecl_object; - __u64 ecl_cookie; - struct lustre_handle ecl_lock_handle; - struct ldlm_extent ecl_extent; - __u32 ecl_mode; -}; - -#endif diff --git a/lustre/include/linux/obd_ext2.h b/lustre/include/linux/obd_ext2.h deleted file mode 100644 index 73b4b0b..0000000 --- a/lustre/include/linux/obd_ext2.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef _OBD_EXT2 -#define _OBD_EXT2 -/* - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ - -#define OBD_EXT2_RUNIT _IOWR('f', 61, long) - -#ifndef OBD_EXT2_DEVICENAME -#define OBD_EXT2_DEVICENAME "obdext2" -#endif - -/* development definitions */ -extern struct obdfs_sb_info *obd_sbi; -extern struct file_operations *obd_fso; - -/* ext2_obd.c */ -extern struct obd_ops ext2_obd_ops; - -#include -#include - -/* super.c */ -#ifdef EXT2_OBD_DEBUG -# undef ext2_debug -# define ext2_debug(format, a...) CDEBUG(D_EXT2, format, ## a) -# define ext2_error ext2_warning -# define ext2_panic ext2_warning -# define ext2_warning(sb, func, format, a...) 
CDEBUG(D_WARNING, format, ## a) -#else -# undef ext2_debug -# define ext2_debug(format, a...) {} -# define ext2_error(sb, func, format, a...) printk(KERN_ERR "%s: " format, func, ## a) -# define ext2_panic(sb, func, format, a...) printk(KERN_CRIT "%s: " format, func, ## a) -# define ext2_warning(sb, func, format, a...) printk(KERN_WARNING "%s: " format, func, ## a) -#endif - -extern struct super_operations ext2_sops; -int obd_remount (struct super_block * sb, int * flags, char * data); -struct super_block * ext2_read_super (struct super_block * sb, void * data, - int silent); -/* punch.c */ -void ext2_truncate (struct inode * inode); -int ext2_punch (struct inode * inode, loff_t start, size_t count); - -#endif diff --git a/lustre/include/linux/obd_filter.h b/lustre/include/linux/obd_filter.h deleted file mode 100644 index 3d8188a..0000000 --- a/lustre/include/linux/obd_filter.h +++ /dev/null @@ -1,104 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#ifndef _OBD_FILTER_H -#define _OBD_FILTER_H - -#ifdef __KERNEL__ -#include -#endif -#include - -#ifndef OBD_FILTER_DEVICENAME -#define OBD_FILTER_DEVICENAME "obdfilter" -#endif - -#define FILTER_LR_SERVER_SIZE 512 - -#define FILTER_LR_CLIENT_START 8192 -#define FILTER_LR_CLIENT_SIZE 128 - -#define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ - -#define FILTER_MOUNT_RECOV 2 -#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */ - -/* Data stored per server at the head of the last_rcvd file. In le32 order. */ -struct filter_server_data { - __u8 fsd_uuid[37]; /* server UUID */ - __u8 fsd_uuid_padding[3]; /* unused */ - __u64 fsd_last_objid; /* last created object ID */ - __u64 fsd_last_rcvd; /* last completed transaction ID */ - __u64 fsd_mount_count; /* FILTER incarnation number */ - __u32 fsd_feature_compat; /* compatible feature flags */ - __u32 fsd_feature_rocompat;/* read-only compatible feature flags */ - __u32 fsd_feature_incompat;/* incompatible feature flags */ - __u32 fsd_server_size; /* size of server data area */ - __u32 fsd_client_start; /* start of per-client data area */ - __u16 fsd_client_size; /* size of per-client data area */ - __u16 fsd_subdir_count; /* number of subdirectories for objects */ - __u8 fsd_padding[FILTER_LR_SERVER_SIZE - 88]; -}; - -/* Data stored per client in the last_rcvd file. In le32 order. 
*/ -struct filter_client_data { - __u8 fcd_uuid[37]; /* client UUID */ - __u8 fcd_uuid_padding[3]; /* unused */ - __u64 fcd_last_rcvd; /* last completed transaction ID */ - __u64 fcd_mount_count; /* FILTER incarnation number */ - __u64 fcd_last_xid; /* client RPC xid for the last transaction */ - __u8 fcd_padding[FILTER_LR_CLIENT_SIZE - 64]; -}; - -#ifndef OBD_FILTER_SAN_DEVICENAME -#define OBD_FILTER_SAN_DEVICENAME "sanobdfilter" -#endif - -/* In-memory access to client data from OST struct */ -struct filter_export_data { - struct list_head fed_open_head; /* files to close on disconnect */ - spinlock_t fed_lock; /* protects fed_open_head */ - struct filter_client_data *fed_fcd; - loff_t fed_lr_off; - int fed_lr_idx; -}; - -/* file data for open files on OST */ -struct filter_file_data { - struct portals_handle ffd_handle; - atomic_t ffd_refcount; - struct list_head ffd_export_list; /* export open list - fed_lock */ - struct file *ffd_file; /* file handle */ -}; - -struct filter_dentry_data { - obd_id fdd_objid; - __u32 fdd_magic; - atomic_t fdd_open_count; - int fdd_flags; -}; - -#define FILTER_DENTRY_MAGIC 0x9efba101 -#define FILTER_FLAG_DESTROY 0x0001 /* destroy dentry on last file close */ - - -#endif diff --git a/lustre/include/linux/obd_lov.h b/lustre/include/linux/obd_lov.h deleted file mode 100644 index b12a062..0000000 --- a/lustre/include/linux/obd_lov.h +++ /dev/null @@ -1,45 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#ifndef _OBD_LOV_H__ -#define _OBD_LOV_H__ - -#define OBD_LOV_DEVICENAME "lov" - -struct lov_brw_async_args { - obd_count aa_oa_bufs; - struct brw_page *aa_ioarr; -}; - -struct lov_getattr_async_args { - struct lov_stripe_md *aa_lsm; - struct obdo *aa_oa; - struct obdo *aa_stripe_oas; -}; - -static inline int lov_stripe_md_size(int stripes) -{ - return sizeof(struct lov_stripe_md) + stripes*sizeof(struct lov_oinfo); -} - -static inline int lov_mds_md_size(int 
stripes) -{ - return sizeof(struct lov_mds_md) + stripes*sizeof(struct lov_object_id); -} - -extern int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmm, - struct lov_stripe_md *lsm); -extern int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsm, - struct lov_mds_md *lmm, int lmmsize); -extern int lov_setstripe(struct lustre_handle *conn, - struct lov_stripe_md **lsmp, struct lov_mds_md *lmmu); -extern int lov_getstripe(struct lustre_handle *conn, - struct lov_stripe_md *lsm, struct lov_mds_md *lmmu); - -#define IOC_LOV_TYPE 'g' -#define IOC_LOV_MIN_NR 50 -#define IOC_LOV_SET_OSC_ACTIVE _IOWR('g', 50, long) -#define IOC_LOV_MAX_NR 50 - -#endif diff --git a/lustre/include/linux/obd_ost.h b/lustre/include/linux/obd_ost.h deleted file mode 100644 index 22fe694..0000000 --- a/lustre/include/linux/obd_ost.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Data structures for object storage targets and client: OST & OSC's - * - * See also lustre_idl.h for wire formats of requests. 
- * - */ - -#ifndef _LUSTRE_OST_H -#define _LUSTRE_OST_H - -#include - -#define LUSTRE_OST_NAME "ost" -#define LUSTRE_OSC_NAME "osc" -#define LUSTRE_SANOSC_NAME "sanosc" -#define LUSTRE_SANOST_NAME "sanost" - -struct osc_brw_async_args { - int aa_requested_nob; - int aa_nio_count; - obd_count aa_page_count; - struct brw_page *aa_pga; -}; - -struct osc_getattr_async_args { - struct obdo *aa_oa; -}; - -#endif diff --git a/lustre/include/linux/obd_ptlbd.h b/lustre/include/linux/obd_ptlbd.h deleted file mode 100644 index 1e6de5a..0000000 --- a/lustre/include/linux/obd_ptlbd.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef _OBD_PTLBD_H -#define _OBD_PTLBD_H - -#include -/* - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ - -#define OBD_PTLBD_SV_DEVICENAME "ptlbd_server" -#define OBD_PTLBD_CL_DEVICENAME "ptlbd_client" - -/* XXX maybe this isn't the best header to be dumping all this in.. */ - -extern int ptlbd_blk_init(void); -extern int ptlbd_cl_init(void); -extern int ptlbd_sv_init(void); - -extern void ptlbd_blk_exit(void); -extern void ptlbd_cl_exit(void); -extern void ptlbd_sv_exit(void); - -extern int ptlbd_do_connect(struct ptlbd_obd *); -extern int ptlbd_do_disconnect(struct ptlbd_obd *); -extern void ptlbd_blk_register(struct ptlbd_obd *ptlbd); -extern int ptlbd_send_rw_req(struct ptlbd_obd *, ptlbd_cmd_t cmd, - struct buffer_head *); -extern int ptlbd_send_flush_req(struct ptlbd_obd *, ptlbd_cmd_t cmd); -extern int ptlbd_handle(struct ptlrpc_request *req); - -#endif diff --git a/lustre/include/linux/obd_snap.h b/lustre/include/linux/obd_snap.h deleted file mode 100644 index b7641d4..0000000 --- a/lustre/include/linux/obd_snap.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _OBD_SNAP -#define _OBD_SNAP -/* - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. 
- * See the file COPYING in this distribution - */ - -#include - -#define OBD_SNAP_MAGIC 0xfffffff3 /* an unlikely block number */ - -#ifndef OBD_SNAP_DEVICENAME -#define OBD_SNAP_DEVICENAME "obdsnap" -#endif - -/* ioctls for manipulating snapshots 40 - 60 */ -#define OBD_SNAP_SETTABLE _IOWR('f', 40, long) -#define OBD_SNAP_PRINTTABLE _IOWR('f', 41, long) -#define OBD_SNAP_DELETE _IOWR('f', 42, long) -#define OBD_SNAP_RESTORE _IOWR('f', 43, long) - -void snap_use(int table_no, int snap_index) ; -void snap_unuse(int table_no, int snap_index) ; -int snap_is_used(int table_no, int snap_index) ; -int snap_table_attach(int tableno, int snap_index); - -#endif diff --git a/lustre/include/linux/obd_snap_support.h b/lustre/include/linux/obd_snap_support.h deleted file mode 100644 index fd8baa4..0000000 --- a/lustre/include/linux/obd_snap_support.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef __OBD_SNAP_SUPP_H -#define __OBD_SNAP_SUPP_H -/* - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ - -/* What we use to point to IDs in the obdmd data for snapshots. If we use - * obd_id (8 bytes) instead of ino_t (4 bytes), we halve the number of - * available snapshot slots (14 in 56 bytes vs. 7 in 56 bytes until we - * increase the size of OBD_OBDMDSZ). - */ -typedef obd_id snap_id; - -/* maximum number of snapshot tables we maintain in the kernel */ -#define SNAP_MAX_TABLES 8 - -/* maximum number of snapshots per device - must fit in "o_obdmd" area of struct obdo */ -#define SNAP_MAX ((OBD_OBDMDSZ - sizeof(uint32_t))/sizeof(snap_id)) - -struct snap_md { - uint32_t m_magic; - snap_id m_ids[SNAP_MAX]; /* id of snaps; slot 0 has current id */ -}; - - -/* if time is 0 this designates the "current" snapshot, i.e. 
- the head of the tree -*/ -struct snap { - time_t time; - int index; -}; - -/* snap ioctl data for attach: current always in first slot of this array */ -struct snap_obd_data { - int snap_dev; /* which device contains the data */ - unsigned int snap_index;/* which snapshot is ours */ - unsigned int snap_table;/* which table do we use */ -}; - - -/* snap ioctl data for table fiddling */ -struct snap_table_data { - int tblcmd_no; /* which table */ - unsigned int tblcmd_count; /* how many snaps */ - struct snap tblcmd_snaps[SNAP_MAX]; /* sorted times! */ -}; - - -struct snap_table { - spinlock_t tbl_lock; - unsigned int tbl_count; /* how many snapshots exist in this table*/ - int tbl_used; /* bitmap of snaps in use by a device */ - time_t tbl_times[SNAP_MAX]; - int tbl_index[SNAP_MAX]; -}; - -struct snap_iterdata { - struct lustre_handle *conn; - struct lustre_handle *ch_conn; - int index; - int previndex; - int currentindex; - int prevslot; - time_t prevtime; -}; - -inline struct lustre_handle *child_conn(struct lustre_handle *conn); -int snap_deleteobj(obd_id id, obd_gr group, void *data); -int snap_restoreobj(obd_id id, obd_gr group, void *data); -int snap_printobj(obd_id id, obd_gr group, void *data); -int snap_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, void *karg, void *uarg); - -/* In the future, this function may have to deal with offsets into the obdmd. - * Currently, we assume we have the whole obdmd struct. - */ -static __inline__ struct snap_md *snap_obdmd(struct obdo *oa) -{ - return ((struct snap_md *)(&oa->o_obdmd)); -} -#endif diff --git a/lustre/include/linux/obd_support.h b/lustre/include/linux/obd_support.h deleted file mode 100644 index 2a76905..0000000 --- a/lustre/include/linux/obd_support.h +++ /dev/null @@ -1,308 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef _OBD_SUPPORT -#define _OBD_SUPPORT - -#ifdef __KERNEL__ -#include -#include -#include -#include -#else - -#endif -#include - -/* global variables */ -extern atomic_t obd_memory; -extern int obd_memmax; -extern unsigned long obd_fail_loc; -extern unsigned long obd_timeout; -extern unsigned long obd_max_dirty_pages; -extern char obd_lustre_upcall[128]; -extern unsigned long obd_sync_filter; - -#define OBD_FAIL_MDS 0x100 -#define OBD_FAIL_MDS_HANDLE_UNPACK 0x101 -#define OBD_FAIL_MDS_GETATTR_NET 0x102 -#define OBD_FAIL_MDS_GETATTR_PACK 0x103 -#define OBD_FAIL_MDS_READPAGE_NET 0x104 -#define OBD_FAIL_MDS_READPAGE_PACK 0x105 -#define OBD_FAIL_MDS_SENDPAGE 0x106 -#define OBD_FAIL_MDS_REINT_NET 0x107 -#define OBD_FAIL_MDS_REINT_UNPACK 0x108 -#define OBD_FAIL_MDS_REINT_SETATTR 0x109 -#define OBD_FAIL_MDS_REINT_SETATTR_WRITE 0x10a -#define OBD_FAIL_MDS_REINT_CREATE 0x10b -#define OBD_FAIL_MDS_REINT_CREATE_WRITE 0x10c -#define OBD_FAIL_MDS_REINT_UNLINK 0x10d -#define OBD_FAIL_MDS_REINT_UNLINK_WRITE 0x10e -#define OBD_FAIL_MDS_REINT_LINK 0x10f -#define OBD_FAIL_MDS_REINT_LINK_WRITE 0x110 -#define OBD_FAIL_MDS_REINT_RENAME 0x111 -#define OBD_FAIL_MDS_REINT_RENAME_WRITE 0x112 -#define OBD_FAIL_MDS_OPEN_NET 0x113 -#define OBD_FAIL_MDS_OPEN_PACK 0x114 -#define 
OBD_FAIL_MDS_CLOSE_NET 0x115 -#define OBD_FAIL_MDS_CLOSE_PACK 0x116 -#define OBD_FAIL_MDS_CONNECT_NET 0x117 -#define OBD_FAIL_MDS_CONNECT_PACK 0x118 -#define OBD_FAIL_MDS_REINT_NET_REP 0x119 -#define OBD_FAIL_MDS_DISCONNECT_NET 0x11a -#define OBD_FAIL_MDS_GETSTATUS_NET 0x11b -#define OBD_FAIL_MDS_GETSTATUS_PACK 0x11c -#define OBD_FAIL_MDS_STATFS_PACK 0x11d -#define OBD_FAIL_MDS_STATFS_NET 0x11e -#define OBD_FAIL_MDS_GETATTR_NAME_NET 0x11f -#define OBD_FAIL_MDS_ALL_REPLY_NET 0x120 -#define OBD_FAIL_MDS_ALL_REQUEST_NET 0x121 - -#define OBD_FAIL_OST 0x200 -#define OBD_FAIL_OST_CONNECT_NET 0x201 -#define OBD_FAIL_OST_DISCONNECT_NET 0x202 -#define OBD_FAIL_OST_GET_INFO_NET 0x203 -#define OBD_FAIL_OST_CREATE_NET 0x204 -#define OBD_FAIL_OST_DESTROY_NET 0x205 -#define OBD_FAIL_OST_GETATTR_NET 0x206 -#define OBD_FAIL_OST_SETATTR_NET 0x207 -#define OBD_FAIL_OST_OPEN_NET 0x208 -#define OBD_FAIL_OST_CLOSE_NET 0x209 -#define OBD_FAIL_OST_BRW_NET 0x20a -#define OBD_FAIL_OST_PUNCH_NET 0x20b -#define OBD_FAIL_OST_STATFS_NET 0x20c -#define OBD_FAIL_OST_HANDLE_UNPACK 0x20d -#define OBD_FAIL_OST_BRW_WRITE_BULK 0x20e -#define OBD_FAIL_OST_BRW_READ_BULK 0x20f -#define OBD_FAIL_OST_SYNCFS_NET 0x210 -#define OBD_FAIL_OST_ALL_REPLY_NET 0x211 -#define OBD_FAIL_OST_ALL_REQUESTS_NET 0x212 -#define OBD_FAIL_OST_LDLM_REPLY_NET 0x213 -#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 - -#define OBD_FAIL_LDLM 0x300 -#define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 -#define OBD_FAIL_LDLM_ENQUEUE 0x302 -#define OBD_FAIL_LDLM_CONVERT 0x303 -#define OBD_FAIL_LDLM_CANCEL 0x304 -#define OBD_FAIL_LDLM_BL_CALLBACK 0x305 -#define OBD_FAIL_LDLM_CP_CALLBACK 0x306 - -#define OBD_FAIL_OSC 0x400 -#define OBD_FAIL_OSC_BRW_READ_BULK 0x401 -#define OBD_FAIL_OSC_BRW_WRITE_BULK 0x402 -#define OBD_FAIL_OSC_LOCK_BL_AST 0x403 -#define OBD_FAIL_OSC_LOCK_CP_AST 0x404 - -#define OBD_FAIL_PTLRPC 0x500 -#define OBD_FAIL_PTLRPC_ACK 0x501 - -/* preparation for a more advanced failure testbed (not functional yet) */ -#define 
OBD_FAIL_MASK_SYS 0x0000FF00 -#define OBD_FAIL_MASK_LOC (0x000000FF | OBD_FAIL_MASK_SYS) -#define OBD_FAIL_ONCE 0x80000000 -#define OBD_FAILED 0x40000000 -#define OBD_FAIL_MDS_ALL_NET 0x01000000 -#define OBD_FAIL_OST_ALL_NET 0x02000000 - -#define OBD_FAIL_CHECK(id) (((obd_fail_loc & OBD_FAIL_MASK_LOC) == \ - ((id) & OBD_FAIL_MASK_LOC)) && \ - ((obd_fail_loc & (OBD_FAILED | OBD_FAIL_ONCE))!= \ - (OBD_FAILED | OBD_FAIL_ONCE))) - -#define OBD_FAIL_CHECK_ONCE(id) \ -({ int _ret_ = 0; \ - if (OBD_FAIL_CHECK(id)) { \ - CERROR("obd_fail_loc=%x\n", id); \ - obd_fail_loc |= OBD_FAILED; \ - if ((id) & OBD_FAIL_ONCE) \ - obd_fail_loc |= OBD_FAIL_ONCE; \ - _ret_ = 1; \ - } \ - _ret_; \ -}) - -#define OBD_FAIL_RETURN(id, ret) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - RETURN(ret); \ - } \ -} while(0) - -#define OBD_FAIL_TIMEOUT(id, secs) \ -do { \ - if (OBD_FAIL_CHECK_ONCE(id)) { \ - CERROR("obd_fail_timeout id %x sleeping for %ld secs\n", \ - (id), (secs)); \ - set_current_state(TASK_UNINTERRUPTIBLE); \ - schedule_timeout((secs) * HZ); \ - set_current_state(TASK_RUNNING); \ - CERROR("obd_fail_timeout id %x awake\n", \ - (id)); \ - } \ -} while(0) - -#define fixme() CDEBUG(D_OTHER, "FIXME\n"); - -#ifdef __KERNEL__ -#include -#include - - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#define ll_bdevname(a) __bdevname((a)) -#define ll_lock_kernel lock_kernel() -#define LTIME_S(time) (time.tv_sec) -#else -#define ll_lock_kernel -#define ll_bdevname(a) bdevname((a)) -#define LTIME_S(time) (time) -#endif - - -static inline void OBD_FAIL_WRITE(int id, kdev_t dev) -{ - if (OBD_FAIL_CHECK(id)) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#ifdef CONFIG_DEV_RDONLY - CERROR("obd_fail_loc=%x, fail write operation on %s\n", - id, ll_bdevname(dev)); - dev_set_rdonly(dev, 2); -#else - CERROR("obd_fail_loc=%x, can't fail write operation on %s\n", - id, ll_bdevname(dev)); -#endif -#else -#ifdef CONFIG_DEV_RDONLY - CERROR("obd_fail_loc=%x, fail write operation on %s\n", - id, 
ll_bdevname(dev.value)); - dev_set_rdonly(dev, 2); -#else - CERROR("obd_fail_loc=%x, can't fail write operation on %s\n", - id, ll_bdevname(dev.value)); -#endif -#endif - /* We set FAIL_ONCE because we never "un-fail" a device */ - obd_fail_loc |= OBD_FAILED | OBD_FAIL_ONCE; - } -} -#else /* !__KERNEL__ */ -#define LTIME_S(time) (time) -#endif /* __KERNEL__ */ - -#define OBD_ALLOC(ptr, size) \ -do { \ - (ptr) = kmalloc(size, GFP_KERNEL); \ - if ((ptr) == NULL) { \ - CERROR("kmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ -} while (0) - -#ifdef __arch_um__ -# define OBD_VMALLOC(ptr, size) OBD_ALLOC(ptr, size) -#else -# define OBD_VMALLOC(ptr, size) \ -do { \ - (ptr) = vmalloc(size); \ - if ((ptr) == NULL) { \ - CERROR("vmalloc of '" #ptr "' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "vmalloced '" #ptr "': %d at %p (tot %d)\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ -} while (0) -#endif - -#ifdef CONFIG_DEBUG_SLAB -#define POISON(lptr, c, s) do {} while (0) -#else -#define POISON(lptr, c, s) memset(lptr, c, s) -#endif - -#define OBD_FREE(ptr, size) \ -do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ - kfree(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) - -#ifdef __arch_um__ -# define OBD_VFREE(ptr, size) OBD_FREE(ptr, size) -#else -# define OBD_VFREE(ptr, size) 
\ -do { \ - LASSERT(ptr); \ - atomic_sub(size, &obd_memory); \ - CDEBUG(D_MALLOC, "vfreed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - POISON(ptr, 0x5a, size); \ - vfree(ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) -#endif - -#define OBD_SLAB_ALLOC(ptr, slab, type, size) \ -do { \ - LASSERT (!in_interrupt()); \ - (ptr) = kmem_cache_alloc(slab, type); \ - if ((ptr) == NULL) { \ - CERROR("slab-alloc of '"#ptr"' (%d bytes) failed at %s:%d\n", \ - (int)(size), __FILE__, __LINE__); \ - } else { \ - memset(ptr, 0, size); \ - atomic_add(size, &obd_memory); \ - if (atomic_read(&obd_memory) > obd_memmax) \ - obd_memmax = atomic_read(&obd_memory); \ - CDEBUG(D_MALLOC, "slab-alloced '"#ptr"': %d at %p (tot %d)\n",\ - (int)(size), ptr, atomic_read(&obd_memory)); \ - } \ -} while (0) - -#define OBD_SLAB_FREE(ptr, slab, size) \ -do { \ - LASSERT(ptr); \ - CDEBUG(D_MALLOC, "slab-freed '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), ptr, atomic_read(&obd_memory)); \ - atomic_sub(size, &obd_memory); \ - POISON(ptr, 0x5a, size); \ - kmem_cache_free(slab, ptr); \ - (ptr) = (void *)0xdeadbeef; \ -} while (0) - -#endif diff --git a/lustre/include/linux/obd_trace.h b/lustre/include/linux/obd_trace.h deleted file mode 100644 index 524889d..0000000 --- a/lustre/include/linux/obd_trace.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - * - * obdtrace (header file) - is useful for tracing and performance - * debug of the Lustre obd protocol stack. obdtrace is a transparent - * logical obd driver that prints commands their in- and outbound - * parameters. obdtrace maintains statistics about number and latency - * of the obd commands that pass through it. As such it is also use - * for performance analysis. 
- * - * Copyright (c) 2001 Rumi Zahir - */ - -#ifndef __OBD_TRACE_H -#define __OBD_TRACE_H - -#include - -#endif diff --git a/lustre/install-sh b/lustre/install-sh deleted file mode 100755 index e9de238..0000000 --- a/lustre/install-sh +++ /dev/null @@ -1,251 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5 (mit/util/scripts/install.sh). -# -# Copyright 1991 by the Massachusetts Institute of Technology -# -# Permission to use, copy, modify, distribute, and sell this software and its -# documentation for any purpose is hereby granted without fee, provided that -# the above copyright notice appear in all copies and that both that -# copyright notice and this permission notice appear in supporting -# documentation, and that the name of M.I.T. not be used in advertising or -# publicity pertaining to distribution of the software without specific, -# written prior permission. M.I.T. makes no representations about the -# suitability of this software for any purpose. It is provided "as is" -# without express or implied warranty. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. It can only install one file at a time, a restriction -# shared with many OS's install programs. - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -transformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - chmodcmd="" - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. 
- - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/lustre/kernel_patches/README b/lustre/kernel_patches/README deleted file mode 100644 index 1860f13..0000000 --- a/lustre/kernel_patches/README +++ /dev/null @@ -1,717 +0,0 @@ -Patch management scripts -Andrew Morton -18 October 2002 - -This is a description of a bunch of shell scripts which I use for -managing kernel patches. They are quite powerful. They can be used on -projects other than the linux kernel. They are easy to use, and fast. - -You end up doing a ton of recompiling with these scripts, because -you're pushing and popping all the time. ccache takes away the pain of -all that. http://ccache.samba.org/ - be sure to put the cache -directory on the same fs as where you're working so that ccache can use -hardlinks. - -The key philosophical concept is that your primary output is patches. -Not ".c" files, not ".h" files. But patches. So patches are the -first-class object here. - -Installation -============ - -You place all the scripts somewhere in your path, or in -/usr/lib/patch-scripts. - -Terminology -=========== - -The patch scripts require three special directories called "pc", -"patches" and "txt". 
- -If the environment variable PATCHSCRIPTS is set, it is taken to to be -the directory in which those three directories reside. Typically, it -would be a relative pathname. So - - setenv PATCHSCRIPTS ./i-put-them-here - -would tell the patch scripts to look in ./i-put-them-here/pc, etc. - -If PATCHSCRIPTS is not set, and the directory ./patch-scripts is -present then the patch scripts will us ./patch-scripts/pc/, -./patch-scripts/patches/ and ./patch-scripts/txt/. - -Otherwise, the patch scripts use ./pc, ./patches and ./txt. - -In this document, the symbol $P is used to describe the directory which -holds the pc/, patches/ and txt/ directories, as determined by the -above search. - -It is expected that $P will always expand to a relative path. - -Concepts -======== - -All work occurs with a single directory tree. All commands are invoked -within the root of that tree. The scripts manage a "stack" of patches. - -Each patch is a changeset against the base tree plus the preceding patches. - -All patches are listed, in order, in the file ./series. You manage the -series file. Lines in the series file which start with `#' are ignored. - -Any currently-applied patches are described in the file -./applied-patches. The patch scripts manage this file. - -Each patch affects a number of files in the tree. These files are -listed in a "patch control" file. These .pc files live in the -directory $P/pc/ - -Patches are placed in the directory $P/patches/ - -Documentation for the patches is placed in $P/txt/ - -So for a particular patch "my-first-patch" the following will exist: - -- An entry "my-first-patch.patch" in ./series - -- An entry "my-first-patch" in ./applied-patches (if it's currently applied) - -- A file $P/pc/my-first-patch.pc which contains the names of the - files which my-first-patch modifies, adds or removes - -- A file $P/txt/my-first-patch.txt which contains the patch's - changelog. 
- -- A file $P/patches/my-first-patch.patch, which is the output of the - patch scripts. - -Operation -========= - -When a patch "my-patch" is applied with apatch, or with pushpatch -(which calls apatch), all the affected files (from $P/pc/my-patch.pc) -are copied to files with ~my-patch appended. So if $P/pc/my-patch.pc -contained - - kernel/sched.c - fs/inode.c - -then apatch will copy those files into kernel/sched.c~my-patch and -fs/inode.c~my-patch. It will then apply the patch to kernel/sched.c -and fs/inode.c - -When a diff is regenerated by refpatch (which calls mpatch), the diff -is made between kernel/sched.c and kernel/sched.c~my-patch. How do the -scripts know to use "~my-patch"? Because my-patch is the current -topmost patch. It's the last line in ./applied-patches. - -In this way, the whole thing is stackable. If you have four patches -applied, say "patch-1", "patch-2", "patch-3" and "patch-4", and if -patch-2 and patch-4 both touch kernel/sched.c then you will have: - - kernel/sched.c~patch-2 Original copy, before patch-2 - kernel/sched.c~patch-4 Copy before patch-4. Contains changes - from patch-2 - kernel/sched.c Current working copy. Contains changes - from patch-4. - -This means that your diff headers contain "~patch-name" in them, which -is convenient documentation. - -Walkthrough -=========== - -Let's start. - -Go into /usr/src/linux (or wherever) - - mkdir pc patches txt - -Now let's generate a patch - - fpatch my-patch kernel/sched.c - -OK, we've copied kernel/sched.c to kernel/sched.c~my-patch. We've -appended "my-patch" to ./applied-patches and we've put "kernel/sched.c" -into the patch control file, pc/my-patch.pc. - - Now edit kernel/sched.c a bit. - -Now we're ready to document the patch - - Now write txt/my-patch.txt - -Now generate the patch - - refpatch - -This will generate patches/my-patch.patch. Take a look. - -Now remove the patch - - poppatch - -applied-patches is now empty, and the patch is removed. 
- -Now let's add a file to my-patch and then generate my-second-patch: - - Add "my-patch.patch" to ./series (no blank lines in that file please) - - pushpatch - -OK, the patch is applied again. Let's add another file - - fpatch kernel/printk.c - -Note that here we gave fpatch a single argument. So rather than -opening a new patch, it adds kernel/printk.c to the existing topmost -patch. That's my-patch. - - Edit kernel/printk.c - -Refresh my-patch (you end up running refpatch a lot) - - refpatch - -Now start a second patch: - - fpatch my-second-patch kernel/sched.c - -Now take a look at applied-patches. Also do an `ls kernel/sched*'. - - Edit kernel/sched.c, to make some changes for my-second-patch - -Generate my-second-patch: - - refpatch - -Take a look in patches/my-second-patch.patch - -Don't forget to add "my-second-patch.patch" to the series file. - -And remove both patches: - - poppatch - poppatch - - -That's pretty much it, really. - - -Command reference -================= - -Generally, where any of these commands take a "patch-name", that can be -of the form txt/patch-name.txt, patch-name.pc, just patch-name or -whatever. The scripts will strip off a leading "txt/", "patches/" or -"pc/" and any trailing extension. This is so you can do - - apatch patches/a - -to conveniently use shell tabbing to select patch names. - - - -added-by-patch - - Some internal thing. - -apatch [-f] patch-name - - This is the low-level function which adds patches. It does the - copying into ~-files and updates the applied-patches file. It - applies the actual patch. - - apatch will do a patch --dry-run first and will refuse to apply the - patch if the dryrun fails. - - So when you are getting rejects you do this: - - pushpatch # This fails, due to rejects. Drat. - apatch -f patch-name # Force the patch - (or) pushpatch -f # Force the patch - - OK, you've now applied patch-name, but you have rejects. Go fix - those up and do - - refpatch - - And you're ready to move on. 
- -combine-series output-file - - It incrementally combinediffs all the patches in series to make a - complete patch for the series. Requires combinediff from patchutils. - - See http://cyberelk.net/tim/patchutils/ (Don't download the - "experimental" patchutils - it seems to only have half of the - commands in it. Go for "stable") - -cvs-take-patch - - I forget. - -export_patch - - export the patches listed in ./series to a set of files which - are named in such a way that the sort order is the same as the - order of the series file. - - Usage: export_patch directory [prefix] - - Example: - - Suppose ./series contains - - mango.patch - orange.patch - banana.patch - apple.patch - pear.patch - - export_patch ../mypatches fruit - - The patches would be copied to - - ../mypatches/p00001_fruit_mango.patch - ../mypatches/p00002_fruit_orange.patch - ../mypatches/p00003_fruit_banana.patch - ../mypatches/p00004_fruit_apple.patch - ../mypatches/p00005_fruit_pear.patch - - Named in this way, someone may easily apply them: - - cat mypatches/p*fruit* | patch -p1 - - If prefix is omitted, the patchnames will be transformed - such that "original.patch" becomes "pXXXXX_original.patch". - -fpatch [patch-name] foo.c - - If patch-name is given, fpatch will start a new patch which - modifies (or adds, or removes) the single file foo.c. It updates - ./applied-patches and creates pc/patch-name.pc. fpatch will copy - foo.c to foo.c~patch-name in preparation for edits of foo.c. - - If patch-name is not given then fpatch will add foo.c to the - current topmost patch. It will add "foo.c" to $P/pc/$(toppatch).pc. - It will copy foo.c to foo.c~$(toppatch). - -import_patch - - Imports a set of patch files, creating $P/pc, $P/txt, $P/patches and - ./series as necessary. It also creates $P/txt/*.txt by stripping - off the top of the patches (and removes any diffstat output it finds, - so that it can eat refpatch output and export_patch output.)
The - imported patch names are appended to the series file. - - In creating the $P/txt/*.txt files, mail headers are stripped with - formail, preserving the "From:" and "Subject:" lines. "DESC" and - "EDESC" markers are added if they are not already present, using the - "From:" and "Subject:" lines for the DESC portion, if they are present. - (See "patchdesc" command, below, for more on these markers.) - - Also, it can rename the patch file as it is imported by stripping out - a pattern. This is useful if, as often is the case, you have patch - sets with filenames designed to help sort the patches into the correct - order, such as "p001_xxx_funky_stuff.patch" you can have it automatically - renamed to funky_stuff.patch on import, and let the series file manage - the ordering. - - Import_patch will uncompress patches (*.Z, *.bz2, *.gz) as necessary. - - Usage: - - import_patch [-p pattern] patchfile ... - - Example: - - % ls ../fruit/p*patch - ../fruit/p00001_northern_apple.patch - ../fruit/p00001_tropical_mango.patch - ../fruit/p00002_northern_pear.patch - ../fruit/p00002_tropical_orange.patch - ../fruit/p00003_tropical_banana.patch - % import_patch -p 'p[0-9]*_tropical_' ../fruit/p*tropical* - Recreated pc/mango.pc - Recreated pc/orange.pc - Recreated pc/banana.pc - % import_patch -p 'p[0-9]*_northern_' ../fruit/p*northern* - Recreated pc/apple.pc - Recreated pc/pear.pc - - Then you can "pushpatch; refpatch" 5 times. - -inpatch - - List the names of the files which are affected by the current - topmost patch. - - This is basically - - cat pc/$(toppatch).pc - -join-patch patchname - - "joins" the named patch to the current topmost patch. - - Use this when you want to merge two patches into one. All the - files which `patchname' affects are added to pc/$(toppatch).pc (if - they are not already there) and patch `patchname' is applied. The - top patch remains unchanged. You'll need to run refpatch afterwards.
- -mpatch - - A low-level thing to generate patches - -new-kernel - - Some thing I use for importing a new kernel from kernel.org - -p0-2-p1 - - Internal thing to convert patch -p0 form into patch -p1 - -patchdesc - - Generates a single-line description of a patch. - - The txt/my-patch.txt files have the following format: - - - DESC - some short description - EDESC - - The long description - - - I use - - patchdesc $(cat series) - - to generate short-form summaries of the patch series. - -patchfns - - Internal utilities - -pcpatch - - Standalone tool to generate a .pc file from a patch. - - Say someone sends you "his-patch.diff". What you do is: - - cp ~/his-patch.diff patches/his-patch.patch - pcpatch his-patch - - This generates $P/pc/his-patch.pc and you're all set. Add - "his-patch.patch" to ./series in the right place and start pushing. - -p_diff - - I forget - -poppatch - - Remove one or more patches from the current stack. This command - does *not* use the series file. It works purely against - applied-patches. - - Usage: - - poppatch - Remove the topmost patch - poppatch 10 - Remove ten patches - poppatch some-patch-name[.patch] - Remove patches until "some-patch-name" is top patch - -pstatus - - Shows status of patches - - Usage: - pstatus [patchfile ...] - - One line per patch is output showing: - 1: Patch number in the series file - 2: Whether the patch is currently applied - 3: Name of patch - 4: Status of the patch (needs pcpatch, changelog, refpatch) - - If no patchfiles are specified, $P/patches/*.patch - are assumed. - - Caveats: - A patch set which contains separate patches to add a file - and modify that same file may give spurious "Needs refpatch" - status for the patch which adds the file or the topmost patch. - -ptkdiff - - Two modes: - - ptkdiff - - - Run tkdiff against all the file affected - by $(toppatch). The diff is only for the changes made - by the top patch! ie: it's between "filename" and - "filename~toppatch-name". 
- - ptkdiff filename - - Just run tkdiff against that file, - showing the changes which are due to toppatch. - -pushpatch [-f] - - Apply the next patch, from the series file. - - This consults ./applied-patches to find out the top patch, then - consults ./series to find the next patch. And pushes it. - - pushpatch - - Apply the next patch - - pushpatch 10 - - Apply the next ten patches - - pushpatch some-patch-name - - Keep pushing patches until "some-patch-name" is toppatch - - pushpatch -f - - Push the next patch, ignoring rejects. - -refpatch - - regenerates the topmost patch. Reads all the affected files - from pc/$(toppatch).pc and diffs them against their tilde-files. - - Also pastes into the patch your patch documentation and - generates a diffstat summary. - -removed-by-patch - - Some thing. - -rename-patch - - CVS rename for patches. - -rolled-up-patch - - Bit of a hack. Is designed to generate a rolled-up diff of all - currently-applied patches. But it requires a ../linux-2.x.y tree to - diff against. Needs to be redone. - -rpatch - - Internal command - -split-patch - - Some thing someone wrote to split patches up. I don't use it. - -tag-series - - Assuming you keep pc/*, patches/* and txt/* under CVS revision - control, tag-series allows you to tag a patchset's individual - components. I use - - tag-series s2_5_44-mm3 pc/2.5.44-mm3-series - - which will attach the cvs tag "s2_5_44-mm3" to every .pc, .patch - and .txt file which is mentioned in the series file - "pc/2.5.44-mm3-series". - - It will also tag pc/2.5.44-mm3-series, which is a bit redundant - given that I use a different series file for each patchset release.. - - -toppatch - - Print the name of the topmost patch. From ./applied-patches - -touched-by-patch patch-filename - - List the names of files which are affected by a diff. - -unitdiff.py - - Rasmus Andersen's script to convert a diff into minimum-context - form. This form has a better chance of applying if you're getting - nasty rejects.
But patch can and will make mistakes when fed - small-context input. - - -Work Practices -============== - -I keep the kernel tree, the $P/pc/, $P/patches/ and $P/txt/ contents under -CVS control. This is important... - -I have several "series" files. I keep these in $P/pc/foo-series and use - - ln -s pc/foo-series series - -when I'm working on foo. - -If someone sends me a patch I'll do: - - cp ~/whatever patches/his-patch.patch - pcpatch his-patch - apatch his-patch - - If apatch fails then run `apatch -f his-patch' and fix the rejects. - - refpatch - - to clean up any fuzz. - - poppatch - cvs add pc/his-patch.pc patches/his-patch.patch - cvs commit pc patches - - Now edit ./series and place "his-patch.patch" in the appropriate place. - - -If you're working on a particular patch (say, "dud-patch") and you -balls something up, just run: - - refpatch # Generate the crap patch - poppatch # Remove it all - rm patches/dud-patch.patch - cvs up patches/dud-patch.patch - -and all is well. - - -Getting updates from Linus -========================== - -What I do is to grab the latest -bk diff from -http://www.kernel.org/pub/linux/kernel/people/dwmw2/bk-2.5/ -and do: - - gzip -d < cs > patches/linus.patch - pcpatch linus - apatch linus | grep diff - - Now fix up all the files which got deleted, - because there's something wrong with bitkeeper diffs: - - cvs up -ko - - apatch linus - $EDITOR txt/linus.txt - - Add the changeset number to txt/linus.txt - - refpatch - poppatch - - Now add "linus.patch" as the first entry in your ./series file and - start pushing your other patches on top of that. - -BUGS -==== - -Tons and tons. The scripts are fragile, the error handling is ungraceful and -if you do something silly you can end up in a pickle. - -Generally the scripts are very careful to not wreck your files or your -patches. But they can get the ./applied-patches and ~-files into an -awkward state.
- -Usually you can sort it out by copying the ~-files back onto the originals -and removing the last line from ./applied-patches. Or do a "refpatch ; -poppatch ; rm patches/troublesome-patch.patch ; cvs up patches". - -If it's really bad, just blow away the entire tree and do a new CVS checkout. - - -Working on non-kernel projects -============================== - -Well it's the same thing. Say you've downloaded a copy of util-linux -and you want to make a change: - - cd /usr/src - tar xvfz ~/util-linux.tar.gz - cd util-linux - mkdir pc patches txt - fpatch my-patch sys-utils/rdev.c - fpatch sys-utils/ipcs.8 - - refpatch - - -How to balls things up -====================== - -Well here's one way. Suppose you have 20 patches applied, and three of -them (say, "p1", "p6" and "p11") all modify "foo.c". - -Now you go and change foo.c. - -Well, to which patch does that change belong? You need to decide. -Let's say you decide "p6". - -If you run `refpatch' when "p11" is toppatch then you lose. The diff -went into p11. - -What you can do is: - -1: - poppatch p6 - - refpatch - pushpatch p11 - - - (See why ccache is looking good?) - -or - -2: - - - poppatch p6 - refpatch - - -Another good way of ballsing up is to cheat. Say "oh I just want to make -this one-line change". And "oh, and this one". - -Now you're getting in a mess. It's much, much better to just use the system: - - fpatch junk file1 - fpatch file2 - - - refpatch - poppatch - rm pc/junk.pc patches/junk.patch - -Merging with -mm kernels -======================== - -Haven't tried this, but it should work: - -- Grab all the patches from broken-out/, place them in your $P/patches/ - -- Copy my series file into ./series (or $P/pc/akpm-series and symlink it) - -- pushpatch 99 - -And you're off and running. The nice thing about this is that you can -send me incremental diffs to diffs which I already have. - -Or whatever. I'm fairly handy with diffs nowadays. Rejects are -expected. 
I just prefer to have "one concept per diff". - diff --git a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-i386 b/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-i386 deleted file mode 100644 index 94ee0ab..0000000 --- a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-i386 +++ /dev/null @@ -1,1834 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_LOLAT=y -# CONFIG_LOLAT_SYSCTL is not set -CONFIG_M386=y -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -# CONFIG_M686 is not set -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -# CONFIG_X86_CMPXCHG is not set -# CONFIG_X86_XADD is not set -CONFIG_X86_L1_CACHE_SHIFT=4 -CONFIG_RWSEM_GENERIC_SPINLOCK=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_MCE=y -# CONFIG_CPU_FREQ is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -# CONFIG_MICROCODE is not set -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -CONFIG_NOHIGHMEM=y -# CONFIG_HIGHMEM4G is not set -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHIO=y -CONFIG_MATH_EMULATION=y -CONFIG_MTRR=y -# CONFIG_SMP is not set -CONFIG_X86_UP_APIC=y -CONFIG_X86_UP_IOAPIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y - -# -# General setup -# -CONFIG_HZ=100 -CONFIG_NET=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y 
-CONFIG_PCI_NAMES=y - -# -# Performance-monitoring counters support -# -# CONFIG_PERFCTR is not set -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_IBM is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -# CONFIG_IKCONFIG is not set -CONFIG_PM=y - -# -# Additional device driver support -# -CONFIG_CIPE=m -CONFIG_CRYPTO_AEP=m -CONFIG_MEGARAC=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m -# CONFIG_IBMASM is not set -# CONFIG_IBMSER is not set -# CONFIG_ACPI is not set -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Binary emulation of other systems -# -CONFIG_ABI=m -CONFIG_ABI_SVR4=m -CONFIG_ABI_UW7=m -# CONFIG_ABI_SOLARIS is not set -CONFIG_ABI_IBCS=m -CONFIG_ABI_ISC=m -CONFIG_ABI_SCO=m -# CONFIG_ABI_WYSE is not set -CONFIG_BINFMT_COFF=m -CONFIG_BINFMT_XOUT=m -# CONFIG_BINFMT_XOUT_X286 is not set -CONFIG_ABI_SPX=y -CONFIG_ABI_XTI=y -CONFIG_ABI_TLI_OPTMGMT=y -# CONFIG_ABI_XTI_OPTMGMT is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# 
CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m -CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -# CONFIG_BLK_DEV_ENBD is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=m -CONFIG_CIPHERS=m -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_IDENTITY=m -CONFIG_CRYPTODEV=m -CONFIG_CRYPTOLOOP=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y 
-CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=m - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_FILTER=m 
-CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set -# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set -# 
CONFIG_BLK_DEV_IDEDISK_IBM is not set -# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set -# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set -# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set -# CONFIG_BLK_DEV_IDEDISK_WD is not set -# CONFIG_BLK_DEV_COMMERIAL is not set -# CONFIG_BLK_DEV_TIVO is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -CONFIG_BLK_DEV_RZ1000=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -# CONFIG_BLK_DEV_IDEDMA_TIMEOUT is not set -# CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set -CONFIG_BLK_DEV_ADMA=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_AEC62XX_TUNING=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_CMD680=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_PIIX_TUNING=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_ADMA100=y -CONFIG_BLK_DEV_PDC202XX=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -CONFIG_BLK_DEV_CENATEK=y -# CONFIG_IDE_CHIPSETS is not set -# CONFIG_BLK_DEV_ELEVATOR_NOOP is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m 
-CONFIG_BLK_DEV_ATARAID_HPT=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=253 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -CONFIG_AIC79XX_ENABLE_RD_STRM=y -# CONFIG_AIC79XX_DEBUG_ENABLE is not set -CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m 
-# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_NEWISP=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m -CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -# CONFIG_SCSI_U14_34F_LINKED_COMMANDS is not set -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -# CONFIG_IEEE1394_PCILYNX is not set -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m 
-CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m -CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -CONFIG_EWRK3=m -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -CONFIG_TC35815=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -CONFIG_NET_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -# CONFIG_NATSEMI_CABLE_MAGIC is not set -CONFIG_NE2K_PCI=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_NEW_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_SIS900_OLD=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -CONFIG_TLAN=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set 
-CONFIG_DL2K=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_SK98LIN=m -CONFIG_NET_BROADCOM=m -CONFIG_TIGON3=m -CONFIG_NET_E1000=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m -CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y -CONFIG_PCMCIA_HERMES_OLD=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_TMSISA=m -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -CONFIG_FARSYNC=m -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m 
-CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_ARCNET_COM20020_CS is not set -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_WVLAN=m -CONFIG_AIRONET4500_CS=m - -# -# Quadrics Supercomputers -# - -# -# QsNet -# -CONFIG_QUADRICS=y -CONFIG_QSNETMOD=m -CONFIG_ELAN3MOD=m -CONFIG_EPMOD=m -CONFIG_EIPMOD=m -CONFIG_RMSMOD=m -CONFIG_JTAG=m - -# -# QsNet II -# - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -# CONFIG_MKISS is not set -# CONFIG_6PACK is not set -# CONFIG_BPQETHER is not set -# CONFIG_DMASCC is not set -# CONFIG_SCC is not set -# CONFIG_BAYCOM_SER_FDX is not set -# CONFIG_BAYCOM_SER_HDX is not set -# CONFIG_BAYCOM_PAR is not set -# 
CONFIG_BAYCOM_EPP is not set -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -# CONFIG_YAM is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y -CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_ISURF=y 
-CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_DEBUG=y -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m -CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_ECC=m -# CONFIG_CHAOSTEST is not set -# CONFIG_P4THERM is not set -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m 
-CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=512 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -CONFIG_I2C_I810=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM9240=m -CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=m -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m 
-CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -CONFIG_AMD7XX_TCO=m -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set -CONFIG_SONYPI=m - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -# CONFIG_FT_PROC_FS is not set -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -# CONFIG_DRM is not set - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m -CONFIG_BATTERY_GERICOM=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m 
-CONFIG_VIDEO_BT848=m -# CONFIG_VIDEO_LS220 is not set -# CONFIG_VIDEO_MARGI is not set -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m -CONFIG_VIDEO_MEYE=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# Crypto Hardware support -# -CONFIG_CRYPTO=m -CONFIG_CRYPTO_BROADCOM=m - -# -# File systems -# -CONFIG_QUOTA=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=m -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# 
CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -# CONFIG_PFS_FS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m 
-CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -# CONFIG_FB_VGA16 is not set -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -# CONFIG_FB_MATROX_G450 is not set -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y -CONFIG_SPEAKUP=y -CONFIG_SPEAKUP_ACNTSA=y -CONFIG_SPEAKUP_ACNTPC=y -CONFIG_SPEAKUP_APOLO=y -CONFIG_SPEAKUP_AUDPTR=y -CONFIG_SPEAKUP_BNS=y -CONFIG_SPEAKUP_DECTLK=y -CONFIG_SPEAKUP_DECEXT=y -CONFIG_SPEAKUP_DTLK=y -CONFIG_SPEAKUP_LTLK=y -CONFIG_SPEAKUP_SPKOUT=y -CONFIG_SPEAKUP_TXPRT=y -CONFIG_SPEAKUP_DEFAULT="none" -# CONFIG_SPEAKUP_KEYMAP is not set - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_BT878=m 
-CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m -CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -CONFIG_SOUND_MSNDCLAS=m -# CONFIG_MSNDCLAS_HAVE_BOOT is not set -CONFIG_MSNDCLAS_INIT_FILE="/etc/sound/msndinit.bin" -CONFIG_MSNDCLAS_PERM_FILE="/etc/sound/msndperm.bin" -CONFIG_SOUND_MSNDPIN=m -# CONFIG_MSNDPIN_HAVE_BOOT is not set -CONFIG_MSNDPIN_INIT_FILE="/etc/sound/pndspini.bin" -CONFIG_MSNDPIN_PERM_FILE="/etc/sound/pndsperm.bin" -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -CONFIG_SOUND_GUS16=y -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -# CONFIG_PSS_MIXER is not set -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 
-CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_LONG_TIMEOUT=y -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is not set -CONFIG_USB_BLUETOOTH=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_WACOM=m -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set 
-CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -CONFIG_USB_SERIAL_KEYSPAN_USA49W=y -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_USBLCD=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_FW_LOAD=y -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -# CONFIG_FRAME_POINTER is not set -# CONFIG_STACK_TRACE_SCAN is not set -CONFIG_STACK_TRACE_PARAM_COUNT=4 -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_MCL_COREDUMP is not set -# CONFIG_OPROFILE is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m diff --git a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos b/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos deleted file mode 100644 index 0de1146..0000000 --- a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-p4smp-61chaos +++ /dev/null @@ -1,1035 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -CONFIG_ISA=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_LOLAT=y -# CONFIG_LOLAT_SYSCTL is not set -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -# CONFIG_M686 is not set -# 
CONFIG_MPENTIUMIII is not set -CONFIG_MPENTIUM4=y -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_MCE=y -# CONFIG_CPU_FREQ is not set -# CONFIG_TOSHIBA is not set -# CONFIG_I8K is not set -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -CONFIG_SMP=y -# CONFIG_MULTIQUAD is not set -CONFIG_HAVE_DEC_LOCK=y - -# -# General setup -# -CONFIG_HZ=100 -CONFIG_NET=y -CONFIG_X86_IO_APIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_PCI_NAMES=y - -# -# Performance-monitoring counters support -# -CONFIG_PERFCTR=m -CONFIG_KPERFCTR=y -# CONFIG_PERFCTR_DEBUG is not set -# CONFIG_PERFCTR_INIT_TESTS is not set -CONFIG_PERFCTR_VIRTUAL=y -CONFIG_PERFCTR_GLOBAL=y -# CONFIG_EISA is not set -# CONFIG_MCA is not set -# CONFIG_HOTPLUG is not set -# CONFIG_PCMCIA is not set -# CONFIG_HOTPLUG_PCI is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -# CONFIG_IKCONFIG is not set -CONFIG_PM=y - -# -# Additional device driver support -# -# CONFIG_CIPE is not set -# CONFIG_CRYPTO_AEP is not set -# CONFIG_MEGARAC 
is not set -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -# CONFIG_SCSI_ISCSI is not set -CONFIG_IBMASM=m -CONFIG_IBMSER=m -# CONFIG_ACPI is not set -CONFIG_APM=y -CONFIG_APM_IGNORE_USER_SUSPEND=y -# CONFIG_APM_DO_ENABLE is not set -# CONFIG_APM_CPU_IDLE is not set -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Binary emulation of other systems -# -# CONFIG_ABI is not set -# CONFIG_ABI_SVR4 is not set -# CONFIG_BINFMT_COFF is not set -# CONFIG_BINFMT_XOUT is not set -# CONFIG_BINFMT_XOUT_X286 is not set - -# -# Memory Technology Devices (MTD) -# -CONFIG_MTD=y -# CONFIG_MTD_DEBUG is not set -# CONFIG_MTD_PARTITIONS is not set -# CONFIG_MTD_CONCAT is not set -# CONFIG_MTD_REDBOOT_PARTS is not set -# CONFIG_MTD_CMDLINE_PARTS is not set -CONFIG_MTD_CHAR=m -# CONFIG_MTD_BLOCK is not set -# CONFIG_MTD_BLOCK_RO is not set -# CONFIG_FTL is not set -# CONFIG_NFTL is not set - -# -# RAM/ROM/Flash chip drivers -# -# CONFIG_MTD_CFI is not set -CONFIG_MTD_JEDECPROBE=y -CONFIG_MTD_GEN_PROBE=y -CONFIG_MTD_CFI_ADV_OPTIONS=y -CONFIG_MTD_CFI_NOSWAP=y -# CONFIG_MTD_CFI_BE_BYTE_SWAP is not set -# CONFIG_MTD_CFI_LE_BYTE_SWAP is not set -CONFIG_MTD_CFI_GEOMETRY=y -CONFIG_MTD_CFI_B1=y -# CONFIG_MTD_CFI_B2 is not set -# CONFIG_MTD_CFI_B4 is not set -# CONFIG_MTD_CFI_B8 is not set -CONFIG_MTD_CFI_I1=y -# CONFIG_MTD_CFI_I2 is not set -# CONFIG_MTD_CFI_I4 is not set -# CONFIG_MTD_CFI_I8 is not set -CONFIG_MTD_CFI_INTELEXT=y -CONFIG_MTD_CFI_AMDSTD=y -# CONFIG_MTD_RAM is not set -CONFIG_MTD_ROM=y -# CONFIG_MTD_ABSENT is not set -# CONFIG_MTD_OBSOLETE_CHIPS is not set -# CONFIG_MTD_AMDSTD is not set -# CONFIG_MTD_SHARP is not set -# CONFIG_MTD_JEDEC is not set - -# -# Mapping drivers for chip access -# -# CONFIG_MTD_PHYSMAP is not set -# CONFIG_MTD_PNC2000 is not set -# CONFIG_MTD_SC520CDP is not set -# CONFIG_MTD_NETSC520 is not set -# CONFIG_MTD_SBC_GXX is not set -# CONFIG_MTD_ELAN_104NC is 
not set -# CONFIG_MTD_DILNETPC is not set -# CONFIG_MTD_MIXMEM is not set -# CONFIG_MTD_OCTAGON is not set -# CONFIG_MTD_VMAX is not set -# CONFIG_MTD_L440GX is not set -# CONFIG_MTD_AMD766ROM is not set -CONFIG_MTD_ICH2ROM=m -# CONFIG_MTD_PCI is not set - -# -# Self-contained MTD device drivers -# -# CONFIG_MTD_PMC551 is not set -# CONFIG_MTD_SLRAM is not set -# CONFIG_MTD_MTDRAM is not set -# CONFIG_MTD_BLKMTD is not set -# CONFIG_MTD_DOC1000 is not set -# CONFIG_MTD_DOC2000 is not set -# CONFIG_MTD_DOC2001 is not set -# CONFIG_MTD_DOCPROBE is not set - -# -# NAND Flash Device Drivers -# -# CONFIG_MTD_NAND is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y -# CONFIG_PNPBIOS is not set - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -# CONFIG_BLK_DEV_XD is not set -# CONFIG_PARIDE is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_CISS_SCSI_TAPE is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_ENBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -# CONFIG_MD_LINEAR is not set -# CONFIG_MD_RAID0 is not set -# CONFIG_MD_RAID1 is not set -# CONFIG_MD_RAID5 is not set -# CONFIG_MD_MULTIPATH is not set -CONFIG_BLK_DEV_LVM=m - -# -# Cryptography support (CryptoAPI) -# -# CONFIG_CRYPTO is not set -# CONFIG_CIPHERS is not set -# CONFIG_CRYPTODEV is not set -# CONFIG_CRYPTOLOOP is 
not set - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_TUX is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -# CONFIG_IP_NF_FTP is not set -# CONFIG_IP_NF_IRC is not set -# CONFIG_IP_NF_QUEUE is not set -# CONFIG_IP_NF_IPTABLES is not set -# CONFIG_IP_NF_ARPTABLES is not set -# CONFIG_IP_NF_COMPAT_IPCHAINS is not set -# CONFIG_IP_NF_COMPAT_IPFWADM is not set - -# -# IP: Virtual Server Configuration -# -# CONFIG_IP_VS is not set -# CONFIG_IPV6 is not set -CONFIG_KHTTPD=m -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -CONFIG_NET_PKTGEN=m - -# -# Telephony Support -# -# CONFIG_PHONE is not set -# CONFIG_PHONE_IXJ is not set -# CONFIG_PHONE_IXJ_PCMCIA is not set - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# 
CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -# CONFIG_BLK_DEV_IDEDISK_VENDOR is not set -# CONFIG_BLK_DEV_IDEDISK_FUJITSU is not set -# CONFIG_BLK_DEV_IDEDISK_IBM is not set -# CONFIG_BLK_DEV_IDEDISK_MAXTOR is not set -# CONFIG_BLK_DEV_IDEDISK_QUANTUM is not set -# CONFIG_BLK_DEV_IDEDISK_SEAGATE is not set -# CONFIG_BLK_DEV_IDEDISK_WD is not set -# CONFIG_BLK_DEV_COMMERIAL is not set -# CONFIG_BLK_DEV_TIVO is not set -# CONFIG_BLK_DEV_IDECS is not set -CONFIG_BLK_DEV_IDECD=m -# CONFIG_BLK_DEV_IDETAPE is not set -CONFIG_BLK_DEV_IDEFLOPPY=y -# CONFIG_BLK_DEV_IDESCSI is not set -# CONFIG_IDE_TASK_IOCTL is not set -# CONFIG_BLK_DEV_CMD640 is not set -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -# CONFIG_BLK_DEV_RZ1000 is not set -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -# CONFIG_BLK_DEV_IDEDMA_TIMEOUT is not set -# CONFIG_IDEDMA_NEW_DRIVE_LISTINGS is not set -CONFIG_BLK_DEV_ADMA=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_AEC62XX_TUNING=y -CONFIG_BLK_DEV_ALI15X3=y -# CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_CMD680=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_PIIX_TUNING=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -# CONFIG_BLK_DEV_ADMA100 is not set -CONFIG_BLK_DEV_PDC202XX=y -# CONFIG_PDC202XX_BURST is not set -# CONFIG_PDC202XX_FORCE is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y 
-CONFIG_BLK_DEV_CENATEK=y -# CONFIG_IDE_CHIPSETS is not set -# CONFIG_BLK_DEV_ELEVATOR_NOOP is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_IDE_MODES=y -# CONFIG_BLK_DEV_ATARAID is not set -# CONFIG_BLK_DEV_ATARAID_PDC is not set -# CONFIG_BLK_DEV_ATARAID_HPT is not set - -# -# SCSI support -# -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_SD_EXTRA_DEVS=40 -# CONFIG_CHR_DEV_ST is not set -# CONFIG_CHR_DEV_OSST is not set -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -CONFIG_SCSI_MULTI_LUN=y -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -# CONFIG_BLK_DEV_3W_XXXX_RAID is not set -# CONFIG_SCSI_7000FASST is not set -# CONFIG_SCSI_ACARD is not set -# CONFIG_SCSI_AHA152X is not set -# CONFIG_SCSI_AHA1542 is not set -# CONFIG_SCSI_AHA1740 is not set -# CONFIG_SCSI_AACRAID is not set -CONFIG_SCSI_AIC7XXX=y -CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -# CONFIG_SCSI_AIC79XX is not set -# CONFIG_SCSI_DPT_I2O is not set -# CONFIG_SCSI_ADVANSYS is not set -# CONFIG_SCSI_IN2000 is not set -# CONFIG_SCSI_AM53C974 is not set -# CONFIG_SCSI_MEGARAID is not set -# CONFIG_SCSI_BUSLOGIC is not set -# CONFIG_SCSI_CPQFCTS is not set -# CONFIG_SCSI_DMX3191D is not set -# CONFIG_SCSI_DTC3280 is not set -# CONFIG_SCSI_EATA is not set -# CONFIG_SCSI_EATA_DMA is not set -# CONFIG_SCSI_EATA_PIO is not set -# CONFIG_SCSI_FUTURE_DOMAIN is not set -# CONFIG_SCSI_GDTH is not set -# CONFIG_SCSI_GENERIC_NCR5380 is not set -# CONFIG_SCSI_IPS is not set -# CONFIG_SCSI_INITIO is not set -# CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_PPA is not set -# CONFIG_SCSI_IMM is not set -# CONFIG_SCSI_NCR53C406A is not set -# CONFIG_SCSI_NCR53C7xx is not set -# CONFIG_SCSI_SYM53C8XX_2 is not set -# 
CONFIG_SCSI_NCR53C8XX is not set -# CONFIG_SCSI_SYM53C8XX is not set -# CONFIG_SCSI_PAS16 is not set -# CONFIG_SCSI_PCI2000 is not set -# CONFIG_SCSI_PCI2220I is not set -# CONFIG_SCSI_PSI240I is not set -# CONFIG_SCSI_QLOGIC_FAS is not set -# CONFIG_SCSI_QLOGIC_ISP is not set -# CONFIG_SCSI_QLOGIC_FC is not set -# CONFIG_SCSI_QLOGIC_1280 is not set -# CONFIG_SCSI_NEWISP is not set -# CONFIG_SCSI_SEAGATE is not set -# CONFIG_SCSI_SIM710 is not set -# CONFIG_SCSI_SYM53C416 is not set -# CONFIG_SCSI_DC390T is not set -# CONFIG_SCSI_T128 is not set -# CONFIG_SCSI_U14_34F is not set -# CONFIG_SCSI_ULTRASTOR is not set -CONFIG_SCSI_DEBUG=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=y -CONFIG_FUSION_BOOT=y -CONFIG_FUSION_ISENSE=m -CONFIG_FUSION_CTL=m -# CONFIG_FUSION_LAN is not set - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set -# CONFIG_I2O_PCI is not set -# CONFIG_I2O_BLOCK is not set -# CONFIG_I2O_LAN is not set -# CONFIG_I2O_SCSI is not set -# CONFIG_I2O_PROC is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set -# CONFIG_ETHERTAP is not set -# CONFIG_NET_SB1000 is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -# CONFIG_SUNGEM is not set -# CONFIG_NET_VENDOR_3COM is not set -# CONFIG_LANCE is not set -# CONFIG_NET_VENDOR_SMC is not set -# CONFIG_NET_VENDOR_RACAL is not set -# CONFIG_AT1700 is not set -# CONFIG_DEPCA is not set -# CONFIG_HP100 is not set -# CONFIG_NET_ISA is not set -CONFIG_NET_PCI=y -# CONFIG_PCNET32 is not set -# CONFIG_ADAPTEC_STARFIRE is not set -# CONFIG_AC3200 is not set -# CONFIG_APRICOT is not set -# CONFIG_CS89x0 is not set -CONFIG_TULIP=m -# 
CONFIG_TC35815 is not set -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -# CONFIG_DGRS is not set -# CONFIG_DM9102 is not set -CONFIG_EEPRO100=m -CONFIG_NET_E100=m -# CONFIG_LNE390 is not set -# CONFIG_FEALNX is not set -# CONFIG_NATSEMI is not set -# CONFIG_NE2K_PCI is not set -# CONFIG_NE3210 is not set -# CONFIG_ES3210 is not set -# CONFIG_8139CP is not set -# CONFIG_8139TOO is not set -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -# CONFIG_8139TOO_8129 is not set -# CONFIG_8139_NEW_RX_RESET is not set -# CONFIG_SIS900 is not set -# CONFIG_SIS900_OLD is not set -# CONFIG_EPIC100 is not set -# CONFIG_SUNDANCE is not set -# CONFIG_TLAN is not set -# CONFIG_VIA_RHINE is not set -# CONFIG_VIA_RHINE_MMIO is not set -# CONFIG_WINBOND_840 is not set -# CONFIG_NET_POCKET is not set - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -# CONFIG_DL2K is not set -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_SK98LIN is not set -CONFIG_NET_BROADCOM=m -CONFIG_TIGON3=m -CONFIG_NET_E1000=m -# CONFIG_FDDI is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_HIPPI is not set -# CONFIG_PLIP is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set -# CONFIG_NET_FC is not set -# CONFIG_RCPCI is not set -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# Quadrics Supercomputers -# - -# -# QsNet -# -CONFIG_QUADRICS=y -CONFIG_QSNETMOD=m -CONFIG_ELAN3MOD=m -CONFIG_EPMOD=m -CONFIG_EIPMOD=m -CONFIG_RMSMOD=m -CONFIG_JTAG=m - -# -# QsNet II -# - -# -# Amateur Radio support -# -# CONFIG_HAMRADIO is not set - -# -# IrDA (infrared) support -# -# CONFIG_IRDA is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, 
not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -# CONFIG_INPUT is not set -# CONFIG_INPUT_KEYBDEV is not set -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_EVDEV is not set - -# -# Character devices -# -CONFIG_ECC=m -CONFIG_CHAOSTEST=m -CONFIG_P4THERM=m -CONFIG_VT=y -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -# CONFIG_SERIAL_MANY_PORTS is not set -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -# CONFIG_SERIAL_MULTIPORT is not set -# CONFIG_HUB6 is not set -# CONFIG_SERIAL_NONSTANDARD is not set -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -# CONFIG_PRINTER is not set -# CONFIG_PPDEV is not set - -# -# I2C support -# -CONFIG_I2C=y -# CONFIG_I2C_ALGOBIT is not set -# CONFIG_I2C_ALGOPCF is not set -CONFIG_I2C_MAINBOARD=y -# CONFIG_I2C_ALI1535 is not set -# CONFIG_I2C_ALI15X3 is not set -# CONFIG_I2C_HYDRA is not set -# CONFIG_I2C_AMD756 is not set -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -# CONFIG_I2C_I810 is not set -# CONFIG_I2C_PIIX4 is not set -# CONFIG_I2C_SIS5595 is not set -# CONFIG_I2C_VIA is not set -# CONFIG_I2C_VIAPRO is not set -# CONFIG_I2C_VOODOO3 is not set -CONFIG_I2C_ISA=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_PROC=y - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -# CONFIG_SENSORS_ADM1024 is not set -# CONFIG_SENSORS_ADM1025 is not set -# CONFIG_SENSORS_ADM9240 is not set -# CONFIG_SENSORS_DS1621 is not set -# CONFIG_SENSORS_FSCPOS is not set -# CONFIG_SENSORS_FSCSCY is not set -# CONFIG_SENSORS_GL518SM is not set -# CONFIG_SENSORS_GL520SM is not set -# CONFIG_SENSORS_MAXILIFE is not set -# CONFIG_SENSORS_IT87 is not set -# CONFIG_SENSORS_MTP008 is not set -# CONFIG_SENSORS_LM75 is not set -# CONFIG_SENSORS_LM78 is not set -# CONFIG_SENSORS_LM80 is not set -CONFIG_SENSORS_LM87=m -# CONFIG_SENSORS_SIS5595 is not set -# CONFIG_SENSORS_THMC50 is not set -# 
CONFIG_SENSORS_VIA686A is not set -CONFIG_SENSORS_W83781D=y -# CONFIG_SENSORS_OTHER is not set - -# -# Mice -# -# CONFIG_BUSMOUSE is not set -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -# CONFIG_82C710_MOUSE is not set -# CONFIG_PC110_PAD is not set -# CONFIG_MK712_MOUSE is not set - -# -# Joysticks -# -# CONFIG_INPUT_GAMEPORT is not set -# CONFIG_QIC02_TAPE is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_AMD_RNG is not set -# CONFIG_INTEL_RNG is not set -# CONFIG_AMD_PM768 is not set -# CONFIG_NVRAM is not set -CONFIG_RTC=y -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_SONYPI is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_FTAPE is not set -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -# CONFIG_AGP_I810 is not set -# CONFIG_AGP_VIA is not set -# CONFIG_AGP_AMD is not set -# CONFIG_AGP_SIS is not set -# CONFIG_AGP_ALI is not set -# CONFIG_AGP_SWORKS is not set -# CONFIG_DRM is not set -# CONFIG_MWAVE is not set -# CONFIG_BATTERY_GERICOM is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Crypto Hardware support -# -# CONFIG_CRYPTO is not set - -# -# File systems -# -# CONFIG_QUOTA is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_EXTN_FS=m -CONFIG_JBD=y -CONFIG_JBD_DEBUG=y -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=y -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -# CONFIG_JFS_FS is not set -# 
CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NFSD_TCP=y -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -# CONFIG_PFS_FS is not set -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_SMB_NLS is not set -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -# CONFIG_NLS_CODEPAGE_737 is not set -# CONFIG_NLS_CODEPAGE_775 is not set -CONFIG_NLS_CODEPAGE_850=m -# CONFIG_NLS_CODEPAGE_852 is not set 
-# CONFIG_NLS_CODEPAGE_855 is not set -# CONFIG_NLS_CODEPAGE_857 is not set -# CONFIG_NLS_CODEPAGE_860 is not set -# CONFIG_NLS_CODEPAGE_861 is not set -# CONFIG_NLS_CODEPAGE_862 is not set -# CONFIG_NLS_CODEPAGE_863 is not set -# CONFIG_NLS_CODEPAGE_864 is not set -# CONFIG_NLS_CODEPAGE_865 is not set -# CONFIG_NLS_CODEPAGE_866 is not set -# CONFIG_NLS_CODEPAGE_869 is not set -# CONFIG_NLS_CODEPAGE_936 is not set -# CONFIG_NLS_CODEPAGE_950 is not set -# CONFIG_NLS_CODEPAGE_932 is not set -# CONFIG_NLS_CODEPAGE_949 is not set -# CONFIG_NLS_CODEPAGE_874 is not set -# CONFIG_NLS_ISO8859_8 is not set -# CONFIG_NLS_CODEPAGE_1250 is not set -# CONFIG_NLS_CODEPAGE_1251 is not set -CONFIG_NLS_ISO8859_1=m -# CONFIG_NLS_ISO8859_2 is not set -# CONFIG_NLS_ISO8859_3 is not set -# CONFIG_NLS_ISO8859_4 is not set -# CONFIG_NLS_ISO8859_5 is not set -# CONFIG_NLS_ISO8859_6 is not set -# CONFIG_NLS_ISO8859_7 is not set -# CONFIG_NLS_ISO8859_9 is not set -# CONFIG_NLS_ISO8859_13 is not set -# CONFIG_NLS_ISO8859_14 is not set -# CONFIG_NLS_ISO8859_15 is not set -# CONFIG_NLS_KOI8_R is not set -# CONFIG_NLS_KOI8_U is not set -# CONFIG_NLS_UTF8 is not set - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -# CONFIG_MDA_CONSOLE is not set - -# -# Frame-buffer support -# -# CONFIG_FB is not set -# CONFIG_SPEAKUP is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB is not set - -# -# Bluetooth support -# -# CONFIG_BLUEZ is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_FRAME_POINTER=y -CONFIG_STACK_TRACE_SCAN=y -CONFIG_STACK_TRACE_FPTR=y -CONFIG_STACK_TRACE_PARAM_COUNT=4 -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_MCL_COREDUMP=y -CONFIG_BOOTIMG=y -# CONFIG_OPROFILE is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y diff --git 
a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-uml b/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-uml deleted file mode 100644 index bb79c22..0000000 --- a/lustre/kernel_patches/kernel_configs/config-linux-2.4.18-uml +++ /dev/null @@ -1,458 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_USERMODE=y -# CONFIG_ISA is not set -# CONFIG_SBUS is not set -# CONFIG_PCI is not set -CONFIG_UID16=y -# CONFIG_RWSEM_XCHGADD_ALGORITHM is not set -CONFIG_RWSEM_GENERIC_SPINLOCK=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General Setup -# -CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_BINFMT_AOUT=y -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=y -CONFIG_HOSTFS=y -# CONFIG_HPPFS is not set -CONFIG_MCONSOLE=y -CONFIG_MAGIC_SYSRQ=y -# CONFIG_HOST_2G_2G is not set -# CONFIG_UML_SMP is not set -# CONFIG_SMP is not set -CONFIG_NEST_LEVEL=0 -CONFIG_KERNEL_HALF_GIGS=1 - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_KMOD=y - -# -# Character Devices -# -CONFIG_STDIO_CONSOLE=y -CONFIG_SSL=y -CONFIG_FD_CHAN=y -# CONFIG_NULL_CHAN is not set -CONFIG_PORT_CHAN=y -CONFIG_PTY_CHAN=y -CONFIG_TTY_CHAN=y -CONFIG_XTERM_CHAN=y -CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -CONFIG_CON_CHAN="xterm" -CONFIG_SSL_CHAN="pty" -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -# CONFIG_WATCHDOG is not set -# CONFIG_UML_SOUND is not set -# CONFIG_SOUND is not set -# CONFIG_HOSTAUDIO is not set -# CONFIG_TTY_LOG is not set - -# -# Block Devices -# -CONFIG_BLK_DEV_UBD=y -# CONFIG_BLK_DEV_UBD_SYNC is not set -CONFIG_BLK_DEV_LOOP=y -CONFIG_BLK_DEV_NBD=y -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -# CONFIG_MMAPPER is not set -CONFIG_NETDEVICES=y - -# -# Network Devices -# -CONFIG_UML_NET=y -# CONFIG_UML_NET_ETHERTAP is not set -CONFIG_UML_NET_TUNTAP=y -CONFIG_UML_NET_SLIP=y -CONFIG_UML_NET_DAEMON=y -CONFIG_UML_NET_MCAST=y -CONFIG_DUMMY=m -CONFIG_BONDING=m 
-CONFIG_EQUALIZER=m -CONFIG_TUN=y -CONFIG_PPP=y -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_SLIP=y -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_TUX is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=y -CONFIG_NET_IPGRE=y -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=y -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m 
-CONFIG_IP_NF_ARPFILTER=m - -# -# IP: Virtual Server Configuration -# -CONFIG_IP_VS=y -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=16 - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m -CONFIG_IPV6=y - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=y -CONFIG_ATM_MPOA=y -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m - -# -# -# -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -CONFIG_DECNET=m -CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=y -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m 
-CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# File systems -# -CONFIG_QUOTA=y -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=y -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=y -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=y -# CONFIG_EFS_FS is not set -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -CONFIG_DEVFS_MOUNT=y -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m -CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y 
-CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m -CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# SCSI support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Kernel hacking -# -# CONFIG_DEBUG_SLAB is not set -CONFIG_DEBUGSYM=y -CONFIG_PT_PROXY=y -# CONFIG_GPROF is not set -# CONFIG_GCOV is not set - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=m diff --git a/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh b/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh deleted file mode 100644 index dec210a..0000000 --- 
a/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-i386-rh +++ /dev/null @@ -1,1849 +0,0 @@ -# -# Automatically generated by make menuconfig: don't edit -# -CONFIG_X86=y -# CONFIG_SBUS is not set -CONFIG_UID16=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# Loadable module support -# -CONFIG_MODULES=y -CONFIG_MODVERSIONS=y -CONFIG_KMOD=y - -# -# Processor type and features -# -CONFIG_LOLAT=y -# CONFIG_M386 is not set -# CONFIG_M486 is not set -# CONFIG_M586 is not set -# CONFIG_M586TSC is not set -# CONFIG_M586MMX is not set -CONFIG_M686=y -# CONFIG_MPENTIUMIII is not set -# CONFIG_MPENTIUM4 is not set -# CONFIG_MK6 is not set -# CONFIG_MK7 is not set -# CONFIG_MELAN is not set -# CONFIG_MCRUSOE is not set -# CONFIG_MWINCHIPC6 is not set -# CONFIG_MWINCHIP2 is not set -# CONFIG_MWINCHIP3D is not set -# CONFIG_MCYRIXIII is not set -CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INVLPG=y -CONFIG_X86_CMPXCHG=y -CONFIG_X86_XADD=y -CONFIG_X86_BSWAP=y -CONFIG_X86_POPAD_OK=y -# CONFIG_RWSEM_GENERIC_SPINLOCK is not set -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_X86_L1_CACHE_SHIFT=7 -CONFIG_X86_HAS_TSC=y -CONFIG_X86_GOOD_APIC=y -CONFIG_X86_PGE=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_PPRO_FENCE=y -CONFIG_X86_F00F_WORKS_OK=y -CONFIG_X86_MCE=y - -# -# CPU Frequency scaling -# -CONFIG_CPU_FREQ=y -# CONFIG_CPU_FREQ_24_API is not set -CONFIG_X86_POWERNOW_K6=m -# CONFIG_X86_LONGHAUL is not set -CONFIG_X86_SPEEDSTEP=m -# CONFIG_X86_P4_CLOCKMOD is not set -# CONFIG_X86_LONGRUN is not set -CONFIG_TOSHIBA=m -CONFIG_I8K=m -CONFIG_MICROCODE=m -CONFIG_X86_MSR=m -CONFIG_X86_CPUID=m -# CONFIG_E820_PROC is not set -CONFIG_EDD=m -# CONFIG_NOHIGHMEM is not set -CONFIG_HIGHMEM4G=y -# CONFIG_HIGHMEM64G is not set -CONFIG_HIGHMEM=y -CONFIG_HIGHPTE=y -CONFIG_HIGHIO=y -# CONFIG_MATH_EMULATION is not set -CONFIG_MTRR=y -# CONFIG_SMP is not set -CONFIG_X86_UP_APIC=y -CONFIG_X86_UP_IOAPIC=y -CONFIG_X86_LOCAL_APIC=y -CONFIG_X86_IO_APIC=y -# CONFIG_X86_TSC_DISABLE is not set 
-CONFIG_X86_TSC=y - -# -# General setup -# -CONFIG_NET=y -CONFIG_PCI=y -# CONFIG_PCI_GOBIOS is not set -# CONFIG_PCI_GODIRECT is not set -CONFIG_PCI_GOANY=y -CONFIG_PCI_BIOS=y -CONFIG_PCI_DIRECT=y -CONFIG_ISA=y -CONFIG_PCI_NAMES=y -CONFIG_EISA=y -# CONFIG_MCA is not set -CONFIG_HOTPLUG=y - -# -# PCMCIA/CardBus support -# -CONFIG_PCMCIA=m -CONFIG_CARDBUS=y -CONFIG_TCIC=y -CONFIG_I82092=y -CONFIG_I82365=y - -# -# PCI Hotplug Support -# -# CONFIG_HOTPLUG_PCI is not set -# CONFIG_HOTPLUG_PCI_ACPI is not set -# CONFIG_HOTPLUG_PCI_COMPAQ is not set -# CONFIG_HOTPLUG_PCI_COMPAQ_NVRAM is not set -# CONFIG_HOTPLUG_PCI_IBM is not set -# CONFIG_HOTPLUG_PCI_H2999 is not set -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_KCORE_ELF=y -# CONFIG_KCORE_AOUT is not set -CONFIG_BINFMT_AOUT=m -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=m -CONFIG_PM=y -# CONFIG_ACPI is not set -CONFIG_APM=y -# CONFIG_APM_IGNORE_USER_SUSPEND is not set -# CONFIG_APM_DO_ENABLE is not set -CONFIG_APM_CPU_IDLE=y -# CONFIG_APM_DISPLAY_BLANK is not set -CONFIG_APM_RTC_IS_GMT=y -# CONFIG_APM_ALLOW_INTS is not set -# CONFIG_APM_REAL_MODE_POWER_OFF is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -CONFIG_PARPORT=m -CONFIG_PARPORT_PC=m -CONFIG_PARPORT_PC_CML1=m -CONFIG_PARPORT_SERIAL=m -# CONFIG_PARPORT_PC_FIFO is not set -# CONFIG_PARPORT_PC_SUPERIO is not set -CONFIG_PARPORT_PC_PCMCIA=m -# CONFIG_PARPORT_AMIGA is not set -# CONFIG_PARPORT_MFC3 is not set -# CONFIG_PARPORT_ATARI is not set -# CONFIG_PARPORT_GSC is not set -# CONFIG_PARPORT_SUNBPP is not set -# CONFIG_PARPORT_OTHER is not set -CONFIG_PARPORT_1284=y - -# -# Plug and Play configuration -# -CONFIG_PNP=y -CONFIG_ISAPNP=y - -# -# Block devices -# -CONFIG_BLK_DEV_FD=y -CONFIG_BLK_DEV_XD=m -CONFIG_PARIDE=m -CONFIG_PARIDE_PARPORT=m -CONFIG_PARIDE_PD=m -CONFIG_PARIDE_PCD=m -CONFIG_PARIDE_PF=m -CONFIG_PARIDE_PT=m -CONFIG_PARIDE_PG=m -CONFIG_PARIDE_ATEN=m 
-CONFIG_PARIDE_BPCK=m -CONFIG_PARIDE_BPCK6=m -CONFIG_PARIDE_COMM=m -CONFIG_PARIDE_DSTR=m -CONFIG_PARIDE_FIT2=m -CONFIG_PARIDE_FIT3=m -CONFIG_PARIDE_EPAT=m -CONFIG_PARIDE_EPATC8=y -CONFIG_PARIDE_EPIA=m -CONFIG_PARIDE_FRIQ=m -CONFIG_PARIDE_FRPW=m -CONFIG_PARIDE_KBIC=m -CONFIG_PARIDE_KTTI=m -CONFIG_PARIDE_ON20=m -CONFIG_PARIDE_ON26=m -CONFIG_BLK_CPQ_DA=m -CONFIG_BLK_CPQ_CISS_DA=m -CONFIG_CISS_SCSI_TAPE=y -CONFIG_BLK_DEV_DAC960=m -CONFIG_BLK_DEV_UMEM=m -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_NBD=m -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=4096 -CONFIG_BLK_DEV_INITRD=y -CONFIG_BLK_STATS=y - -# -# Multi-device support (RAID and LVM) -# -CONFIG_MD=y -CONFIG_BLK_DEV_MD=y -CONFIG_MD_LINEAR=m -CONFIG_MD_RAID0=m -CONFIG_MD_RAID1=m -CONFIG_MD_RAID5=m -CONFIG_MD_MULTIPATH=m -CONFIG_BLK_DEV_LVM=m - -# -# Cryptography support (CryptoAPI) -# -CONFIG_CRYPTO=m -CONFIG_CIPHERS=m -CONFIG_CIPHER_AES=m -CONFIG_CIPHER_IDENTITY=m -CONFIG_CRYPTODEV=m -CONFIG_CRYPTOLOOP=m - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -CONFIG_NETLINK_DEV=y -CONFIG_NETFILTER=y -# CONFIG_NETFILTER_DEBUG is not set -CONFIG_FILTER=y -CONFIG_UNIX=y -CONFIG_INET=y -CONFIG_TUX=m -CONFIG_TUX_EXTCGI=y -# CONFIG_TUX_EXTENDED_LOG is not set -# CONFIG_TUX_DEBUG is not set -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_FWMARK=y -CONFIG_IP_ROUTE_NAT=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_TOS=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_ROUTE_LARGE_TABLES=y -# CONFIG_IP_PNP is not set -CONFIG_NET_IPIP=m -CONFIG_NET_IPGRE=m -CONFIG_NET_IPGRE_BROADCAST=y -CONFIG_IP_MROUTE=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -CONFIG_SYN_COOKIES=y - -# -# IP: Netfilter Configuration -# -CONFIG_IP_NF_CONNTRACK=m -CONFIG_IP_NF_FTP=m -CONFIG_IP_NF_IRC=m -CONFIG_IP_NF_QUEUE=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_LIMIT=m -CONFIG_IP_NF_MATCH_MAC=m -CONFIG_IP_NF_MATCH_PKTTYPE=m 
-CONFIG_IP_NF_MATCH_MARK=m -CONFIG_IP_NF_MATCH_MULTIPORT=m -CONFIG_IP_NF_MATCH_TOS=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_DSCP=m -CONFIG_IP_NF_MATCH_AH_ESP=m -CONFIG_IP_NF_MATCH_LENGTH=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_MATCH_TCPMSS=m -CONFIG_IP_NF_MATCH_HELPER=m -CONFIG_IP_NF_MATCH_STATE=m -CONFIG_IP_NF_MATCH_CONNTRACK=m -CONFIG_IP_NF_MATCH_UNCLEAN=m -CONFIG_IP_NF_MATCH_OWNER=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_TARGET_MIRROR=m -CONFIG_IP_NF_NAT=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_TARGET_MASQUERADE=m -CONFIG_IP_NF_TARGET_REDIRECT=m -CONFIG_IP_NF_NAT_LOCAL=y -CONFIG_IP_NF_NAT_SNMP_BASIC=m -CONFIG_IP_NF_NAT_IRC=m -CONFIG_IP_NF_NAT_FTP=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_TOS=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_DSCP=m -CONFIG_IP_NF_TARGET_MARK=m -CONFIG_IP_NF_TARGET_LOG=m -CONFIG_IP_NF_TARGET_ULOG=m -CONFIG_IP_NF_TARGET_TCPMSS=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_COMPAT_IPCHAINS=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IP_NF_COMPAT_IPFWADM=m -CONFIG_IP_NF_NAT_NEEDED=y -CONFIG_IPV6=m - -# -# IPv6: Netfilter Configuration -# -# CONFIG_IP6_NF_QUEUE is not set -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_LIMIT=m -CONFIG_IP6_NF_MATCH_MAC=m -CONFIG_IP6_NF_MATCH_MULTIPORT=m -CONFIG_IP6_NF_MATCH_OWNER=m -CONFIG_IP6_NF_MATCH_MARK=m -CONFIG_IP6_NF_MATCH_LENGTH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_LOG=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_TARGET_MARK=m -# CONFIG_KHTTPD is not set -CONFIG_ATM=y -CONFIG_ATM_CLIP=y -# CONFIG_ATM_CLIP_NO_ICMP is not set -CONFIG_ATM_LANE=m -CONFIG_ATM_MPOA=m -CONFIG_ATM_BR2684=m -CONFIG_ATM_BR2684_IPFILTER=y -CONFIG_VLAN_8021Q=m -CONFIG_IPX=m -# CONFIG_IPX_INTERN is not set -CONFIG_ATALK=m - -# -# Appletalk devices -# -CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m -CONFIG_COPS=m -CONFIG_COPS_DAYNA=y -CONFIG_COPS_TANGENT=y -CONFIG_IPDDP=m -CONFIG_IPDDP_ENCAP=y -CONFIG_IPDDP_DECAP=y -CONFIG_DECNET=m 
-CONFIG_DECNET_SIOCGIFCONF=y -CONFIG_DECNET_ROUTER=y -CONFIG_DECNET_ROUTE_FWMARK=y -CONFIG_BRIDGE=m -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -CONFIG_NET_DIVERT=y -# CONFIG_ECONET is not set -CONFIG_WAN_ROUTER=m -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_CBQ=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_CSZ=m -# CONFIG_NET_SCH_ATM is not set -CONFIG_NET_SCH_PRIO=m -CONFIG_NET_SCH_RED=m -CONFIG_NET_SCH_SFQ=m -CONFIG_NET_SCH_TEQL=m -CONFIG_NET_SCH_TBF=m -CONFIG_NET_SCH_GRED=m -CONFIG_NET_SCH_DSMARK=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_QOS=y -CONFIG_NET_ESTIMATOR=y -CONFIG_NET_CLS=y -CONFIG_NET_CLS_TCINDEX=m -CONFIG_NET_CLS_ROUTE4=m -CONFIG_NET_CLS_ROUTE=y -CONFIG_NET_CLS_FW=m -CONFIG_NET_CLS_U32=m -CONFIG_NET_CLS_RSVP=m -CONFIG_NET_CLS_RSVP6=m -CONFIG_NET_CLS_POLICE=y - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# Telephony Support -# -CONFIG_PHONE=m -CONFIG_PHONE_IXJ=m -CONFIG_PHONE_IXJ_PCMCIA=m - -# -# ATA/IDE/MFM/RLL support -# -CONFIG_IDE=y - -# -# IDE, ATA and ATAPI Block devices -# -CONFIG_BLK_DEV_IDE=y -# CONFIG_BLK_DEV_HD_IDE is not set -# CONFIG_BLK_DEV_HD is not set -CONFIG_BLK_DEV_IDEDISK=y -CONFIG_IDEDISK_MULTI_MODE=y -# CONFIG_IDEDISK_STROKE is not set -CONFIG_BLK_DEV_IDECS=m -CONFIG_BLK_DEV_IDECD=m -CONFIG_BLK_DEV_IDETAPE=m -CONFIG_BLK_DEV_IDEFLOPPY=y -CONFIG_BLK_DEV_IDESCSI=m -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_BLK_DEV_CMD640=y -# CONFIG_BLK_DEV_CMD640_ENHANCED is not set -CONFIG_BLK_DEV_ISAPNP=y -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_BLK_DEV_GENERIC=y -CONFIG_IDEPCI_SHARE_IRQ=y -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_OFFBOARD is not set -# CONFIG_BLK_DEV_IDEDMA_FORCED is not set -CONFIG_IDEDMA_PCI_AUTO=y -# CONFIG_IDEDMA_ONLYDISK is not set -CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_IDEDMA_PCI_WIP is not set -CONFIG_BLK_DEV_ADMA=y -CONFIG_BLK_DEV_AEC62XX=y -CONFIG_BLK_DEV_ALI15X3=y -# 
CONFIG_WDC_ALI15X3 is not set -CONFIG_BLK_DEV_AMD74XX=y -# CONFIG_AMD74XX_OVERRIDE is not set -CONFIG_BLK_DEV_CMD64X=y -CONFIG_BLK_DEV_TRIFLEX=y -CONFIG_BLK_DEV_CY82C693=y -CONFIG_BLK_DEV_CS5530=y -CONFIG_BLK_DEV_HPT34X=y -# CONFIG_HPT34X_AUTODMA is not set -CONFIG_BLK_DEV_HPT366=y -CONFIG_BLK_DEV_PIIX=y -CONFIG_BLK_DEV_NFORCE=y -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_PDC202XX_OLD=y -# CONFIG_PDC202XX_BURST is not set -CONFIG_BLK_DEV_PDC202XX_NEW=y -CONFIG_PDC202XX_FORCE=y -CONFIG_BLK_DEV_RZ1000=y -# CONFIG_BLK_DEV_SC1200 is not set -CONFIG_BLK_DEV_SVWKS=y -CONFIG_BLK_DEV_SIIMAGE=y -CONFIG_BLK_DEV_SIS5513=y -CONFIG_BLK_DEV_SLC90E66=y -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_IDE_CHIPSETS is not set -CONFIG_IDEDMA_AUTO=y -# CONFIG_IDEDMA_IVB is not set -# CONFIG_DMA_NONPCI is not set -CONFIG_BLK_DEV_PDC202XX=y -CONFIG_BLK_DEV_IDE_MODES=y -CONFIG_BLK_DEV_ATARAID=m -CONFIG_BLK_DEV_ATARAID_PDC=m -CONFIG_BLK_DEV_ATARAID_HPT=m -CONFIG_BLK_DEV_ATARAID_SII=m - -# -# SCSI support -# -CONFIG_SCSI=m -CONFIG_BLK_DEV_SD=m -CONFIG_SD_EXTRA_DEVS=40 -CONFIG_CHR_DEV_ST=m -CONFIG_CHR_DEV_OSST=m -CONFIG_BLK_DEV_SR=m -CONFIG_BLK_DEV_SR_VENDOR=y -CONFIG_SR_EXTRA_DEVS=4 -CONFIG_CHR_DEV_SG=m -# CONFIG_SCSI_DEBUG_QUEUES is not set -# CONFIG_SCSI_MULTI_LUN is not set -CONFIG_SCSI_CONSTANTS=y -CONFIG_SCSI_LOGGING=y - -# -# SCSI low-level drivers -# -CONFIG_BLK_DEV_3W_XXXX_RAID=m -CONFIG_SCSI_7000FASST=m -CONFIG_SCSI_ACARD=m -CONFIG_SCSI_AHA152X=m -CONFIG_SCSI_AHA1542=m -CONFIG_SCSI_AHA1740=m -CONFIG_SCSI_AACRAID=m -CONFIG_SCSI_AIC7XXX=m -CONFIG_AIC7XXX_CMDS_PER_DEVICE=253 -CONFIG_AIC7XXX_RESET_DELAY_MS=15000 -# CONFIG_AIC7XXX_PROBE_EISA_VL is not set -# CONFIG_AIC7XXX_BUILD_FIRMWARE is not set -CONFIG_SCSI_AIC79XX=m -CONFIG_AIC79XX_CMDS_PER_DEVICE=253 -CONFIG_AIC79XX_RESET_DELAY_MS=15000 -# CONFIG_AIC79XX_BUILD_FIRMWARE is not set -CONFIG_AIC79XX_ENABLE_RD_STRM=y -# CONFIG_AIC79XX_DEBUG_ENABLE is not set 
-CONFIG_AIC79XX_DEBUG_MASK=0 -CONFIG_SCSI_AIC7XXX_OLD=m -CONFIG_AIC7XXX_OLD_TCQ_ON_BY_DEFAULT=y -CONFIG_AIC7XXX_OLD_CMDS_PER_DEVICE=32 -CONFIG_AIC7XXX_OLD_PROC_STATS=y -CONFIG_SCSI_DPT_I2O=m -CONFIG_SCSI_ADVANSYS=m -CONFIG_SCSI_IN2000=m -CONFIG_SCSI_AM53C974=m -CONFIG_SCSI_MEGARAID=m -CONFIG_SCSI_BUSLOGIC=m -# CONFIG_SCSI_OMIT_FLASHPOINT is not set -CONFIG_SCSI_CPQFCTS=m -CONFIG_SCSI_DMX3191D=m -CONFIG_SCSI_DTC3280=m -CONFIG_SCSI_EATA=m -CONFIG_SCSI_EATA_TAGGED_QUEUE=y -# CONFIG_SCSI_EATA_LINKED_COMMANDS is not set -CONFIG_SCSI_EATA_MAX_TAGS=16 -CONFIG_SCSI_EATA_DMA=m -CONFIG_SCSI_EATA_PIO=m -CONFIG_SCSI_FUTURE_DOMAIN=m -CONFIG_SCSI_GDTH=m -CONFIG_SCSI_GENERIC_NCR5380=m -# CONFIG_SCSI_GENERIC_NCR53C400 is not set -CONFIG_SCSI_G_NCR5380_PORT=y -# CONFIG_SCSI_G_NCR5380_MEM is not set -CONFIG_SCSI_IPS=m -CONFIG_SCSI_INITIO=m -CONFIG_SCSI_INIA100=m -CONFIG_SCSI_PPA=m -CONFIG_SCSI_IMM=m -# CONFIG_SCSI_IZIP_EPP16 is not set -# CONFIG_SCSI_IZIP_SLOW_CTR is not set -CONFIG_SCSI_NCR53C406A=m -CONFIG_SCSI_NCR53C7xx=m -# CONFIG_SCSI_NCR53C7xx_sync is not set -CONFIG_SCSI_NCR53C7xx_FAST=y -CONFIG_SCSI_NCR53C7xx_DISCONNECT=y -CONFIG_SCSI_SYM53C8XX_2=m -CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1 -CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16 -CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64 -# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set -CONFIG_SCSI_NCR53C8XX=m -CONFIG_SCSI_SYM53C8XX=m -CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS=8 -CONFIG_SCSI_NCR53C8XX_MAX_TAGS=32 -CONFIG_SCSI_NCR53C8XX_SYNC=40 -# CONFIG_SCSI_NCR53C8XX_PROFILE is not set -# CONFIG_SCSI_NCR53C8XX_IOMAPPED is not set -# CONFIG_SCSI_NCR53C8XX_PQS_PDS is not set -# CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT is not set -CONFIG_SCSI_PAS16=m -CONFIG_SCSI_PCI2000=m -CONFIG_SCSI_PCI2220I=m -CONFIG_SCSI_PSI240I=m -CONFIG_SCSI_QLOGIC_FAS=m -CONFIG_SCSI_QLOGIC_ISP=m -CONFIG_SCSI_QLOGIC_FC=m -# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set -CONFIG_SCSI_QLOGIC_1280=m -CONFIG_SCSI_NEWISP=m -CONFIG_SCSI_SEAGATE=m -CONFIG_SCSI_SIM710=m -CONFIG_SCSI_SYM53C416=m 
-CONFIG_SCSI_DC390T=m -# CONFIG_SCSI_DC390T_NOGENSUPP is not set -CONFIG_SCSI_T128=m -CONFIG_SCSI_U14_34F=m -# CONFIG_SCSI_U14_34F_LINKED_COMMANDS is not set -CONFIG_SCSI_U14_34F_MAX_TAGS=8 -CONFIG_SCSI_ULTRASTOR=m -CONFIG_SCSI_NSP32=m -CONFIG_SCSI_DEBUG=m - -# -# PCMCIA SCSI adapter support -# -CONFIG_SCSI_PCMCIA=y -CONFIG_PCMCIA_AHA152X=m -CONFIG_PCMCIA_FDOMAIN=m -CONFIG_PCMCIA_NINJA_SCSI=m -CONFIG_PCMCIA_QLOGIC=m - -# -# Fusion MPT device support -# -CONFIG_FUSION=m -# CONFIG_FUSION_BOOT is not set -CONFIG_FUSION_MAX_SGE=40 -# CONFIG_FUSION_ISENSE is not set -CONFIG_FUSION_CTL=m -CONFIG_FUSION_LAN=m -CONFIG_NET_FC=y - -# -# IEEE 1394 (FireWire) support (EXPERIMENTAL) -# -CONFIG_IEEE1394=m -# CONFIG_IEEE1394_PCILYNX is not set -CONFIG_IEEE1394_OHCI1394=m -CONFIG_IEEE1394_VIDEO1394=m -CONFIG_IEEE1394_SBP2=m -CONFIG_IEEE1394_SBP2_PHYS_DMA=y -CONFIG_IEEE1394_ETH1394=m -CONFIG_IEEE1394_DV1394=m -CONFIG_IEEE1394_RAWIO=m -CONFIG_IEEE1394_CMP=m -CONFIG_IEEE1394_AMDTP=m -# CONFIG_IEEE1394_VERBOSEDEBUG is not set - -# -# I2O device support -# -CONFIG_I2O=m -CONFIG_I2O_PCI=m -CONFIG_I2O_BLOCK=m -CONFIG_I2O_LAN=m -CONFIG_I2O_SCSI=m -CONFIG_I2O_PROC=m - -# -# Network device support -# -CONFIG_NETDEVICES=y - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set -CONFIG_DUMMY=m -CONFIG_BONDING=m -CONFIG_EQUALIZER=m -CONFIG_TUN=m -CONFIG_ETHERTAP=m -CONFIG_NET_SB1000=m - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_SUNLANCE is not set -CONFIG_HAPPYMEAL=m -# CONFIG_SUNBMAC is not set -# CONFIG_SUNQE is not set -CONFIG_SUNGEM=m -CONFIG_NET_VENDOR_3COM=y -CONFIG_EL1=m -CONFIG_EL2=m -CONFIG_ELPLUS=m -CONFIG_EL16=m -CONFIG_EL3=m -CONFIG_3C515=m -# CONFIG_ELMC is not set -# CONFIG_ELMC_II is not set -CONFIG_VORTEX=m -CONFIG_LANCE=m -CONFIG_NET_VENDOR_SMC=y -CONFIG_WD80x3=m -# CONFIG_ULTRAMCA is not set -CONFIG_ULTRA=m -CONFIG_ULTRA32=m -CONFIG_SMC9194=m -CONFIG_NET_VENDOR_RACAL=y -CONFIG_NI5010=m -CONFIG_NI52=m -CONFIG_NI65=m -CONFIG_AT1700=m -CONFIG_DEPCA=m 
-CONFIG_HP100=m -CONFIG_NET_ISA=y -CONFIG_E2100=m -CONFIG_EWRK3=m -CONFIG_EEXPRESS=m -CONFIG_EEXPRESS_PRO=m -CONFIG_HPLAN_PLUS=m -CONFIG_HPLAN=m -CONFIG_LP486E=m -CONFIG_ETH16I=m -CONFIG_NE2000=m -CONFIG_NET_PCI=y -CONFIG_PCNET32=m -CONFIG_AMD8111_ETH=m -CONFIG_ADAPTEC_STARFIRE=m -CONFIG_AC3200=m -CONFIG_APRICOT=m -CONFIG_CS89x0=m -CONFIG_TULIP=m -# CONFIG_TULIP_MWI is not set -CONFIG_TULIP_MMIO=y -CONFIG_DE4X5=m -CONFIG_DGRS=m -CONFIG_DM9102=m -CONFIG_EEPRO100=m -CONFIG_E100=m -CONFIG_LNE390=m -CONFIG_FEALNX=m -CONFIG_NATSEMI=m -CONFIG_NE2K_PCI=m -CONFIG_NE3210=m -CONFIG_ES3210=m -CONFIG_8139CP=m -CONFIG_8139TOO=m -# CONFIG_8139TOO_PIO is not set -# CONFIG_8139TOO_TUNE_TWISTER is not set -CONFIG_8139TOO_8129=y -# CONFIG_8139_OLD_RX_RESET is not set -CONFIG_SIS900=m -CONFIG_EPIC100=m -CONFIG_SUNDANCE=m -CONFIG_SUNDANCE_MMIO=y -CONFIG_TLAN=m -CONFIG_TC35815=m -CONFIG_VIA_RHINE=m -# CONFIG_VIA_RHINE_MMIO is not set -CONFIG_WINBOND_840=m -CONFIG_NET_POCKET=y -CONFIG_ATP=m -CONFIG_DE600=m -CONFIG_DE620=m - -# -# Ethernet (1000 Mbit) -# -CONFIG_ACENIC=m -# CONFIG_ACENIC_OMIT_TIGON_I is not set -CONFIG_DL2K=m -CONFIG_E1000=m -# CONFIG_MYRI_SBUS is not set -CONFIG_NS83820=m -CONFIG_HAMACHI=m -CONFIG_YELLOWFIN=m -CONFIG_R8169=m -CONFIG_SK98LIN=m -CONFIG_TIGON3=m -CONFIG_FDDI=y -CONFIG_DEFXX=m -CONFIG_SKFP=m -CONFIG_NETCONSOLE=m -# CONFIG_HIPPI is not set -CONFIG_PLIP=m -CONFIG_PPP=m -CONFIG_PPP_MULTILINK=y -CONFIG_PPP_FILTER=y -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_PPP_DEFLATE=m -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_PPPOATM=m -CONFIG_SLIP=m -CONFIG_SLIP_COMPRESSED=y -CONFIG_SLIP_SMART=y -CONFIG_SLIP_MODE_SLIP6=y - -# -# Wireless LAN (non-hamradio) -# -CONFIG_NET_RADIO=y -CONFIG_STRIP=m -CONFIG_WAVELAN=m -CONFIG_ARLAN=m -CONFIG_AIRONET4500=m -CONFIG_AIRONET4500_NONCS=m -CONFIG_AIRONET4500_PNP=y -CONFIG_AIRONET4500_PCI=y -CONFIG_AIRONET4500_ISA=y -CONFIG_AIRONET4500_I365=y -CONFIG_AIRONET4500_PROC=m -CONFIG_AIRO=m -CONFIG_HERMES=m 
-CONFIG_PLX_HERMES=m -CONFIG_PCI_HERMES=m -CONFIG_PCMCIA_HERMES=m -CONFIG_AIRO_CS=m -CONFIG_NET_WIRELESS=y -CONFIG_PCMCIA_HERMES_OLD=m - -# -# Token Ring devices -# -CONFIG_TR=y -CONFIG_IBMTR=m -CONFIG_IBMOL=m -CONFIG_IBMLS=m -CONFIG_3C359=m -CONFIG_TMS380TR=m -CONFIG_TMSPCI=m -CONFIG_TMSISA=m -CONFIG_ABYSS=m -# CONFIG_MADGEMC is not set -CONFIG_SMCTR=m -CONFIG_NET_FC=y -CONFIG_IPHASE5526=m -CONFIG_RCPCI=m -CONFIG_SHAPER=m - -# -# Wan interfaces -# -CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -# CONFIG_COMX is not set -# CONFIG_DSCC4 is not set -# CONFIG_LANMEDIA is not set -CONFIG_ATI_XX20=m -CONFIG_SEALEVEL_4021=m -# CONFIG_SYNCLINK_SYNCPPP is not set -# CONFIG_HDLC is not set -CONFIG_DLCI=m -CONFIG_DLCI_COUNT=24 -CONFIG_DLCI_MAX=8 -CONFIG_SDLA=m -CONFIG_WAN_ROUTER_DRIVERS=y -CONFIG_VENDOR_SANGOMA=m -CONFIG_WANPIPE_CHDLC=y -CONFIG_WANPIPE_FR=y -CONFIG_WANPIPE_X25=y -CONFIG_WANPIPE_PPP=y -CONFIG_WANPIPE_MULTPPP=y -CONFIG_CYCLADES_SYNC=m -CONFIG_CYCLOMX_X25=y -# CONFIG_LAPBETHER is not set -# CONFIG_X25_ASY is not set -CONFIG_SBNI=m -CONFIG_SBNI_MULTILINE=y - -# -# PCMCIA network device support -# -CONFIG_NET_PCMCIA=y -CONFIG_PCMCIA_3C589=m -CONFIG_PCMCIA_3C574=m -CONFIG_PCMCIA_FMVJ18X=m -CONFIG_PCMCIA_PCNET=m -CONFIG_PCMCIA_AXNET=m -CONFIG_PCMCIA_NMCLAN=m -CONFIG_PCMCIA_SMC91C92=m -CONFIG_PCMCIA_XIRC2PS=m -# CONFIG_ARCNET_COM20020_CS is not set -CONFIG_PCMCIA_IBMTR=m -CONFIG_PCMCIA_XIRCOM=m -CONFIG_PCMCIA_XIRTULIP=m -CONFIG_NET_PCMCIA_RADIO=y -CONFIG_PCMCIA_RAYCS=m -CONFIG_PCMCIA_NETWAVE=m -CONFIG_PCMCIA_WAVELAN=m -CONFIG_PCMCIA_WVLAN=m -CONFIG_AIRONET4500_CS=m - -# -# ATM drivers -# -CONFIG_ATM_TCP=m -CONFIG_ATM_LANAI=m -CONFIG_ATM_ENI=m -# CONFIG_ATM_ENI_DEBUG is not set -# CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set -CONFIG_ATM_ZATM_EXACT_TS=y -CONFIG_ATM_NICSTAR=m -CONFIG_ATM_NICSTAR_USE_SUNI=y -CONFIG_ATM_NICSTAR_USE_IDT77105=y -CONFIG_ATM_IDT77252=m -# CONFIG_ATM_IDT77252_DEBUG 
is not set -# CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_IDT77252_USE_SUNI=y -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set -CONFIG_ATM_IA=m -# CONFIG_ATM_IA_DEBUG is not set -CONFIG_ATM_FORE200E_MAYBE=m -CONFIG_ATM_FORE200E_PCA=y -CONFIG_ATM_FORE200E_PCA_DEFAULT_FW=y -CONFIG_ATM_FORE200E_TX_RETRY=16 -CONFIG_ATM_FORE200E_DEBUG=0 -CONFIG_ATM_FORE200E=m - -# -# Amateur Radio support -# -CONFIG_HAMRADIO=y -CONFIG_AX25=m -# CONFIG_AX25_DAMA_SLAVE is not set -CONFIG_NETROM=m -CONFIG_ROSE=m - -# -# AX.25 network device drivers -# -# CONFIG_MKISS is not set -# CONFIG_6PACK is not set -# CONFIG_BPQETHER is not set -# CONFIG_DMASCC is not set -# CONFIG_SCC is not set -# CONFIG_BAYCOM_SER_FDX is not set -# CONFIG_BAYCOM_SER_HDX is not set -# CONFIG_BAYCOM_PAR is not set -# CONFIG_BAYCOM_EPP is not set -CONFIG_SOUNDMODEM=m -CONFIG_SOUNDMODEM_SBC=y -CONFIG_SOUNDMODEM_WSS=y -CONFIG_SOUNDMODEM_AFSK1200=y -CONFIG_SOUNDMODEM_AFSK2400_7=y -CONFIG_SOUNDMODEM_AFSK2400_8=y -CONFIG_SOUNDMODEM_AFSK2666=y -CONFIG_SOUNDMODEM_HAPN4800=y -CONFIG_SOUNDMODEM_PSK4800=y -CONFIG_SOUNDMODEM_FSK9600=y -# CONFIG_YAM is not set - -# -# IrDA (infrared) support -# -CONFIG_IRDA=m -CONFIG_IRLAN=m -CONFIG_IRNET=m -CONFIG_IRCOMM=m -CONFIG_IRDA_ULTRA=y -CONFIG_IRDA_CACHE_LAST_LSAP=y -CONFIG_IRDA_FAST_RR=y -# CONFIG_IRDA_DEBUG is not set - -# -# Infrared-port device drivers -# -CONFIG_IRTTY_SIR=m -CONFIG_IRPORT_SIR=m -CONFIG_DONGLE=y -CONFIG_ESI_DONGLE=m -CONFIG_ACTISYS_DONGLE=m -CONFIG_TEKRAM_DONGLE=m -CONFIG_GIRBIL_DONGLE=m -CONFIG_LITELINK_DONGLE=m -CONFIG_MCP2120_DONGLE=m -CONFIG_OLD_BELKIN_DONGLE=m -CONFIG_ACT200L_DONGLE=m -CONFIG_MA600_DONGLE=m -CONFIG_USB_IRDA=m -CONFIG_NSC_FIR=m -CONFIG_WINBOND_FIR=m -CONFIG_TOSHIBA_OLD=m -CONFIG_TOSHIBA_FIR=m -CONFIG_SMC_IRCC_FIR=m -CONFIG_ALI_FIR=m -CONFIG_VLSI_FIR=m - -# -# ISDN subsystem -# -CONFIG_ISDN=m -CONFIG_ISDN_BOOL=y -CONFIG_ISDN_PPP=y -CONFIG_ISDN_PPP_VJ=y 
-CONFIG_ISDN_MPP=y -CONFIG_ISDN_PPP_BSDCOMP=m -CONFIG_ISDN_AUDIO=y -CONFIG_ISDN_TTY_FAX=y - -# -# ISDN feature submodules -# -CONFIG_ISDN_DRV_LOOP=m -# CONFIG_ISDN_DIVERSION is not set - -# -# Passive ISDN cards -# -CONFIG_ISDN_DRV_HISAX=m -CONFIG_ISDN_HISAX=y -CONFIG_HISAX_EURO=y -CONFIG_DE_AOC=y -# CONFIG_HISAX_NO_SENDCOMPLETE is not set -# CONFIG_HISAX_NO_LLC is not set -# CONFIG_HISAX_NO_KEYPAD is not set -CONFIG_HISAX_1TR6=y -CONFIG_HISAX_NI1=y -CONFIG_HISAX_MAX_CARDS=8 -CONFIG_HISAX_16_0=y -CONFIG_HISAX_16_3=y -CONFIG_HISAX_AVM_A1=y -CONFIG_HISAX_IX1MICROR2=y -CONFIG_HISAX_ASUSCOM=y -CONFIG_HISAX_TELEINT=y -CONFIG_HISAX_HFCS=y -CONFIG_HISAX_SPORTSTER=y -CONFIG_HISAX_MIC=y -CONFIG_HISAX_ISURF=y -CONFIG_HISAX_HSTSAPHIR=y -CONFIG_HISAX_TELESPCI=y -CONFIG_HISAX_S0BOX=y -CONFIG_HISAX_FRITZPCI=y -CONFIG_HISAX_AVM_A1_PCMCIA=y -CONFIG_HISAX_ELSA=y -CONFIG_HISAX_DIEHLDIVA=y -CONFIG_HISAX_SEDLBAUER=y -CONFIG_HISAX_NETJET=y -CONFIG_HISAX_NETJET_U=y -CONFIG_HISAX_NICCY=y -CONFIG_HISAX_BKM_A4T=y -CONFIG_HISAX_SCT_QUADRO=y -CONFIG_HISAX_GAZEL=y -CONFIG_HISAX_HFC_PCI=y -CONFIG_HISAX_W6692=y -CONFIG_HISAX_HFC_SX=y -CONFIG_HISAX_ENTERNOW_PCI=y -CONFIG_HISAX_DEBUG=y -CONFIG_HISAX_SEDLBAUER_CS=m -CONFIG_HISAX_ELSA_CS=m -CONFIG_HISAX_AVM_A1_CS=m -CONFIG_HISAX_ST5481=m -CONFIG_HISAX_FRITZ_PCIPNP=m - -# -# Active ISDN cards -# -CONFIG_ISDN_DRV_ICN=m -CONFIG_ISDN_DRV_PCBIT=m -# CONFIG_ISDN_DRV_SC is not set -# CONFIG_ISDN_DRV_ACT2000 is not set -CONFIG_ISDN_DRV_EICON=y -CONFIG_ISDN_DRV_EICON_DIVAS=m -# CONFIG_ISDN_DRV_EICON_OLD is not set -CONFIG_ISDN_DRV_TPAM=m -CONFIG_ISDN_CAPI=m -CONFIG_ISDN_DRV_AVMB1_VERBOSE_REASON=y -CONFIG_ISDN_CAPI_MIDDLEWARE=y -CONFIG_ISDN_CAPI_CAPI20=m -CONFIG_ISDN_CAPI_CAPIFS_BOOL=y -CONFIG_ISDN_CAPI_CAPIFS=m -CONFIG_ISDN_CAPI_CAPIDRV=m -CONFIG_ISDN_DRV_AVMB1_B1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCI=m -CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y -CONFIG_ISDN_DRV_AVMB1_T1ISA=m -CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m -CONFIG_ISDN_DRV_AVMB1_AVM_CS=m -CONFIG_ISDN_DRV_AVMB1_T1PCI=m 
-CONFIG_ISDN_DRV_AVMB1_C4=m -CONFIG_HYSDN=m -CONFIG_HYSDN_CAPI=y -CONFIG_KALLSYMS=y - -# -# Old CD-ROM drivers (not SCSI, not IDE) -# -# CONFIG_CD_NO_IDESCSI is not set - -# -# Input core support -# -CONFIG_INPUT=m -CONFIG_INPUT_KEYBDEV=m -CONFIG_INPUT_MOUSEDEV=m -CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024 -CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768 -CONFIG_INPUT_JOYDEV=m -CONFIG_INPUT_EVDEV=m - -# -# Character devices -# -CONFIG_VT=y -CONFIG_ECC=m -CONFIG_VT_CONSOLE=y -CONFIG_SERIAL=y -CONFIG_SERIAL_CONSOLE=y -CONFIG_SERIAL_EXTENDED=y -CONFIG_SERIAL_MANY_PORTS=y -CONFIG_SERIAL_SHARE_IRQ=y -# CONFIG_SERIAL_DETECT_IRQ is not set -CONFIG_SERIAL_MULTIPORT=y -# CONFIG_HUB6 is not set -CONFIG_SERIAL_NONSTANDARD=y -CONFIG_COMPUTONE=m -CONFIG_ROCKETPORT=m -CONFIG_CYCLADES=m -# CONFIG_CYZ_INTR is not set -CONFIG_DIGIEPCA=m -CONFIG_ESPSERIAL=m -CONFIG_MOXA_INTELLIO=m -CONFIG_MOXA_SMARTIO=m -CONFIG_ISI=m -CONFIG_SYNCLINK=m -# CONFIG_SYNCLINKMP is not set -CONFIG_N_HDLC=m -CONFIG_RISCOM8=m -CONFIG_SPECIALIX=m -CONFIG_SPECIALIX_RTSCTS=y -CONFIG_SX=m -# CONFIG_RIO is not set -CONFIG_STALDRV=y -CONFIG_STALLION=m -CONFIG_ISTALLION=m -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=2048 -CONFIG_PRINTER=m -CONFIG_LP_CONSOLE=y -CONFIG_PPDEV=m -CONFIG_TIPAR=m - -# -# I2C support -# -CONFIG_I2C=m -CONFIG_I2C_ALGOBIT=m -CONFIG_I2C_PHILIPSPAR=m -CONFIG_I2C_ELV=m -CONFIG_I2C_VELLEMAN=m -# CONFIG_SCx200_I2C is not set -# CONFIG_SCx200_ACB is not set -CONFIG_I2C_ALGOPCF=m -CONFIG_I2C_ELEKTOR=m -CONFIG_I2C_MAINBOARD=y -CONFIG_I2C_ALI1535=m -CONFIG_I2C_ALI15X3=m -CONFIG_I2C_HYDRA=m -CONFIG_I2C_AMD756=m -# CONFIG_I2C_TSUNAMI is not set -CONFIG_I2C_I801=m -CONFIG_I2C_I810=m -CONFIG_I2C_PIIX4=m -CONFIG_I2C_SIS5595=m -CONFIG_I2C_VIA=m -CONFIG_I2C_VIAPRO=m -CONFIG_I2C_VOODOO3=m -CONFIG_I2C_ISA=m -CONFIG_I2C_CHARDEV=m -CONFIG_I2C_PROC=m - -# -# Hardware sensors support -# -CONFIG_SENSORS=y -CONFIG_SENSORS_ADM1021=m -CONFIG_SENSORS_ADM1024=m -CONFIG_SENSORS_ADM1025=m -CONFIG_SENSORS_ADM9240=m 
-CONFIG_SENSORS_DS1621=m -CONFIG_SENSORS_FSCPOS=m -CONFIG_SENSORS_FSCSCY=m -CONFIG_SENSORS_GL518SM=m -CONFIG_SENSORS_GL520SM=m -CONFIG_SENSORS_MAXILIFE=m -CONFIG_SENSORS_IT87=m -CONFIG_SENSORS_MTP008=m -CONFIG_SENSORS_LM75=m -CONFIG_SENSORS_LM78=m -CONFIG_SENSORS_LM80=m -CONFIG_SENSORS_LM87=m -CONFIG_SENSORS_LM92=m -CONFIG_SENSORS_SIS5595=m -CONFIG_SENSORS_SMSC47M1=m -CONFIG_SENSORS_THMC50=m -CONFIG_SENSORS_VIA686A=m -CONFIG_SENSORS_VT1211=m -CONFIG_SENSORS_VT8231=m -CONFIG_SENSORS_W83781D=m -CONFIG_SENSORS_OTHER=y -CONFIG_SENSORS_BT869=m -CONFIG_SENSORS_DDCMON=m -CONFIG_SENSORS_EEPROM=m -CONFIG_SENSORS_MATORB=m -CONFIG_SENSORS_PCF8574=m -CONFIG_SENSORS_PCF8591=m - -# -# Mice -# -CONFIG_BUSMOUSE=m -CONFIG_ATIXL_BUSMOUSE=m -CONFIG_LOGIBUSMOUSE=m -CONFIG_MS_BUSMOUSE=m -CONFIG_MOUSE=y -CONFIG_PSMOUSE=y -CONFIG_82C710_MOUSE=m -CONFIG_PC110_PAD=m -CONFIG_MK712_MOUSE=m - -# -# Joysticks -# -CONFIG_INPUT_GAMEPORT=m -CONFIG_INPUT_NS558=m -CONFIG_INPUT_LIGHTNING=m -CONFIG_INPUT_PCIGAME=m -CONFIG_INPUT_CS461X=m -CONFIG_INPUT_EMU10K1=m -CONFIG_INPUT_SERIO=m -CONFIG_INPUT_SERPORT=m -CONFIG_INPUT_ANALOG=m -CONFIG_INPUT_A3D=m -CONFIG_INPUT_ADI=m -CONFIG_INPUT_COBRA=m -CONFIG_INPUT_GF2K=m -CONFIG_INPUT_GRIP=m -CONFIG_INPUT_INTERACT=m -CONFIG_INPUT_TMDC=m -CONFIG_INPUT_SIDEWINDER=m -CONFIG_INPUT_IFORCE_USB=m -CONFIG_INPUT_IFORCE_232=m -CONFIG_INPUT_WARRIOR=m -CONFIG_INPUT_MAGELLAN=m -CONFIG_INPUT_SPACEORB=m -CONFIG_INPUT_SPACEBALL=m -CONFIG_INPUT_STINGER=m -CONFIG_INPUT_DB9=m -CONFIG_INPUT_GAMECON=m -CONFIG_INPUT_TURBOGRAFX=m -# CONFIG_QIC02_TAPE is not set -CONFIG_IPMI_HANDLER=m -# CONFIG_IPMI_PANIC_EVENT is not set -CONFIG_IPMI_DEVICE_INTERFACE=m -CONFIG_IPMI_KCS=m -CONFIG_IPMI_WATCHDOG=m - -# -# Watchdog Cards -# -CONFIG_WATCHDOG=y -# CONFIG_WATCHDOG_NOWAYOUT is not set -CONFIG_ACQUIRE_WDT=m -CONFIG_ADVANTECH_WDT=m -CONFIG_ALIM7101_WDT=m -CONFIG_SC520_WDT=m -CONFIG_PCWATCHDOG=m -CONFIG_EUROTECH_WDT=m -CONFIG_IB700_WDT=m -CONFIG_WAFER_WDT=m -CONFIG_I810_TCO=m -# CONFIG_MIXCOMWD 
is not set -# CONFIG_60XX_WDT is not set -CONFIG_SC1200_WDT=m -# CONFIG_SCx200_WDT is not set -CONFIG_SOFT_WATCHDOG=m -CONFIG_W83877F_WDT=m -CONFIG_WDT=m -CONFIG_WDTPCI=m -# CONFIG_WDT_501 is not set -CONFIG_MACHZ_WDT=m -CONFIG_AMD7XX_TCO=m -# CONFIG_SCx200_GPIO is not set -CONFIG_AMD_RNG=m -CONFIG_INTEL_RNG=m -CONFIG_AMD_PM768=m -CONFIG_NVRAM=m -CONFIG_RTC=y -CONFIG_DTLK=m -CONFIG_R3964=m -# CONFIG_APPLICOM is not set -CONFIG_SONYPI=m - -# -# Ftape, the floppy tape device driver -# -CONFIG_FTAPE=m -CONFIG_ZFTAPE=m -CONFIG_ZFT_DFLT_BLK_SZ=10240 -CONFIG_ZFT_COMPRESSOR=m -CONFIG_FT_NR_BUFFERS=3 -# CONFIG_FT_PROC_FS is not set -CONFIG_FT_NORMAL_DEBUG=y -# CONFIG_FT_FULL_DEBUG is not set -# CONFIG_FT_NO_TRACE is not set -# CONFIG_FT_NO_TRACE_AT_ALL is not set -CONFIG_FT_STD_FDC=y -# CONFIG_FT_MACH2 is not set -# CONFIG_FT_PROBE_FC10 is not set -# CONFIG_FT_ALT_FDC is not set -CONFIG_FT_FDC_THR=8 -CONFIG_FT_FDC_MAX_RATE=2000 -CONFIG_FT_ALPHA_CLOCK=0 -CONFIG_AGP=m -CONFIG_AGP_INTEL=y -CONFIG_AGP_I810=y -CONFIG_AGP_VIA=y -CONFIG_AGP_AMD=y -CONFIG_AGP_AMD_8151=y -CONFIG_AGP_SIS=y -CONFIG_AGP_ALI=y -CONFIG_AGP_SWORKS=y -CONFIG_DRM=y -# CONFIG_DRM_OLD is not set -CONFIG_DRM_NEW=y -CONFIG_DRM_TDFX=m -CONFIG_DRM_R128=m -CONFIG_DRM_RADEON=m -CONFIG_DRM_I810=m -# CONFIG_DRM_I810_XFREE_41 is not set -CONFIG_DRM_I830=m -CONFIG_DRM_MGA=m -CONFIG_DRM_SIS=m - -# -# PCMCIA character devices -# -CONFIG_PCMCIA_SERIAL_CS=m -CONFIG_SYNCLINK_CS=m -CONFIG_MWAVE=m -CONFIG_BATTERY_GERICOM=m - -# -# Multimedia devices -# -CONFIG_VIDEO_DEV=m - -# -# Video For Linux -# -CONFIG_VIDEO_PROC_FS=y -CONFIG_I2C_PARPORT=m -CONFIG_VIDEO_BT848=m -CONFIG_VIDEO_PMS=m -CONFIG_VIDEO_BWQCAM=m -CONFIG_VIDEO_CQCAM=m -CONFIG_VIDEO_W9966=m -CONFIG_VIDEO_CPIA=m -CONFIG_VIDEO_CPIA_PP=m -CONFIG_VIDEO_CPIA_USB=m -CONFIG_VIDEO_SAA5249=m -CONFIG_TUNER_3036=m -CONFIG_VIDEO_STRADIS=m -CONFIG_VIDEO_ZORAN=m -CONFIG_VIDEO_ZORAN_BUZ=m -CONFIG_VIDEO_ZORAN_DC10=m -CONFIG_VIDEO_ZORAN_LML33=m -CONFIG_VIDEO_ZR36120=m 
-CONFIG_VIDEO_MEYE=m - -# -# Radio Adapters -# -CONFIG_RADIO_CADET=m -CONFIG_RADIO_RTRACK=m -CONFIG_RADIO_RTRACK2=m -CONFIG_RADIO_AZTECH=m -CONFIG_RADIO_GEMTEK=m -CONFIG_RADIO_GEMTEK_PCI=m -CONFIG_RADIO_MAXIRADIO=m -CONFIG_RADIO_MAESTRO=m -CONFIG_RADIO_MIROPCM20=m -CONFIG_RADIO_MIROPCM20_RDS=m -CONFIG_RADIO_SF16FMI=m -CONFIG_RADIO_SF16FMR2=m -CONFIG_RADIO_TERRATEC=m -CONFIG_RADIO_TRUST=m -CONFIG_RADIO_TYPHOON=m -CONFIG_RADIO_TYPHOON_PROC_FS=y -CONFIG_RADIO_ZOLTRIX=m - -# -# Crypto Hardware support -# -CONFIG_CRYPTO=m -CONFIG_CRYPTO_BROADCOM=m - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -CONFIG_QFMT_V2=y -# CONFIG_QIFACE_COMPAT is not set -CONFIG_AUTOFS_FS=m -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=m -# CONFIG_REISERFS_CHECK is not set -CONFIG_REISERFS_PROC_INFO=y -# CONFIG_ADFS_FS is not set -CONFIG_AFS_FS=m -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -CONFIG_HFS_FS=m -CONFIG_BEFS_FS=m -# CONFIG_BEFS_DEBUG is not set -CONFIG_BFS_FS=m -CONFIG_EXT3_FS=m -CONFIG_EXT3_FS_XATTR=y -CONFIG_EXT3_FS_XATTR_SHARING=y -CONFIG_EXT3_FS_XATTR_USER=y -CONFIG_JBD=m -# CONFIG_JBD_DEBUG is not set -CONFIG_FAT_FS=m -CONFIG_MSDOS_FS=m -CONFIG_UMSDOS_FS=m -CONFIG_VFAT_FS=m -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS is not set -CONFIG_CRAMFS=m -CONFIG_TMPFS=y -CONFIG_RAMFS=y -CONFIG_ISO9660_FS=y -CONFIG_JOLIET=y -CONFIG_ZISOFS=y -CONFIG_JFS_FS=m -CONFIG_JFS_DEBUG=y -# CONFIG_JFS_STATISTICS is not set -CONFIG_MINIX_FS=m -CONFIG_VXFS_FS=m -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVFS_MOUNT is not set -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -CONFIG_ROMFS_FS=m -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -CONFIG_SYSV_FS=m -CONFIG_UDF_FS=m 
-CONFIG_UDF_RW=y -CONFIG_UFS_FS=m -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -CONFIG_CODA_FS=m -CONFIG_INTERMEZZO_FS=m -CONFIG_NFS_FS=m -CONFIG_NFS_V3=y -# CONFIG_ROOT_NFS is not set -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -# CONFIG_NFSD_TCP is not set -CONFIG_SUNRPC=m -CONFIG_LOCKD=m -CONFIG_LOCKD_V4=y -CONFIG_SMB_FS=m -# CONFIG_SMB_NLS_DEFAULT is not set -CONFIG_NCP_FS=m -CONFIG_NCPFS_PACKET_SIGNING=y -CONFIG_NCPFS_IOCTL_LOCKING=y -CONFIG_NCPFS_STRONG=y -CONFIG_NCPFS_NFS_NS=y -CONFIG_NCPFS_OS2_NS=y -CONFIG_NCPFS_SMALLDOS=y -CONFIG_NCPFS_NLS=y -CONFIG_NCPFS_EXTRAS=y -CONFIG_ZISOFS_FS=y -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -CONFIG_OSF_PARTITION=y -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -CONFIG_MAC_PARTITION=y -CONFIG_MSDOS_PARTITION=y -CONFIG_BSD_DISKLABEL=y -CONFIG_MINIX_SUBPARTITION=y -CONFIG_SOLARIS_X86_PARTITION=y -CONFIG_UNIXWARE_DISKLABEL=y -# CONFIG_LDM_PARTITION is not set -CONFIG_SGI_PARTITION=y -# CONFIG_ULTRIX_PARTITION is not set -CONFIG_SUN_PARTITION=y -# CONFIG_EFI_PARTITION is not set -CONFIG_SMB_NLS=y -CONFIG_NLS=y - -# -# Native Language Support -# -CONFIG_NLS_DEFAULT="iso8859-1" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_CODEPAGE_737=m -CONFIG_NLS_CODEPAGE_775=m -CONFIG_NLS_CODEPAGE_850=m -CONFIG_NLS_CODEPAGE_852=m -CONFIG_NLS_CODEPAGE_855=m -CONFIG_NLS_CODEPAGE_857=m -CONFIG_NLS_CODEPAGE_860=m -CONFIG_NLS_CODEPAGE_861=m -CONFIG_NLS_CODEPAGE_862=m -CONFIG_NLS_CODEPAGE_863=m -CONFIG_NLS_CODEPAGE_864=m -CONFIG_NLS_CODEPAGE_865=m -CONFIG_NLS_CODEPAGE_866=m -CONFIG_NLS_CODEPAGE_869=m -CONFIG_NLS_CODEPAGE_936=m -CONFIG_NLS_CODEPAGE_950=m -CONFIG_NLS_CODEPAGE_932=m -CONFIG_NLS_CODEPAGE_949=m -CONFIG_NLS_CODEPAGE_874=m -CONFIG_NLS_ISO8859_8=m -CONFIG_NLS_CODEPAGE_1250=m -CONFIG_NLS_CODEPAGE_1251=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_ISO8859_2=m -CONFIG_NLS_ISO8859_3=m -CONFIG_NLS_ISO8859_4=m -CONFIG_NLS_ISO8859_5=m 
-CONFIG_NLS_ISO8859_6=m -CONFIG_NLS_ISO8859_7=m -CONFIG_NLS_ISO8859_9=m -CONFIG_NLS_ISO8859_13=m -CONFIG_NLS_ISO8859_14=m -CONFIG_NLS_ISO8859_15=m -CONFIG_NLS_KOI8_R=m -CONFIG_NLS_KOI8_U=m -CONFIG_NLS_UTF8=m - -# -# Console drivers -# -CONFIG_VGA_CONSOLE=y -CONFIG_VIDEO_SELECT=y -# CONFIG_VIDEO_IGNORE_BAD_MODE is not set -CONFIG_MDA_CONSOLE=m - -# -# Frame-buffer support -# -CONFIG_FB=y -CONFIG_DUMMY_CONSOLE=y -CONFIG_FB_RIVA=m -CONFIG_FB_CLGEN=m -CONFIG_FB_PM2=m -# CONFIG_FB_PM2_FIFO_DISCONNECT is not set -CONFIG_FB_PM2_PCI=y -CONFIG_FB_PM3=m -# CONFIG_FB_CYBER2000 is not set -CONFIG_FB_VESA=y -CONFIG_FB_VGA16=m -CONFIG_FB_HGA=m -CONFIG_VIDEO_SELECT=y -CONFIG_FB_MATROX=m -CONFIG_FB_MATROX_MILLENIUM=y -CONFIG_FB_MATROX_MYSTIQUE=y -# CONFIG_FB_MATROX_G450 is not set -CONFIG_FB_MATROX_G100A=y -CONFIG_FB_MATROX_G100=y -CONFIG_FB_MATROX_I2C=m -CONFIG_FB_MATROX_MAVEN=m -# CONFIG_FB_MATROX_PROC is not set -CONFIG_FB_MATROX_MULTIHEAD=y -CONFIG_FB_ATY=m -CONFIG_FB_ATY_GX=y -CONFIG_FB_ATY_CT=y -CONFIG_FB_ATY_CT_VAIO_LCD=y -CONFIG_FB_RADEON=m -CONFIG_FB_ATY128=m -CONFIG_FB_SIS=m -CONFIG_FB_SIS_300=y -CONFIG_FB_SIS_315=y -CONFIG_FB_NEOMAGIC=m -CONFIG_FB_3DFX=m -CONFIG_FB_VOODOO1=m -# CONFIG_FB_TRIDENT is not set -# CONFIG_FB_VIRTUAL is not set -# CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_MFB=m -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y -CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m -# CONFIG_FBCON_FONTWIDTH8_ONLY is not set -# CONFIG_FBCON_FONTS is not set -CONFIG_FONT_8x8=y -CONFIG_FONT_8x16=y - -# -# Sound -# -CONFIG_SOUND=m -CONFIG_SOUND_ALI5455=m -CONFIG_SOUND_BT878=m -CONFIG_SOUND_CMPCI=m -CONFIG_SOUND_CMPCI_FM=y -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_FMIO=388 -CONFIG_SOUND_CMPCI_MIDI=y -CONFIG_SOUND_CMPCI_MPUIO=330 -CONFIG_SOUND_CMPCI_JOYSTICK=y -CONFIG_SOUND_CMPCI_CM8738=y -# CONFIG_SOUND_CMPCI_SPDIFINVERSE is not set -CONFIG_SOUND_CMPCI_SPDIFLOOP=y -CONFIG_SOUND_CMPCI_SPEAKERS=2 -CONFIG_SOUND_EMU10K1=m 
-CONFIG_MIDI_EMU10K1=y -CONFIG_SOUND_AUDIGY=m -CONFIG_SOUND_FUSION=m -CONFIG_SOUND_CS4281=m -CONFIG_SOUND_ES1370=m -CONFIG_SOUND_ES1371=m -CONFIG_SOUND_ESSSOLO1=m -CONFIG_SOUND_MAESTRO=m -CONFIG_SOUND_MAESTRO3=m -CONFIG_SOUND_FORTE=m -CONFIG_SOUND_ICH=m -CONFIG_SOUND_RME96XX=m -CONFIG_SOUND_SONICVIBES=m -CONFIG_SOUND_TRIDENT=m -CONFIG_SOUND_MSNDCLAS=m -# CONFIG_MSNDCLAS_HAVE_BOOT is not set -CONFIG_MSNDCLAS_INIT_FILE="/etc/sound/msndinit.bin" -CONFIG_MSNDCLAS_PERM_FILE="/etc/sound/msndperm.bin" -CONFIG_SOUND_MSNDPIN=m -# CONFIG_MSNDPIN_HAVE_BOOT is not set -CONFIG_MSNDPIN_INIT_FILE="/etc/sound/pndspini.bin" -CONFIG_MSNDPIN_PERM_FILE="/etc/sound/pndsperm.bin" -CONFIG_SOUND_VIA82CXXX=m -CONFIG_MIDI_VIA82CXXX=y -CONFIG_SOUND_OSS=m -# CONFIG_SOUND_TRACEINIT is not set -CONFIG_SOUND_DMAP=y -CONFIG_SOUND_AD1816=m -CONFIG_SOUND_AD1889=m -CONFIG_SOUND_SGALAXY=m -CONFIG_SOUND_ADLIB=m -CONFIG_SOUND_ACI_MIXER=m -CONFIG_SOUND_CS4232=m -CONFIG_SOUND_SSCAPE=m -CONFIG_SOUND_GUS=m -CONFIG_SOUND_GUS16=y -CONFIG_SOUND_GUSMAX=y -CONFIG_SOUND_VMIDI=m -CONFIG_SOUND_TRIX=m -CONFIG_SOUND_MSS=m -CONFIG_SOUND_MPU401=m -CONFIG_SOUND_NM256=m -CONFIG_SOUND_MAD16=m -CONFIG_MAD16_OLDCARD=y -CONFIG_SOUND_PAS=m -# CONFIG_PAS_JOYSTICK is not set -CONFIG_SOUND_PSS=m -# CONFIG_PSS_MIXER is not set -# CONFIG_PSS_HAVE_BOOT is not set -CONFIG_SOUND_SB=m -CONFIG_SOUND_AWE32_SYNTH=m -CONFIG_SOUND_WAVEFRONT=m -CONFIG_SOUND_MAUI=m -CONFIG_SOUND_YM3812=m -CONFIG_SOUND_OPL3SA1=m -CONFIG_SOUND_OPL3SA2=m -CONFIG_SOUND_YMFPCI=m -CONFIG_SOUND_YMFPCI_LEGACY=y -CONFIG_SOUND_UART6850=m -CONFIG_SOUND_AEDSP16=m -CONFIG_SC6600=y -CONFIG_SC6600_JOY=y -CONFIG_SC6600_CDROM=4 -CONFIG_SC6600_CDROMBASE=0 -CONFIG_AEDSP16_SBPRO=y -CONFIG_AEDSP16_MPU401=y -CONFIG_SOUND_TVMIXER=m - -# -# USB support -# -CONFIG_USB=m -# CONFIG_USB_DEBUG is not set -CONFIG_USB_DEVICEFS=y -# CONFIG_USB_BANDWIDTH is not set -CONFIG_USB_EHCI_HCD=m -CONFIG_USB_UHCI=m -CONFIG_USB_UHCI_ALT=m -CONFIG_USB_OHCI=m -CONFIG_USB_AUDIO=m -# CONFIG_USB_EMI26 is 
not set -CONFIG_USB_MIDI=m -CONFIG_USB_STORAGE=m -# CONFIG_USB_STORAGE_DEBUG is not set -CONFIG_USB_STORAGE_DATAFAB=y -CONFIG_USB_STORAGE_FREECOM=y -CONFIG_USB_STORAGE_ISD200=y -CONFIG_USB_STORAGE_DPCM=y -CONFIG_USB_STORAGE_HP8200e=y -CONFIG_USB_STORAGE_SDDR09=y -CONFIG_USB_STORAGE_SDDR55=y -CONFIG_USB_STORAGE_JUMPSHOT=y -CONFIG_USB_ACM=m -CONFIG_USB_PRINTER=m -CONFIG_USB_HID=m -CONFIG_USB_HIDINPUT=y -CONFIG_USB_HIDDEV=y -# CONFIG_USB_KBD is not set -# CONFIG_USB_MOUSE is not set -CONFIG_USB_AIPTEK=m -CONFIG_USB_WACOM=m -CONFIG_USB_POWERMATE=m -# CONFIG_USB_DC2XX is not set -CONFIG_USB_MDC800=m -CONFIG_USB_SCANNER=m -CONFIG_USB_MICROTEK=m -CONFIG_USB_HPUSBSCSI=m -CONFIG_USB_IBMCAM=m -CONFIG_USB_OV511=m -CONFIG_USB_PWC=m -CONFIG_USB_SE401=m -CONFIG_USB_STV680=m -CONFIG_USB_VICAM=m -CONFIG_USB_DSBR=m -CONFIG_USB_DABUSB=m -CONFIG_USB_PEGASUS=m -CONFIG_USB_RTL8150=m -CONFIG_USB_KAWETH=m -CONFIG_USB_CATC=m -CONFIG_USB_CDCETHER=m -CONFIG_USB_USBNET=m -CONFIG_USB_USS720=m - -# -# USB Serial Converter support -# -CONFIG_USB_SERIAL=m -# CONFIG_USB_SERIAL_DEBUG is not set -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_BELKIN=m -CONFIG_USB_SERIAL_WHITEHEAT=m -CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m -CONFIG_USB_SERIAL_EMPEG=m -CONFIG_USB_SERIAL_FTDI_SIO=m -CONFIG_USB_SERIAL_VISOR=m -CONFIG_USB_SERIAL_IPAQ=m -CONFIG_USB_SERIAL_IR=m -CONFIG_USB_SERIAL_EDGEPORT=m -CONFIG_USB_SERIAL_EDGEPORT_TI=m -CONFIG_USB_SERIAL_KEYSPAN_PDA=m -CONFIG_USB_SERIAL_KEYSPAN=m -# CONFIG_USB_SERIAL_KEYSPAN_USA28 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA28X is not set -CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y -CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y -# CONFIG_USB_SERIAL_KEYSPAN_USA19 is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA18X is not set -# CONFIG_USB_SERIAL_KEYSPAN_USA19W is not set -CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y -CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y -# CONFIG_USB_SERIAL_KEYSPAN_USA49W is not set -CONFIG_USB_SERIAL_MCT_U232=m -CONFIG_USB_SERIAL_KLSI=m -CONFIG_USB_SERIAL_KOBIL_SCT=m 
-CONFIG_USB_SERIAL_PL2303=m -CONFIG_USB_SERIAL_CYBERJACK=m -CONFIG_USB_SERIAL_XIRCOM=m -CONFIG_USB_SERIAL_OMNINET=m -CONFIG_USB_RIO500=m -CONFIG_USB_AUERSWALD=m -CONFIG_USB_TIGL=m -CONFIG_USB_BRLVGER=m -CONFIG_USB_LCD=m - -# -# Additional device driver support -# -CONFIG_NET_BROADCOM=m -CONFIG_CIPE=m -CONFIG_CRYPTO_AEP=m -CONFIG_MEGARAC=m -CONFIG_FC_QLA2200=m -CONFIG_FC_QLA2300=m -CONFIG_SCSI_ISCSI=m - -# -# Bluetooth support -# -CONFIG_BLUEZ=m -CONFIG_BLUEZ_L2CAP=m -CONFIG_BLUEZ_SCO=m -CONFIG_BLUEZ_RFCOMM=m -CONFIG_BLUEZ_RFCOMM_TTY=y -CONFIG_BLUEZ_BNEP=m -CONFIG_BLUEZ_BNEP_MC_FILTER=y -CONFIG_BLUEZ_BNEP_PROTO_FILTER=y - -# -# Bluetooth device drivers -# -CONFIG_BLUEZ_HCIUSB=m -CONFIG_BLUEZ_USB_ZERO_PACKET=y -CONFIG_BLUEZ_HCIUART=m -CONFIG_BLUEZ_HCIUART_H4=y -CONFIG_BLUEZ_HCIUART_BCSP=y -CONFIG_BLUEZ_HCIUART_BCSP_TXCRC=y -CONFIG_BLUEZ_HCIDTL1=m -CONFIG_BLUEZ_HCIBT3C=m -CONFIG_BLUEZ_HCIBLUECARD=m -CONFIG_BLUEZ_HCIBTUART=m -CONFIG_BLUEZ_HCIVHCI=m - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACKOVERFLOW=y -# CONFIG_DEBUG_HIGHMEM is not set -# CONFIG_DEBUG_SLAB is not set -# CONFIG_DEBUG_IOVIRT is not set -CONFIG_MAGIC_SYSRQ=y -# CONFIG_DEBUG_SPINLOCK is not set -# CONFIG_FRAME_POINTER is not set -CONFIG_MCL_COREDUMP=y -CONFIG_BOOTIMG=y - -# -# Library routines -# -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y diff --git a/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-uml b/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-uml deleted file mode 100644 index 2d4a2d5..0000000 --- a/lustre/kernel_patches/kernel_configs/config-linux-2.4.20-uml +++ /dev/null @@ -1,297 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_USERMODE=y -# CONFIG_ISA is not set -# CONFIG_SBUS is not set -# CONFIG_PCI is not set -CONFIG_UID16=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General Setup -# -# CONFIG_MODE_SKAS 
is not set -CONFIG_MODE_TT=y -CONFIG_MODE_TT=y -CONFIG_NET=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -# CONFIG_BINFMT_AOUT is not set -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=y -CONFIG_HOSTFS=y -# CONFIG_HPPFS is not set -CONFIG_MCONSOLE=y -CONFIG_MAGIC_SYSRQ=y -# CONFIG_HOST_2G_2G is not set -# CONFIG_UML_SMP is not set -# CONFIG_SMP is not set -CONFIG_NEST_LEVEL=0 -CONFIG_KERNEL_HALF_GIGS=1 -# CONFIG_HIGHMEM is not set -# CONFIG_PROC_MM is not set -CONFIG_KERNEL_STACK_ORDER=2 -CONFIG_MODE_TT=y -# -# Loadable module support -# -CONFIG_MODULES=y -# CONFIG_KMOD is not set - -# -# Character Devices -# -CONFIG_STDIO_CONSOLE=y -CONFIG_SSL=y -CONFIG_FD_CHAN=y -# CONFIG_NULL_CHAN is not set -CONFIG_PORT_CHAN=y -CONFIG_PTY_CHAN=y -CONFIG_TTY_CHAN=y -CONFIG_XTERM_CHAN=y -CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -CONFIG_CON_CHAN="xterm" -CONFIG_SSL_CHAN="pty" -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_WATCHDOG is not set -# CONFIG_WATCHDOG_NOWAYOUT is not set -# CONFIG_SOFT_WATCHDOG is not set -# CONFIG_UML_WATCHDOG is not set -# CONFIG_UML_SOUND is not set -# CONFIG_SOUND is not set -# CONFIG_HOSTAUDIO is not set -# CONFIG_TTY_LOG is not set - -# -# Block Devices -# -CONFIG_BLK_DEV_UBD=y -# CONFIG_BLK_DEV_UBD_SYNC is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_RAM is not set -# CONFIG_BLK_DEV_INITRD is not set -# CONFIG_MMAPPER is not set -CONFIG_NETDEVICES=y - -# -# Network Devices -# -CONFIG_UML_NET=y -CONFIG_UML_NET_ETHERTAP=y -CONFIG_UML_NET_TUNTAP=y -CONFIG_UML_NET_SLIP=y -# CONFIG_UML_NET_SLIRP is not set -CONFIG_UML_NET_DAEMON=y -CONFIG_UML_NET_MCAST=y -# CONFIG_UML_NET_PCAP is not set -CONFIG_DUMMY=y -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -CONFIG_TUN=y -CONFIG_PPP=y -# CONFIG_PPP_MULTILINK is not set -# CONFIG_PPP_FILTER is not set -# CONFIG_PPP_ASYNC is not set -# CONFIG_PPP_SYNC_TTY is not set -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE 
is not set -CONFIG_SLIP=y -# CONFIG_SLIP_COMPRESSED is not set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLIP_MODE_SLIP6 is not set - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -# CONFIG_NETFILTER is not set -# CONFIG_FILTER is not set -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_IPV6 is not set -# CONFIG_KHTTPD is not set -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set - -# -# -# -# CONFIG_IPX is not set -# CONFIG_ATALK is not set - -# -# Appletalk devices -# -# CONFIG_DEV_APPLETALK is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_LLC is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set - -# -# File systems -# -CONFIG_QUOTA=y -# CONFIG_AUTOFS_FS is not set -CONFIG_AUTOFS4_FS=m -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_ADFS_FS is not set -# CONFIG_ADFS_FS_RW is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BEFS_DEBUG is not set -# CONFIG_BFS_FS is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_XATTR_SHARING is not set -# CONFIG_EXT3_FS_XATTR_USER is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -# CONFIG_FAT_FS is not set -# CONFIG_MSDOS_FS is not set -# CONFIG_UMSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_JFFS_FS is not set -# CONFIG_JFFS2_FS 
is not set -# CONFIG_CRAMFS is not set -CONFIG_TMPFS=y -CONFIG_RAMFS=y -# CONFIG_ISO9660_FS is not set -# CONFIG_JOLIET is not set -# CONFIG_ZISOFS is not set -# CONFIG_JFS_FS is not set -# CONFIG_JFS_DEBUG is not set -# CONFIG_JFS_STATISTICS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_NTFS_FS is not set -# CONFIG_NTFS_RW is not set -# CONFIG_HPFS_FS is not set -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -CONFIG_DEVFS_MOUNT=y -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -# CONFIG_QNX4FS_FS is not set -# CONFIG_QNX4FS_RW is not set -# CONFIG_ROMFS_FS is not set -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XATTR_SHARING is not set -# CONFIG_EXT2_FS_XATTR_USER is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UDF_FS is not set -# CONFIG_UDF_RW is not set -# CONFIG_UFS_FS is not set -# CONFIG_UFS_FS_WRITE is not set - -# -# Network File Systems -# -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -# CONFIG_NFS_FS is not set -# CONFIG_NFS_V3 is not set -# CONFIG_ROOT_NFS is not set -# CONFIG_NFSD is not set -# CONFIG_NFSD_V3 is not set -# CONFIG_NFSD_TCP is not set -# CONFIG_SUNRPC is not set -# CONFIG_LOCKD is not set -# CONFIG_SMB_FS is not set -# CONFIG_NCP_FS is not set -# CONFIG_NCPFS_PACKET_SIGNING is not set -# CONFIG_NCPFS_IOCTL_LOCKING is not set -# CONFIG_NCPFS_STRONG is not set -# CONFIG_NCPFS_NFS_NS is not set -# CONFIG_NCPFS_OS2_NS is not set -# CONFIG_NCPFS_SMALLDOS is not set -# CONFIG_NCPFS_NLS is not set -# CONFIG_NCPFS_EXTRAS is not set -# CONFIG_ZISOFS_FS is not set -CONFIG_FS_MBCACHE=y - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_SMB_NLS is not set -# CONFIG_NLS is not set - -# -# SCSI support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set -# CONFIG_BLK_DEV_MD is not set -# CONFIG_MD_LINEAR is not set -# CONFIG_MD_RAID0 is not set -# CONFIG_MD_RAID1 is not set -# 
CONFIG_MD_RAID5 is not set -# CONFIG_MD_MULTIPATH is not set -# CONFIG_BLK_DEV_LVM is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Library routines -# -# CONFIG_ZLIB_INFLATE is not set -# CONFIG_ZLIB_DEFLATE is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_SLAB=y -CONFIG_DEBUGSYM=y -CONFIG_PT_PROXY=y -# CONFIG_GPROF is not set -# CONFIG_GCOV is not set diff --git a/lustre/kernel_patches/kernel_configs/jdike-2.5.69-uml.config b/lustre/kernel_patches/kernel_configs/jdike-2.5.69-uml.config deleted file mode 100644 index 4aa8a2c..0000000 --- a/lustre/kernel_patches/kernel_configs/jdike-2.5.69-uml.config +++ /dev/null @@ -1,321 +0,0 @@ -# -# Automatically generated make config: don't edit -# -CONFIG_USERMODE=y -CONFIG_MMU=y -CONFIG_UID16=y -CONFIG_RWSEM_GENERIC_SPINLOCK=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y - -# -# General setup -# -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_SYSCTL=y -CONFIG_LOG_BUF_SHIFT=14 - -# -# Loadable module support -# -# CONFIG_MODULES is not set - -# -# UML-specific options -# -CONFIG_MODE_TT=y -# CONFIG_MODE_SKAS is not set -CONFIG_NET=y -CONFIG_HOSTFS=y -# CONFIG_HPPFS is not set -CONFIG_MCONSOLE=y -CONFIG_MAGIC_SYSRQ=y -# CONFIG_HOST_2G_2G is not set -# CONFIG_UML_SMP is not set -# CONFIG_SMP is not set -CONFIG_NEST_LEVEL=0 -CONFIG_KERNEL_HALF_GIGS=1 -# CONFIG_HIGHMEM is not set -# CONFIG_PROC_MM is not set -CONFIG_KERNEL_STACK_ORDER=3 - -# -# Executable file formats -# -# CONFIG_BINFMT_AOUT is not set -CONFIG_BINFMT_ELF=y -CONFIG_BINFMT_MISC=y - -# -# Character Devices -# -CONFIG_STDIO_CONSOLE=y -CONFIG_SSL=y -CONFIG_FD_CHAN=y -# CONFIG_NULL_CHAN is not set -CONFIG_PORT_CHAN=y -CONFIG_PTY_CHAN=y -CONFIG_TTY_CHAN=y -CONFIG_XTERM_CHAN=y -CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -CONFIG_CON_CHAN="xterm" -CONFIG_SSL_CHAN="pty" -CONFIG_UNIX98_PTYS=y -CONFIG_UNIX98_PTY_COUNT=256 -# CONFIG_WATCHDOG is not set -# CONFIG_UML_SOUND is not set -# CONFIG_SOUND is not set 
-# CONFIG_HOSTAUDIO is not set - -# -# Block Devices -# -CONFIG_BLK_DEV_UBD=y -# CONFIG_BLK_DEV_UBD_SYNC is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_RAM is not set -# CONFIG_MMAPPER is not set -CONFIG_NETDEVICES=y - -# -# Networking support -# - -# -# Networking options -# -CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y -# CONFIG_NETLINK_DEV is not set -# CONFIG_NETFILTER is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -# CONFIG_IP_PNP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_INET_ECN is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_IPV6 is not set -# CONFIG_XFRM_USER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -CONFIG_IPV6_SCTP__=y -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_LLC is not set -# CONFIG_DECNET is not set -# CONFIG_BRIDGE is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set -# CONFIG_NET_FASTROUTE is not set -# CONFIG_NET_HW_FLOWCONTROL is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -CONFIG_DUMMY=y -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -CONFIG_TUN=y -# CONFIG_ETHERTAP is not set - -# -# Ethernet (10 or 100Mbit) -# -# CONFIG_NET_ETHERNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# -CONFIG_PPP=y -# CONFIG_PPP_MULTILINK is not set -# CONFIG_PPP_FILTER is not set -# CONFIG_PPP_ASYNC is not set -# CONFIG_PPP_SYNC_TTY is not set -# CONFIG_PPP_DEFLATE is not set -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -CONFIG_SLIP=y -# CONFIG_SLIP_COMPRESSED is not 
set -# CONFIG_SLIP_SMART is not set -# CONFIG_SLIP_MODE_SLIP6 is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Token Ring devices (depends on LLC=y) -# -# CONFIG_SHAPER is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set - -# -# UML Network Devices -# -CONFIG_UML_NET=y -CONFIG_UML_NET_ETHERTAP=y -CONFIG_UML_NET_TUNTAP=y -CONFIG_UML_NET_SLIP=y -CONFIG_UML_NET_DAEMON=y -CONFIG_UML_NET_MCAST=y -# CONFIG_UML_NET_PCAP is not set -# CONFIG_UML_NET_SLIRP is not set - -# -# File systems -# -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -CONFIG_REISERFS_FS=y -# CONFIG_REISERFS_CHECK is not set -# CONFIG_REISERFS_PROC_INFO is not set -# CONFIG_JFS_FS is not set -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_QUOTA=y -# CONFIG_QFMT_V1 is not set -# CONFIG_QFMT_V2 is not set -CONFIG_QUOTACTL=y -# CONFIG_AUTOFS_FS is not set -CONFIG_AUTOFS4_FS=y - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_FAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_DEVFS_FS=y -CONFIG_DEVFS_MOUNT=y -# CONFIG_DEVFS_DEBUG is not set -CONFIG_DEVPTS_FS=y -CONFIG_TMPFS=y -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -# CONFIG_NFS_FS is not set -# CONFIG_NFSD is not set -# CONFIG_EXPORTFS is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is 
not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_INTERMEZZO_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y - -# -# Security options -# -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set - -# -# Library routines -# -CONFIG_CRC32=y - -# -# SCSI support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Kernel hacking -# -CONFIG_DEBUG_SLAB=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUGSYM=y -CONFIG_FRAME_POINTER=y -CONFIG_PT_PROXY=y -# CONFIG_GPROF is not set -# CONFIG_GCOV is not set diff --git a/lustre/kernel_patches/patches/dev_read_only.patch b/lustre/kernel_patches/patches/dev_read_only.patch deleted file mode 100644 index bac5ebf..0000000 --- a/lustre/kernel_patches/patches/dev_read_only.patch +++ /dev/null @@ -1,73 +0,0 @@ - - - - 0 files changed - ---- linux-2.4.18-17.8.0/drivers/block/blkpg.c~dev_read_only 2002-12-06 14:52:29.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/drivers/block/blkpg.c 2002-12-06 14:52:29.000000000 -0800 -@@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c - } - - EXPORT_SYMBOL(blk_ioctl); -+ -+#define NUM_DEV_NO_WRITE 16 -+static int dev_no_write[NUM_DEV_NO_WRITE]; -+ -+/* -+ * Debug code for turning block devices "read-only" (will discard writes -+ * silently). This is for filesystem crash/recovery testing. 
-+ */ -+void dev_set_rdonly(kdev_t dev, int no_write) -+{ -+ if (dev) { -+ printk(KERN_WARNING "Turning device %s read-only\n", -+ bdevname(dev)); -+ dev_no_write[no_write] = 0xdead0000 + dev; -+ } -+} -+ -+int dev_check_rdonly(kdev_t dev) { -+ int i; -+ -+ for (i = 0; i < NUM_DEV_NO_WRITE; i++) { -+ if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 && -+ dev == (dev_no_write[i] & 0xffff)) -+ return 1; -+ } -+ return 0; -+} -+ -+void dev_clear_rdonly(int no_write) { -+ dev_no_write[no_write] = 0; -+} -+ -+EXPORT_SYMBOL(dev_set_rdonly); -+EXPORT_SYMBOL(dev_check_rdonly); -+EXPORT_SYMBOL(dev_clear_rdonly); ---- linux-2.4.18-17.8.0/drivers/block/loop.c~dev_read_only 2002-12-06 14:52:29.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/drivers/block/loop.c 2002-12-06 14:52:29.000000000 -0800 -@@ -491,6 +491,9 @@ static int loop_make_request(request_que - spin_unlock_irq(&lo->lo_lock); - - if (rw == WRITE) { -+ if (dev_check_rdonly(rbh->b_rdev)) -+ goto err; -+ - if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto err; - } else if (rw == READA) { ---- linux-2.4.18-17.8.0/drivers/ide/ide-disk.c~dev_read_only 2002-12-06 14:52:29.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/drivers/ide/ide-disk.c 2002-12-06 14:52:29.000000000 -0800 -@@ -557,6 +557,10 @@ static ide_startstop_t lba_48_rw_disk (i - */ - static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) - { -+ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) { -+ ide_end_request(1, HWGROUP(drive)); -+ return ide_stopped; -+ } - if (IDE_CONTROL_REG) - OUT_BYTE(drive->ctl,IDE_CONTROL_REG); - - -_ diff --git a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch b/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch deleted file mode 100644 index 55057d9..0000000 --- a/lustre/kernel_patches/patches/dev_read_only_2.4.20-rh.patch +++ /dev/null @@ -1,77 +0,0 @@ - - - - drivers/block/blkpg.c | 35 +++++++++++++++++++++++++++++++++++ - drivers/block/loop.c | 3 +++ - 
drivers/ide/ide-disk.c | 5 ++++- - 3 files changed, 42 insertions(+), 1 deletion(-) - ---- rh-2.4.20/drivers/block/blkpg.c~dev_read_only_2.4.20 2003-04-11 14:05:03.000000000 +0800 -+++ rh-2.4.20-root/drivers/block/blkpg.c 2003-04-12 13:11:31.000000000 +0800 -@@ -297,3 +297,38 @@ int blk_ioctl(kdev_t dev, unsigned int c - } - - EXPORT_SYMBOL(blk_ioctl); -+ -+#define NUM_DEV_NO_WRITE 16 -+static int dev_no_write[NUM_DEV_NO_WRITE]; -+ -+/* -+ * Debug code for turning block devices "read-only" (will discard writes -+ * silently). This is for filesystem crash/recovery testing. -+ */ -+void dev_set_rdonly(kdev_t dev, int no_write) -+{ -+ if (dev) { -+ printk(KERN_WARNING "Turning device %s read-only\n", -+ bdevname(dev)); -+ dev_no_write[no_write] = 0xdead0000 + dev; -+ } -+} -+ -+int dev_check_rdonly(kdev_t dev) { -+ int i; -+ -+ for (i = 0; i < NUM_DEV_NO_WRITE; i++) { -+ if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 && -+ dev == (dev_no_write[i] & 0xffff)) -+ return 1; -+ } -+ return 0; -+} -+ -+void dev_clear_rdonly(int no_write) { -+ dev_no_write[no_write] = 0; -+} -+ -+EXPORT_SYMBOL(dev_set_rdonly); -+EXPORT_SYMBOL(dev_check_rdonly); -+EXPORT_SYMBOL(dev_clear_rdonly); ---- rh-2.4.20/drivers/block/loop.c~dev_read_only_2.4.20 2003-04-11 14:05:08.000000000 +0800 -+++ rh-2.4.20-root/drivers/block/loop.c 2003-04-12 13:11:31.000000000 +0800 -@@ -491,6 +491,9 @@ static int loop_make_request(request_que - spin_unlock_irq(&lo->lo_lock); - - if (rw == WRITE) { -+ if (dev_check_rdonly(rbh->b_rdev)) -+ goto err; -+ - if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto err; - } else if (rw == READA) { ---- rh-2.4.20/drivers/ide/ide-disk.c~dev_read_only_2.4.20 2003-04-11 14:04:53.000000000 +0800 -+++ rh-2.4.20-root/drivers/ide/ide-disk.c 2003-04-12 13:14:48.000000000 +0800 -@@ -381,7 +381,10 @@ static ide_startstop_t do_rw_disk (ide_d - if (IS_PDC4030_DRIVE) - return promise_rw_disk(drive, rq, block); - #endif /* CONFIG_BLK_DEV_PDC4030 */ -- -+ if (rq->cmd == WRITE && 
dev_check_rdonly(rq->rq_dev)) { -+ ide_end_request(1, HWGROUP(drive)); -+ return ide_stopped; -+ } - if (IDE_CONTROL_REG) - hwif->OUTB(drive->ctl, IDE_CONTROL_REG); - - -_ diff --git a/lustre/kernel_patches/patches/dev_read_only_2.4.20.patch b/lustre/kernel_patches/patches/dev_read_only_2.4.20.patch deleted file mode 100644 index 0d3476c..0000000 --- a/lustre/kernel_patches/patches/dev_read_only_2.4.20.patch +++ /dev/null @@ -1,77 +0,0 @@ - drivers/block/blkpg.c | 39 +++++++++++++++++++++++++++++++++++++++ - drivers/block/loop.c | 3 +++ - drivers/ide/ide-disk.c | 4 ++++ - 3 files changed, 46 insertions(+) - ---- linux-2.4.20/drivers/block/blkpg.c~dev_read_only_hp 2003-04-09 15:14:54.000000000 -0600 -+++ linux-2.4.20-braam/drivers/block/blkpg.c 2003-04-09 15:37:02.000000000 -0600 -@@ -296,3 +296,42 @@ int blk_ioctl(kdev_t dev, unsigned int c - } - - EXPORT_SYMBOL(blk_ioctl); -+ -+ -+ -+#define NUM_DEV_NO_WRITE 16 -+static int dev_no_write[NUM_DEV_NO_WRITE]; -+ -+/* -+ * Debug code for turning block devices "read-only" (will discard writes -+ * silently). This is for filesystem crash/recovery testing. 
-+ */ -+void dev_set_rdonly(kdev_t dev, int no_write) -+{ -+ if (dev) { -+ printk(KERN_WARNING "Turning device %s read-only\n", -+ bdevname(dev)); -+ dev_no_write[no_write] = 0xdead0000 + dev; -+ } -+} -+ -+int dev_check_rdonly(kdev_t dev) { -+ int i; -+ -+ for (i = 0; i < NUM_DEV_NO_WRITE; i++) { -+ if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 && -+ dev == (dev_no_write[i] & 0xffff)) -+ return 1; -+ } -+ return 0; -+} -+ -+void dev_clear_rdonly(int no_write) { -+ dev_no_write[no_write] = 0; -+} -+ -+EXPORT_SYMBOL(dev_set_rdonly); -+EXPORT_SYMBOL(dev_check_rdonly); -+EXPORT_SYMBOL(dev_clear_rdonly); -+ -+ ---- linux-2.4.20/drivers/block/loop.c~dev_read_only_hp 2003-04-09 15:14:54.000000000 -0600 -+++ linux-2.4.20-braam/drivers/block/loop.c 2003-04-09 15:37:02.000000000 -0600 -@@ -474,6 +474,9 @@ static int loop_make_request(request_que - spin_unlock_irq(&lo->lo_lock); - - if (rw == WRITE) { -+ if (dev_check_rdonly(rbh->b_rdev)) -+ goto err; -+ - if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto err; - } else if (rw == READA) { ---- linux-2.4.20/drivers/ide/ide-disk.c~dev_read_only_hp 2003-04-09 15:14:54.000000000 -0600 -+++ linux-2.4.20-braam/drivers/ide/ide-disk.c 2003-04-09 15:37:02.000000000 -0600 -@@ -558,6 +558,10 @@ static ide_startstop_t lba_48_rw_disk (i - */ - static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) - { -+ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) { -+ ide_end_request(1, HWGROUP(drive)); -+ return ide_stopped; -+ } - if (IDE_CONTROL_REG) - OUT_BYTE(drive->ctl,IDE_CONTROL_REG); - - -_ diff --git a/lustre/kernel_patches/patches/dev_read_only_hp_2.4.20.patch b/lustre/kernel_patches/patches/dev_read_only_hp_2.4.20.patch deleted file mode 100644 index 60081db..0000000 --- a/lustre/kernel_patches/patches/dev_read_only_hp_2.4.20.patch +++ /dev/null @@ -1,77 +0,0 @@ - drivers/block/blkpg.c | 36 ++++++++++++++++++++++++++++++++++++ - drivers/block/loop.c | 3 +++ - drivers/ide/ide-disk.c | 4 ++++ - 
3 files changed, 43 insertions(+) - ---- linux/drivers/block/blkpg.c~dev_read_only_hp_2.4.20 Mon May 19 07:07:52 2003 -+++ linux-mmonroe/drivers/block/blkpg.c Mon May 19 07:37:22 2003 -@@ -310,6 +310,42 @@ int blk_ioctl(kdev_t dev, unsigned int c - - EXPORT_SYMBOL(blk_ioctl); - -+ -+#define NUM_DEV_NO_WRITE 16 -+static int dev_no_write[NUM_DEV_NO_WRITE]; -+/* -+ * Debug code for turning block devices "read-only" (will discard writes -+ * silently). This is for filesystem crash/recovery testing. -+ */ -+void dev_set_rdonly(kdev_t dev, int no_write) -+{ -+ if (dev) { -+ printk(KERN_WARNING "Turning device %s read-only\n", -+ bdevname(dev)); -+ dev_no_write[no_write] = 0xdead0000 + dev; -+ } -+} -+ -+int dev_check_rdonly(kdev_t dev) { -+ int i; -+ -+ for (i = 0; i < NUM_DEV_NO_WRITE; i++) { -+ if ((dev_no_write[i] & 0xffff0000) == 0xdead0000 && -+ dev == (dev_no_write[i] & 0xffff)) -+ return 1; -+ } -+ return 0; -+} -+ -+void dev_clear_rdonly(int no_write) { -+ dev_no_write[no_write] = 0; -+} -+ -+EXPORT_SYMBOL(dev_set_rdonly); -+EXPORT_SYMBOL(dev_check_rdonly); -+EXPORT_SYMBOL(dev_clear_rdonly); -+ -+ - /** - * get_last_sector() - * ---- linux/drivers/block/loop.c~dev_read_only_hp_2.4.20 Thu Nov 28 15:53:12 2002 -+++ linux-mmonroe/drivers/block/loop.c Mon May 19 07:28:29 2003 -@@ -474,6 +474,9 @@ static int loop_make_request(request_que - spin_unlock_irq(&lo->lo_lock); - - if (rw == WRITE) { -+ if (dev_check_rdonly(rbh->b_rdev)) -+ goto err; -+ - if (lo->lo_flags & LO_FLAGS_READ_ONLY) - goto err; - } else if (rw == READA) { ---- linux/drivers/ide/ide-disk.c~dev_read_only_hp_2.4.20 Thu Nov 28 15:53:13 2002 -+++ linux-mmonroe/drivers/ide/ide-disk.c Mon May 19 07:28:29 2003 -@@ -558,6 +558,10 @@ static ide_startstop_t lba_48_rw_disk (i - */ - static ide_startstop_t do_rw_disk (ide_drive_t *drive, struct request *rq, unsigned long block) - { -+ if (rq->cmd == WRITE && dev_check_rdonly(rq->rq_dev)) { -+ ide_end_request(1, HWGROUP(drive)); -+ return ide_stopped; -+ } - if 
(IDE_CONTROL_REG) - OUT_BYTE(drive->ctl,IDE_CONTROL_REG); - - -_ diff --git a/lustre/kernel_patches/patches/dsp.patch b/lustre/kernel_patches/patches/dsp.patch deleted file mode 100644 index f2e5b93..0000000 --- a/lustre/kernel_patches/patches/dsp.patch +++ /dev/null @@ -1,130 +0,0 @@ - arch/i386/kernel/crash.c | 24 +++++++++++++++++------- - arch/i386/kernel/nmi.c | 2 +- - include/asm-i386/apic.h | 1 + - include/linux/crash.h | 2 +- - kernel/bootimg.c | 13 ++++++++++++- - kernel/bootimg_pic.c | 6 ++++-- - 6 files changed, 36 insertions(+), 12 deletions(-) - ---- linux-rh-2.4.20-8/kernel/bootimg.c~dsp 2003-05-07 19:30:47.000000000 +0800 -+++ linux-rh-2.4.20-8-root/kernel/bootimg.c 2003-05-07 19:31:12.000000000 +0800 -@@ -238,9 +238,20 @@ int boot_image() - int error = -ENOMEM; - - if (bootimg_checksum(__va(bootimg_dsc.page_dir),bootimg_dsc.pages) -- != bootimg_dsc.csum) -+ != bootimg_dsc.csum) { - printk("Checksum of kernel image failed. Rebooting via BIOS\n"); - -+ /* Before calling machine_restart(), make sure it will not -+ * simply call this function recursively. -+ */ -+ bootimg_dsc.page_dir = NULL; -+ machine_restart(NULL); -+ -+ /* We should never get here, but just in case... 
*/ -+ for (; ; ) -+ __asm__ __volatile__ ("hlt"); -+ } -+ - code_page = get_identity_mapped_page(); - if (!code_page) goto out3; - code = (relocate_and_jump_t) virt_to_phys((void *) code_page); ---- linux-rh-2.4.20-8/kernel/bootimg_pic.c~dsp 2003-05-07 19:30:47.000000000 +0800 -+++ linux-rh-2.4.20-8-root/kernel/bootimg_pic.c 2003-05-07 19:31:12.000000000 +0800 -@@ -69,7 +69,8 @@ void __bootimg relocate_and_jump(void) - for (j = i+1; j < dsc.pages; j++) { - table = dsc.page_dir+FROM_TABLE(j); - if (((unsigned long) *table) == to) { -- copy_and_swap(*table,dsc.scratch); -+ copy_and_swap((unsigned long) (*table), -+ dsc.scratch); - break; - } - if ((*table)[PAGE_NR(j)] == to) { -@@ -79,7 +80,8 @@ void __bootimg relocate_and_jump(void) - } - table = dsc.page_dir+TO_TABLE(j); - if (((unsigned long) *table) == to) { -- copy_and_swap(*table,dsc.scratch); -+ copy_and_swap((unsigned long) (*table), -+ dsc.scratch); - break; - } - } ---- linux-rh-2.4.20-8/include/asm-i386/apic.h~dsp 2003-05-07 17:00:16.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-i386/apic.h 2003-05-07 19:31:12.000000000 +0800 -@@ -86,6 +86,7 @@ extern struct pm_dev *apic_pm_register(p - extern void apic_pm_unregister(struct pm_dev*); - - extern int check_nmi_watchdog (void); -+extern void disable_apic_nmi_watchdog(void); - - extern unsigned int nmi_watchdog; - #define NMI_NONE 0 ---- linux-rh-2.4.20-8/include/linux/crash.h~dsp 2003-05-07 19:30:47.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/crash.h 2003-05-07 19:31:12.000000000 +0800 -@@ -71,7 +71,7 @@ extern void stop_this_cpu(void *); - #define CRASH_ZALLOC_PAGES 16*5*2 /* 2 to handle crash in crash */ - #define CRASH_LOW_WATER_PAGES 100 - --#define CRASH_CPU_TIMEOUT 5000 /* 5 sec wait for other cpus to stop */ -+#define CRASH_CPU_TIMEOUT 15000 /* 15 sec wait for other cpus to stop */ - - #define CRASH_MARK_RESERVED(addr) (set_bit(PG_reserved,&mem_map[MAP_NR(addr)].flags)) - #define CRASH_CLEAR_RESERVED(addr) 
(clear_bit(PG_reserved,&mem_map[MAP_NR(addr)].flags)) ---- linux-rh-2.4.20-8/arch/i386/kernel/crash.c~dsp 2003-05-07 19:30:47.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/i386/kernel/crash.c 2003-05-07 19:31:39.000000000 +0800 -@@ -9,6 +9,8 @@ - #include - #include - #include -+#include -+#include - - inline void crash_save_regs(void) { - static unsigned long regs[8]; -@@ -30,15 +32,23 @@ inline void crash_save_regs(void) { - */ - void crash_save_current_state(struct task_struct *tp) - { -+ if (tp != NULL) { -+ /* -+ * Here we save ebp instead of esp just in case the compiler -+ * decides to put an extra push in before we execute this -+ * instruction (thus invalidating our frame pointer). -+ */ -+ asm volatile("movl %%ebp,%0":"=m" (*(u_long *)&tp->thread.esp)); -+ tp->thread.eip = (u_long)crash_save_current_state; -+ panic_ksp[smp_processor_id()] = tp->thread.esp; -+ mb(); -+ } -+ - /* -- * Here we save ebp instead of esp just in case the compiler -- * decides to put an extra push in before we execute this -- * instruction (thus invalidating our frame pointer). -+ * Just to be safe, disable the NMI watchdog on the calling CPU so it -+ * doesn't get in the way while we are trying to save a dump. 
- */ -- asm volatile("movl %%ebp,%0":"=m" (*(u_long *)&tp->thread.esp)); -- tp->thread.eip = (u_long)crash_save_current_state; -- panic_ksp[smp_processor_id()] = tp->thread.esp; -- mb(); -+ disable_apic_nmi_watchdog(); - - save_core(); - ---- linux-rh-2.4.20-8/arch/i386/kernel/nmi.c~dsp 2003-05-07 19:30:47.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/i386/kernel/nmi.c 2003-05-07 19:31:12.000000000 +0800 -@@ -138,7 +138,7 @@ __setup("nmi_watchdog=", setup_nmi_watch - - struct pm_dev *nmi_pmdev; - --static void disable_apic_nmi_watchdog(void) -+void disable_apic_nmi_watchdog(void) - { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - -_ diff --git a/lustre/kernel_patches/patches/export-truncate-2.5.63.patch b/lustre/kernel_patches/patches/export-truncate-2.5.63.patch deleted file mode 100644 index 3d82572..0000000 --- a/lustre/kernel_patches/patches/export-truncate-2.5.63.patch +++ /dev/null @@ -1,37 +0,0 @@ - include/linux/mm.h | 2 ++ - mm/truncate.c | 4 ++-- - 2 files changed, 4 insertions(+), 2 deletions(-) - ---- linux-2.5.63/include/linux/mm.h~export-truncate-2.5.63 Mon May 5 18:08:15 2003 -+++ linux-2.5.63-root/include/linux/mm.h Mon May 5 18:08:58 2003 -@@ -540,6 +540,8 @@ can_vma_merge(struct vm_area_struct *vma - else - return 0; - } -+/* truncate.c */ -+extern void truncate_complete_page(struct page *); - - /* filemap.c */ - extern unsigned long page_unuse(struct page *); ---- linux-2.5.63/mm/truncate.c~export-truncate-2.5.63 Mon May 5 18:09:50 2003 -+++ linux-2.5.63-root/mm/truncate.c Mon May 5 18:11:29 2003 -@@ -41,7 +41,7 @@ static inline void truncate_partial_page - * its lock, b) when a concurrent invalidate_inode_pages got there first and - * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 
- */ --static void -+void - truncate_complete_page(struct address_space *mapping, struct page *page) - { - if (page->mapping != mapping) -@@ -56,7 +56,7 @@ truncate_complete_page(struct address_sp - remove_from_page_cache(page); - page_cache_release(page); /* pagecache ref */ - } -- -+EXPORT_SYMBOL_GPL(truncate_complete_page); - /* - * This is for invalidate_inode_pages(). That function can be called at - * any time, and is not supposed to throw away dirty pages. But pages can - -_ diff --git a/lustre/kernel_patches/patches/export-truncate.patch b/lustre/kernel_patches/patches/export-truncate.patch deleted file mode 100644 index 2cd96b9..0000000 --- a/lustre/kernel_patches/patches/export-truncate.patch +++ /dev/null @@ -1,35 +0,0 @@ - include/linux/mm.h | 1 + - mm/filemap.c | 3 ++- - 2 files changed, 3 insertions(+), 1 deletion(-) - ---- linux-2.4.18-18/include/linux/mm.h~export-truncate 2003-04-05 02:54:55.000000000 -0700 -+++ linux-2.4.18-18-braam/include/linux/mm.h 2003-04-09 17:37:46.000000000 -0600 -@@ -650,6 +650,7 @@ struct zone_t; - /* filemap.c */ - extern void remove_inode_page(struct page *); - extern unsigned long page_unuse(struct page *); -+extern void truncate_complete_page(struct page *); - extern void truncate_inode_pages(struct address_space *, loff_t); - - /* generic vm_area_ops exported for stackable file systems */ ---- linux-2.4.18-18/mm/filemap.c~export-truncate 2003-04-05 02:54:55.000000000 -0700 -+++ linux-2.4.18-18-braam/mm/filemap.c 2003-04-09 17:37:46.000000000 -0600 -@@ -245,7 +245,7 @@ static inline void truncate_partial_page - do_flushpage(page, partial); - } - --static void truncate_complete_page(struct page *page) -+void truncate_complete_page(struct page *page) - { - /* - * Leave it on the LRU if it gets converted into anonymous buffers -@@ -266,6 +266,7 @@ static void truncate_complete_page(struc - remove_inode_page(page); - page_cache_release(page); - } -+EXPORT_SYMBOL_GPL(truncate_complete_page); - - static int 
FASTCALL(truncate_list_pages(struct list_head *, unsigned long, unsigned *)); - static int truncate_list_pages(struct list_head *head, unsigned long start, unsigned *partial) - -_ diff --git a/lustre/kernel_patches/patches/exports.patch b/lustre/kernel_patches/patches/exports.patch deleted file mode 100644 index 33e0b6c..0000000 --- a/lustre/kernel_patches/patches/exports.patch +++ /dev/null @@ -1,57 +0,0 @@ - - - - fs/ext3/Makefile | 2 ++ - fs/ext3/super.c | 2 +- - include/linux/fs.h | 1 + - kernel/ksyms.c | 5 +++++ - 4 files changed, 9 insertions(+), 1 deletion(-) - ---- linux-2.4.18-18/fs/ext3/Makefile~exports Sat Apr 5 02:51:27 2003 -+++ linux-2.4.18-18-braam/fs/ext3/Makefile Sat Apr 5 02:54:45 2003 -@@ -9,6 +9,8 @@ - - O_TARGET := ext3.o - -+export-objs := super.o inode.o -+ - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) ---- linux-2.4.18-18/fs/ext3/super.c~exports Sat Apr 5 02:51:27 2003 -+++ linux-2.4.18-18-braam/fs/ext3/super.c Sat Apr 5 02:54:28 2003 -@@ -1746,7 +1746,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_NO_SYMBOLS; -+EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); ---- linux-2.4.18-18/include/linux/fs.h~exports Sat Apr 5 02:51:27 2003 -+++ linux-2.4.18-18-braam/include/linux/fs.h Sat Apr 5 02:54:29 2003 -@@ -1046,6 +1046,7 @@ extern int unregister_filesystem(struct - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); - extern long do_mount(char *, char *, char *, unsigned long, void *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - extern void umount_tree(struct vfsmount *); - - #define kern_umount mntput ---- linux-2.4.18-18/kernel/ksyms.c~exports Sat Apr 5 
02:51:27 2003 -+++ linux-2.4.18-18-braam/kernel/ksyms.c Sat Apr 5 02:54:29 2003 -@@ -306,6 +306,11 @@ EXPORT_SYMBOL_GPL(buffermem_pages); - EXPORT_SYMBOL_GPL(nr_free_pages); - EXPORT_SYMBOL_GPL(page_cache_size); - -+/* lustre */ -+EXPORT_SYMBOL(panic_notifier_list); -+EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(do_kern_mount); -+ - /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ - EXPORT_SYMBOL(default_llseek); - EXPORT_SYMBOL(dentry_open); - -_ diff --git a/lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch b/lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch deleted file mode 100644 index feaeec6..0000000 --- a/lustre/kernel_patches/patches/exports_2.4.20-rh-hp.patch +++ /dev/null @@ -1,53 +0,0 @@ - fs/ext3/Makefile | 2 ++ - fs/ext3/super.c | 2 +- - include/linux/fs.h | 1 + - kernel/ksyms.c | 4 ++++ - 4 files changed, 8 insertions(+), 1 deletion(-) - ---- linux/fs/ext3/Makefile~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/fs/ext3/Makefile Wed Apr 9 10:19:53 2003 -@@ -9,6 +9,8 @@ - - O_TARGET := ext3.o - -+export-objs := super.o inode.o -+ - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) ---- linux/fs/ext3/super.c~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/fs/ext3/super.c Wed Apr 9 10:19:53 2003 -@@ -1769,7 +1769,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_NO_SYMBOLS; -+EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); ---- linux/include/linux/fs.h~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/include/linux/fs.h Wed Apr 9 10:19:53 2003 -@@ -1020,6 +1020,7 @@ extern int unregister_filesystem(struct - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct 
vfsmount *); - extern long do_mount(char *, char *, char *, unsigned long, void *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - - #define kern_umount mntput - ---- linux/kernel/ksyms.c~exports_2.4.20 Wed Apr 9 10:07:14 2003 -+++ linux-mmonroe/kernel/ksyms.c Wed Apr 9 10:19:53 2003 -@@ -308,6 +308,10 @@ EXPORT_SYMBOL(dcache_dir_fsync); - EXPORT_SYMBOL(dcache_readdir); - EXPORT_SYMBOL(dcache_dir_ops); - -+/* lustre */ -+EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(do_kern_mount); -+ - /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ - EXPORT_SYMBOL(default_llseek); - EXPORT_SYMBOL(dentry_open); - -_ diff --git a/lustre/kernel_patches/patches/exports_2.4.20.patch b/lustre/kernel_patches/patches/exports_2.4.20.patch deleted file mode 100644 index bed8693..0000000 --- a/lustre/kernel_patches/patches/exports_2.4.20.patch +++ /dev/null @@ -1,57 +0,0 @@ - - - - fs/ext3/Makefile | 4 +++- - fs/ext3/super.c | 2 +- - include/linux/fs.h | 1 + - kernel/ksyms.c | 5 +++++ - 4 files changed, 10 insertions(+), 2 deletions(-) - ---- linux-2.4.20/fs/ext3/Makefile~exports_hp Sat Apr 5 03:55:19 2003 -+++ linux-2.4.20-braam/fs/ext3/Makefile Sat Apr 5 03:56:03 2003 -@@ -9,6 +9,8 @@ - - O_TARGET := ext3.o - -+export-objs := super.o inode.o -+ - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) ---- linux-2.4.20/fs/ext3/super.c~exports_hp Sat Apr 5 03:55:19 2003 -+++ linux-2.4.20-braam/fs/ext3/super.c Sat Apr 5 03:55:19 2003 -@@ -1769,7 +1769,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - --EXPORT_NO_SYMBOLS; -+EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); ---- linux-2.4.20/include/linux/fs.h~exports_hp Sat Apr 5 03:55:19 2003 -+++ 
linux-2.4.20-braam/include/linux/fs.h Sat Apr 5 03:55:19 2003 -@@ -1005,6 +1005,7 @@ extern int unregister_filesystem(struct - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); - extern long do_mount(char *, char *, char *, unsigned long, void *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - - #define kern_umount mntput - ---- linux-2.4.20/kernel/ksyms.c~exports_hp Sat Apr 5 03:55:19 2003 -+++ linux-2.4.20-braam/kernel/ksyms.c Sat Apr 5 03:55:19 2003 -@@ -284,6 +284,11 @@ EXPORT_SYMBOL(dcache_dir_fsync); - EXPORT_SYMBOL(dcache_readdir); - EXPORT_SYMBOL(dcache_dir_ops); - -+/* lustre */ -+EXPORT_SYMBOL(pagecache_lock_cacheline); -+EXPORT_SYMBOL(panic_notifier_list); -+EXPORT_SYMBOL(do_kern_mount); -+ - /* for stackable file systems (lofs, wrapfs, cryptfs, etc.) */ - EXPORT_SYMBOL(default_llseek); - EXPORT_SYMBOL(dentry_open); - -_ diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch deleted file mode 100644 index b59cea2..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-1-chaos.patch +++ /dev/null @@ -1,2527 +0,0 @@ - fs/ext3/Makefile | 2 - fs/ext3/dir.c | 299 +++++++++ - fs/ext3/file.c | 3 - fs/ext3/hash.c | 215 ++++++ - fs/ext3/namei.c | 1388 ++++++++++++++++++++++++++++++++++++++++----- - fs/ext3/super.c | 7 - include/linux/ext3_fs.h | 85 ++ - include/linux/ext3_fs_sb.h | 2 - include/linux/ext3_jbd.h | 2 - include/linux/rbtree.h | 2 - lib/rbtree.c | 42 + - 11 files changed, 1887 insertions(+), 160 deletions(-) - ---- linux-chaos-2.4.20-6/fs/ext3/Makefile~ext-2.4-patch-1-chaos 2003-04-09 16:10:38.000000000 -0600 -+++ linux-chaos-2.4.20-6-braam/fs/ext3/Makefile 2003-04-09 16:18:55.000000000 -0600 -@@ -12,7 +12,7 @@ O_TARGET := ext3.o - export-objs := super.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o 
namei.o super.o symlink.o hash.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make ---- linux-chaos-2.4.20-6/fs/ext3/dir.c~ext-2.4-patch-1-chaos 2002-05-07 15:53:46.000000000 -0600 -+++ linux-chaos-2.4.20-6-braam/fs/ext3/dir.c 2003-04-09 16:18:55.000000000 -0600 -@@ -21,12 +21,16 @@ - #include - #include - #include -+#include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK - }; - - static int ext3_readdir(struct file *, void *, filldir_t); -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); - - struct file_operations ext3_dir_operations = { - read: generic_read_dir, -@@ -35,6 +39,17 @@ struct file_operations ext3_dir_operatio - fsync: ext3_sync_file, /* BKL held */ - }; - -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3_filetype_table[filetype]); -+} -+ -+ - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, -@@ -79,6 +94,16 @@ static int ext3_readdir(struct file * fi - - sb = inode->i_sb; - -+ if (is_dx(inode)) { -+ err = ext3_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) -+ return err; -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; -+ } - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); -@@ -162,18 +187,12 @@ revalidate: - * during the copy operation. 
- */ - unsigned long version = filp->f_version; -- unsigned char d_type = DT_UNKNOWN; - -- if (EXT3_HAS_INCOMPAT_FEATURE(sb, -- EXT3_FEATURE_INCOMPAT_FILETYPE) -- && de->file_type < EXT3_FT_MAX) -- d_type = -- ext3_filetype_table[de->file_type]; - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), -- d_type); -+ get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) -@@ -188,3 +207,269 @@ revalidate: - UPDATE_ATIME(inode); - return 0; - } -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ rb_node_t rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. 
-+ */ -+static void free_rb_tree_fname(rb_root_t *root) -+{ -+ rb_node_t *n = root->rb_node; -+ rb_node_t *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = n->rb_parent; -+ fname = rb_entry(n, struct fname, rb_hash); -+ kfree(fname); -+ if (!parent) -+ root->rb_node = 0; -+ else if (parent->rb_left == n) -+ parent->rb_left = 0; -+ else if (parent->rb_right == n) -+ parent->rb_right = 0; -+ n = parent; -+ } -+ root->rb_node = 0; -+} -+ -+ -+struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = 0; -+ p->curr_node = 0; -+ p->extra_fname = 0; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. 
-+ */ -+void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent) -+{ -+ rb_node_t **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kmalloc(len, GFP_KERNEL); -+ memset(new_fn, 0, len); -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) 
-+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = 0; -+ info->extra_fname = 0; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_get_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. 
-+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = 0; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) -+ break; -+ info->curr_node = rb_get_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_get_next(info->curr_node); -+ if (!info->curr_node) { -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ UPDATE_ATIME(inode); -+ return 0; -+} -+#endif ---- linux-chaos-2.4.20-6/fs/ext3/file.c~ext-2.4-patch-1-chaos 2003-02-14 15:59:09.000000000 -0700 -+++ linux-chaos-2.4.20-6-braam/fs/ext3/file.c 2003-04-09 16:18:55.000000000 -0600 -@@ -35,6 +35,9 @@ static int ext3_release_file (struct ino - { - if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); -+ if (is_dx(inode) && filp->private_data) -+ ext3_htree_free_dir_info(filp->private_data); -+ - return 0; - } - ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-chaos-2.4.20-6-braam/fs/ext3/hash.c 2003-04-09 16:18:55.000000000 -0600 -@@ -0,0 +1,215 @@ -+/* -+ * linux/fs/ext3/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. 
-+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+/* F, G and H are basic MD4 functions: selection, majority, parity */ -+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -+#define H(x, y, z) ((x) ^ (y) ^ (z)) -+ -+/* -+ * The generic round function. The application is so specific that -+ * we don't bother protecting all the arguments with parens, as is generally -+ * good macro practice, in favor of extra legibility. -+ * Rotation is separate from addition to prevent recomputation -+ */ -+#define ROUND(f, a, b, c, d, x, s) \ -+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) -+#define K1 0 -+#define K2 013240474631UL -+#define K3 015666365641UL -+ -+/* -+ * Basic cut-down MD4 transform. Returns only 32 bits of result. 
-+ */ -+static void halfMD4Transform (__u32 buf[4], __u32 const in[]) -+{ -+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; -+ -+ /* Round 1 */ -+ ROUND(F, a, b, c, d, in[0] + K1, 3); -+ ROUND(F, d, a, b, c, in[1] + K1, 7); -+ ROUND(F, c, d, a, b, in[2] + K1, 11); -+ ROUND(F, b, c, d, a, in[3] + K1, 19); -+ ROUND(F, a, b, c, d, in[4] + K1, 3); -+ ROUND(F, d, a, b, c, in[5] + K1, 7); -+ ROUND(F, c, d, a, b, in[6] + K1, 11); -+ ROUND(F, b, c, d, a, in[7] + K1, 19); -+ -+ /* Round 2 */ -+ ROUND(G, a, b, c, d, in[1] + K2, 3); -+ ROUND(G, d, a, b, c, in[3] + K2, 5); -+ ROUND(G, c, d, a, b, in[5] + K2, 9); -+ ROUND(G, b, c, d, a, in[7] + K2, 13); -+ ROUND(G, a, b, c, d, in[0] + K2, 3); -+ ROUND(G, d, a, b, c, in[2] + K2, 5); -+ ROUND(G, c, d, a, b, in[4] + K2, 9); -+ ROUND(G, b, c, d, a, in[6] + K2, 13); -+ -+ /* Round 3 */ -+ ROUND(H, a, b, c, d, in[3] + K3, 3); -+ ROUND(H, d, a, b, c, in[7] + K3, 9); -+ ROUND(H, c, d, a, b, in[2] + K3, 11); -+ ROUND(H, b, c, d, a, in[6] + K3, 15); -+ ROUND(H, a, b, c, d, in[1] + K3, 3); -+ ROUND(H, d, a, b, c, in[5] + K3, 9); -+ ROUND(H, c, d, a, b, in[0] + K3, 11); -+ ROUND(H, b, c, d, a, in[4] + K3, 15); -+ -+ buf[0] += a; -+ buf[1] += b; -+ buf[2] += c; -+ buf[3] += d; -+} -+ -+#undef ROUND -+#undef F -+#undef G -+#undef H -+#undef K1 -+#undef K2 -+#undef K3 -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ 
*buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ halfMD4Transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hinfo->hash = hash & ~1; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} ---- linux-chaos-2.4.20-6/fs/ext3/namei.c~ext-2.4-patch-1-chaos 2003-03-12 12:51:02.000000000 -0700 -+++ linux-chaos-2.4.20-6-braam/fs/ext3/namei.c 
2003-04-09 16:26:04.000000000 -0600 -@@ -16,6 +16,12 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 - */ - - #include -@@ -38,6 +44,630 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. 
-+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ 
struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash); -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err); -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 
22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? 
-+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ if (dentry) -+ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: 
%#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 
0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will return -1. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int num_frames = 0; -+ __u32 bhash; -+ -+ *err = ENOENT; -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. 
When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), -+ 0, err))) -+ return -1; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -+{ -+ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. 
-+ */ -+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct buffer_head *bh; -+ struct ext3_dir_entry_2 *de, *top; -+ static struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_dentry->d_inode; -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ dxtrace(printk("Reading block %d\n", block)); -+ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) -+ goto errout; -+ -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) { -+ ext3fs_dirhash(de->name, de->name_len, &hinfo); -+ if ((hinfo.hash < start_hash) || -+ ((hinfo.hash == start_hash) && -+ (hinfo.minor_hash < start_minor_hash))) -+ continue; -+ ext3_htree_store_dirent(dir_file, hinfo.hash, -+ hinfo.minor_hash, de); -+ count++; -+ } -+ brelse (bh); -+ hashval = ~1; -+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &err, &hashval); -+ if (next_hash) -+ *next_hash = hashval; -+ if (ret == -1) -+ goto errout; -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries\n", count)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3_dir_entry_2 *de, int 
size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. 
- * -@@ -94,6 +724,7 @@ static int inline search_dirblock(struct - return 0; - } - -+ - /* - * ext3_find_entry() - * -@@ -105,6 +736,8 @@ static int inline search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -119,12 +752,32 @@ static struct buffer_head * ext3_find_en - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; -- -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ bh = ext3_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. 
-+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); -+ } -+#endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -166,7 +819,7 @@ restart: - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -197,6 +850,66 @@ cleanup_and_exit: - return ret; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err))) -+ return NULL; -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match (namelen, name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3_htree_next_block(dir, hash, frame, -+ frames, err, 0); -+ if (retval == -1) { -+ ext3_warning(sb, 
__FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -213,8 +926,9 @@ static struct dentry *ext3_lookup(struct - brelse (bh); - inode = iget(dir->i_sb, ino); - -- if (!inode) -+ if (!inode) { - return ERR_PTR(-EACCES); -+ } - } - d_add(dentry, inode); - return NULL; -@@ -238,6 +952,300 @@ static inline void ext3_set_de_type(stru - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct ext3_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len; -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = rec_len; -+ prev = to; -+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, 
int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? 
*/ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match (namelen, name, de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? 
rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. 
-+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *) &root->info; -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3_dir_entry_2 *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = 
dir->i_sb->u.ext3_sb.s_hash_seed; -+ ext3fs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ - /* - * ext3_add_entry() - * -@@ -248,127 +1256,198 @@ static inline void ext3_set_de_type(stru - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -- --/* -- * AKPM: the journalling code here looks wrong on the error paths -- */ - static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -+ struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; -+#ifdef CONFIG_EXT3_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ unsigned nlen, rlen; -+ u32 block, blocks; - - sb = dir->i_sb; -- -- if (!namelen) -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ retval = ext3_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)) -+ return retval; -+ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; -+ dx_fallback++; -+ ext3_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef 
CONFIG_EXT3_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; - de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -- } -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = blocksize); -+ nlen = 0; -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} - -- ext3_debug ("creating next block\n"); -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3_dir_entry_2 *de; -+ int err; - -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -- } else { -+ frame = dx_probe(dentry, 0, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; - -- ext3_debug ("skipping to next block\n"); -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; - -- de = (struct 
ext3_dir_entry_2 *) bh->b_data; -- } -- } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -- brelse (bh); -- return -EEXIST; -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (err != -ENOSPC) { -+ bh = 0; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3_warning(sb, __FUNCTION__, -+ "Directory index full!\n"); -+ err = -ENOSPC; -+ goto cleanup; - } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ 
BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. 
-- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -- brelse(bh); -- return 0; -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- brelse (bh); -- return -ENOSPC; -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = 0; -+ goto cleanup; -+ -+journal_error: -+ ext3_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; - } -+#endif - - /* - * ext3_delete_entry deletes a directory entry by merging it with the -@@ -455,9 +1534,11 @@ static int ext3_create (struct inode * d - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if 
(IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -481,9 +1562,11 @@ static int ext3_mknod (struct inode * di - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -509,9 +1592,11 @@ static int ext3_mkdir(struct inode * dir - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -523,7 +1608,7 @@ static int ext3_mkdir(struct inode * dir - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -@@ -556,21 +1641,19 @@ static int ext3_mkdir(struct inode * dir - inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); -- if (err) -- goto out_no_entry; -+ if (err) { -+ inode->i_nlink = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- inode->i_nlink = 
0; -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - /* -@@ -657,7 +1740,7 @@ int ext3_orphan_add(handle_t *handle, st - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -698,7 +1781,7 @@ int ext3_orphan_add(handle_t *handle, st - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -716,25 +1799,26 @@ out_unlock: - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del(&ei->i_orphan); -+ INIT_LIST_HEAD(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -795,8 +1879,9 @@ static int ext3_rmdir (struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); -@@ -834,7 +1919,7 
@@ static int ext3_rmdir (struct inode * di - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -852,8 +1937,9 @@ static int ext3_unlink(struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -880,7 +1966,7 @@ static int ext3_unlink(struct inode * di - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -906,9 +1992,11 @@ static int ext3_symlink (struct inode * - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -918,7 +2006,7 @@ static int ext3_symlink (struct inode * - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* -@@ -927,24 +2015,23 @@ static int ext3_symlink (struct inode * - * i_size in generic_commit_write(). 
- */ - err = block_symlink(inode, symname, l); -- if (err) -- goto out_no_entry; -+ if (err) { -+ ext3_dec_count(handle, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); -+ ext3_mark_inode_dirty(handle, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- ext3_dec_count(handle, inode); -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - static int ext3_link (struct dentry * old_dentry, -@@ -957,12 +2044,15 @@ static int ext3_link (struct dentry * ol - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (inode->i_nlink >= EXT3_LINK_MAX) { - return -EMLINK; -+ } - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -995,9 +2085,11 @@ static int ext3_rename (struct inode * o - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) - handle->h_sync = 1; -@@ -1077,7 +2169,7 @@ static int ext3_rename (struct inode * o - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ 
ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +2181,7 @@ static int ext3_rename (struct inode * o - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } ---- linux-chaos-2.4.20-6/fs/ext3/super.c~ext-2.4-patch-1-chaos 2003-04-09 16:10:38.000000000 -0600 -+++ linux-chaos-2.4.20-6-braam/fs/ext3/super.c 2003-04-09 16:18:55.000000000 -0600 -@@ -710,6 +710,7 @@ static int ext3_setup_super(struct super - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO -@@ -720,6 +721,7 @@ static int ext3_setup_super(struct super - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); -+ - printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", - bdevname(sb->s_dev)); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { -@@ -893,6 +895,7 @@ static loff_t ext3_max_size(int bits) - return res; - } - -+ - struct super_block * ext3_read_super (struct super_block * sb, void * data, - int silent) - { -@@ -1069,6 +1072,9 @@ struct super_block * ext3_read_super (st - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR -@@ -1770,6 +1776,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - -+EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew 
Morton, Andreas Dilger, Theodore Ts'o and others"); ---- linux-chaos-2.4.20-6/include/linux/ext3_fs.h~ext-2.4-patch-1-chaos 2003-03-12 12:51:27.000000000 -0700 -+++ linux-chaos-2.4.20-6-braam/include/linux/ext3_fs.h 2003-04-09 16:18:55.000000000 -0600 -@@ -40,6 +40,11 @@ - #define EXT3FS_VERSION "2.4-0.9.19" - - /* -+ * Always enable hashed directories -+ */ -+#define CONFIG_EXT3_INDEX -+ -+/* - * Debug code - */ - #ifdef EXT3FS_DEBUG -@@ -437,8 +442,11 @@ struct ext3_super_block { - /*E0*/ __u32 s_journal_inum; /* inode number of journal file */ - __u32 s_journal_dev; /* device number of journal file */ - __u32 s_last_orphan; /* start of list of inodes to delete */ -- --/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ -+ __u32 s_hash_seed[4]; /* HTREE hash seed */ -+ __u8 s_def_hash_version; /* Default hash version to use */ -+ __u8 s_reserved_char_pad; -+ __u16 s_reserved_word_pad; -+ __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -575,9 +583,46 @@ struct ext3_dir_entry_2 { - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3_INDEX -+ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ 
u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; - - #ifdef __KERNEL__ - /* -+ * Control parameters used by ext3_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* - * Describe an inode's exact location on disk and in memory - */ - struct ext3_iloc -@@ -587,6 +632,27 @@ struct ext3_iloc - unsigned long block_group; - }; - -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ rb_root_t root; -+ rb_node_t *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ - /* - * Function prototypes - */ -@@ -614,11 +680,20 @@ extern struct ext3_group_desc * ext3_get - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -- struct ext3_dir_entry_2 *, struct buffer_head *, -- unsigned long); -+ struct ext3_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent); -+extern void ext3_htree_free_dir_info(struct dir_private_info *p); -+ - /* fsync.c */ - extern int ext3_sync_file (struct file *, struct dentry *, int); - -+/* hash.c */ -+extern int ext3fs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); -@@ -650,6 +725,8 @@ extern int ext3_ioctl (struct inode *, s - /* namei.c */ - extern int ext3_orphan_add(handle_t *, struct inode *); - extern int ext3_orphan_del(handle_t *, struct inode *); -+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 
start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); - - /* super.c */ - extern void ext3_error (struct super_block *, const char *, const char *, ...) ---- linux-chaos-2.4.20-6/include/linux/ext3_fs_sb.h~ext-2.4-patch-1-chaos 2003-03-12 12:51:27.000000000 -0700 -+++ linux-chaos-2.4.20-6-braam/include/linux/ext3_fs_sb.h 2003-04-09 16:18:55.000000000 -0600 -@@ -62,6 +62,8 @@ struct ext3_sb_info { - int s_inode_size; - int s_first_ino; - u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; - - /* Journaling */ - struct inode * s_journal_inode; ---- linux-chaos-2.4.20-6/include/linux/ext3_jbd.h~ext-2.4-patch-1-chaos 2003-03-12 12:51:27.000000000 -0700 -+++ linux-chaos-2.4.20-6-braam/include/linux/ext3_jbd.h 2003-04-09 16:18:55.000000000 -0600 -@@ -63,6 +63,8 @@ extern int ext3_writepage_trans_blocks(s - - #define EXT3_RESERVE_TRANS_BLOCKS 12U - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, ---- linux-chaos-2.4.20-6/include/linux/rbtree.h~ext-2.4-patch-1-chaos 2002-05-07 15:53:47.000000000 -0600 -+++ linux-chaos-2.4.20-6-braam/include/linux/rbtree.h 2003-04-09 16:18:55.000000000 -0600 -@@ -120,6 +120,8 @@ rb_root_t; - - extern void rb_insert_color(rb_node_t *, rb_root_t *); - extern void rb_erase(rb_node_t *, rb_root_t *); -+extern rb_node_t *rb_get_first(rb_root_t *root); -+extern rb_node_t *rb_get_next(rb_node_t *n); - - static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link) - { ---- linux-chaos-2.4.20-6/lib/rbtree.c~ext-2.4-patch-1-chaos 2002-09-25 11:14:03.000000000 -0600 -+++ linux-chaos-2.4.20-6-braam/lib/rbtree.c 2003-04-09 16:18:55.000000000 -0600 -@@ -17,6 +17,8 @@ - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -+ -+ rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002 - */ - - #include -@@ -294,3 +296,43 @@ void rb_erase(rb_node_t * node, rb_root_ - 
__rb_erase_color(child, parent, root); - } - EXPORT_SYMBOL(rb_erase); -+ -+/* -+ * This function returns the first node (in sort order) of the tree. -+ */ -+rb_node_t *rb_get_first(rb_root_t *root) -+{ -+ rb_node_t *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return 0; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+EXPORT_SYMBOL(rb_get_first); -+ -+/* -+ * Given a node, this function will return the next node in the tree. -+ */ -+rb_node_t *rb_get_next(rb_node_t *n) -+{ -+ rb_node_t *parent; -+ -+ if (n->rb_right) { -+ n = n->rb_right; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+ } else { -+ while ((parent = n->rb_parent)) { -+ if (n == parent->rb_left) -+ return parent; -+ n = parent; -+ } -+ return 0; -+ } -+} -+EXPORT_SYMBOL(rb_get_next); -+ - -_ diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-1.patch b/lustre/kernel_patches/patches/ext-2.4-patch-1.patch deleted file mode 100644 index 09caec1..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-1.patch +++ /dev/null @@ -1,2527 +0,0 @@ - fs/ext3/Makefile | 2 - fs/ext3/dir.c | 299 +++++++++ - fs/ext3/file.c | 3 - fs/ext3/hash.c | 215 ++++++ - fs/ext3/namei.c | 1387 ++++++++++++++++++++++++++++++++++++++++----- - fs/ext3/super.c | 7 - include/linux/ext3_fs.h | 85 ++ - include/linux/ext3_fs_sb.h | 2 - include/linux/ext3_jbd.h | 2 - include/linux/rbtree.h | 2 - lib/rbtree.c | 42 + - 11 files changed, 1886 insertions(+), 160 deletions(-) - ---- linux-2.4.20/fs/ext3/Makefile~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/Makefile Sat Apr 5 03:57:05 2003 -@@ -12,7 +12,7 @@ O_TARGET := ext3.o - export-objs := super.o inode.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o hash.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make ---- linux-2.4.20/fs/ext3/dir.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/dir.c Sat Apr 5 03:56:31 
2003 -@@ -21,12 +21,16 @@ - #include - #include - #include -+#include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK - }; - - static int ext3_readdir(struct file *, void *, filldir_t); -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir); - - struct file_operations ext3_dir_operations = { - read: generic_read_dir, -@@ -35,6 +39,17 @@ struct file_operations ext3_dir_operatio - fsync: ext3_sync_file, /* BKL held */ - }; - -+ -+static unsigned char get_dtype(struct super_block *sb, int filetype) -+{ -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || -+ (filetype >= EXT3_FT_MAX)) -+ return DT_UNKNOWN; -+ -+ return (ext3_filetype_table[filetype]); -+} -+ -+ - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, - struct buffer_head * bh, -@@ -79,6 +94,16 @@ static int ext3_readdir(struct file * fi - - sb = inode->i_sb; - -+ if (is_dx(inode)) { -+ err = ext3_dx_readdir(filp, dirent, filldir); -+ if (err != ERR_BAD_DX_DIR) -+ return err; -+ /* -+ * We don't set the inode dirty flag since it's not -+ * critical that it get flushed back to the disk. -+ */ -+ EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; -+ } - stored = 0; - bh = NULL; - offset = filp->f_pos & (sb->s_blocksize - 1); -@@ -162,18 +187,12 @@ revalidate: - * during the copy operation. 
- */ - unsigned long version = filp->f_version; -- unsigned char d_type = DT_UNKNOWN; - -- if (EXT3_HAS_INCOMPAT_FEATURE(sb, -- EXT3_FEATURE_INCOMPAT_FILETYPE) -- && de->file_type < EXT3_FT_MAX) -- d_type = -- ext3_filetype_table[de->file_type]; - error = filldir(dirent, de->name, - de->name_len, - filp->f_pos, - le32_to_cpu(de->inode), -- d_type); -+ get_dtype(sb, de->file_type)); - if (error) - break; - if (version != filp->f_version) -@@ -188,3 +207,269 @@ revalidate: - UPDATE_ATIME(inode); - return 0; - } -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * These functions convert from the major/minor hash to an f_pos -+ * value. -+ * -+ * Currently we only use major hash numer. This is unfortunate, but -+ * on 32-bit machines, the same VFS interface is used for lseek and -+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of -+ * lseek/telldir/seekdir will blow out spectacularly, and from within -+ * the ext2 low-level routine, we don't know if we're being called by -+ * a 64-bit version of the system call or the 32-bit version of the -+ * system call. Worse yet, NFSv2 only allows for a 32-bit readdir -+ * cookie. Sigh. -+ */ -+#define hash2pos(major, minor) (major >> 1) -+#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff) -+#define pos2min_hash(pos) (0) -+ -+/* -+ * This structure holds the nodes of the red-black tree used to store -+ * the directory entry in hash order. -+ */ -+struct fname { -+ __u32 hash; -+ __u32 minor_hash; -+ rb_node_t rb_hash; -+ struct fname *next; -+ __u32 inode; -+ __u8 name_len; -+ __u8 file_type; -+ char name[0]; -+}; -+ -+/* -+ * This functoin implements a non-recursive way of freeing all of the -+ * nodes in the red-black tree. 
-+ */ -+static void free_rb_tree_fname(rb_root_t *root) -+{ -+ rb_node_t *n = root->rb_node; -+ rb_node_t *parent; -+ struct fname *fname; -+ -+ while (n) { -+ /* Do the node's children first */ -+ if ((n)->rb_left) { -+ n = n->rb_left; -+ continue; -+ } -+ if (n->rb_right) { -+ n = n->rb_right; -+ continue; -+ } -+ /* -+ * The node has no children; free it, and then zero -+ * out parent's link to it. Finally go to the -+ * beginning of the loop and try to free the parent -+ * node. -+ */ -+ parent = n->rb_parent; -+ fname = rb_entry(n, struct fname, rb_hash); -+ kfree(fname); -+ if (!parent) -+ root->rb_node = 0; -+ else if (parent->rb_left == n) -+ parent->rb_left = 0; -+ else if (parent->rb_right == n) -+ parent->rb_right = 0; -+ n = parent; -+ } -+ root->rb_node = 0; -+} -+ -+ -+struct dir_private_info *create_dir_info(loff_t pos) -+{ -+ struct dir_private_info *p; -+ -+ p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ if (!p) -+ return NULL; -+ p->root.rb_node = 0; -+ p->curr_node = 0; -+ p->extra_fname = 0; -+ p->last_pos = 0; -+ p->curr_hash = pos2maj_hash(pos); -+ p->curr_minor_hash = pos2min_hash(pos); -+ p->next_hash = 0; -+ return p; -+} -+ -+void ext3_htree_free_dir_info(struct dir_private_info *p) -+{ -+ free_rb_tree_fname(&p->root); -+ kfree(p); -+} -+ -+/* -+ * Given a directory entry, enter it into the fname rb tree. 
-+ */ -+void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent) -+{ -+ rb_node_t **p, *parent = NULL; -+ struct fname * fname, *new_fn; -+ struct dir_private_info *info; -+ int len; -+ -+ info = (struct dir_private_info *) dir_file->private_data; -+ p = &info->root.rb_node; -+ -+ /* Create and allocate the fname structure */ -+ len = sizeof(struct fname) + dirent->name_len + 1; -+ new_fn = kmalloc(len, GFP_KERNEL); -+ memset(new_fn, 0, len); -+ new_fn->hash = hash; -+ new_fn->minor_hash = minor_hash; -+ new_fn->inode = le32_to_cpu(dirent->inode); -+ new_fn->name_len = dirent->name_len; -+ new_fn->file_type = dirent->file_type; -+ memcpy(new_fn->name, dirent->name, dirent->name_len); -+ new_fn->name[dirent->name_len] = 0; -+ -+ while (*p) { -+ parent = *p; -+ fname = rb_entry(parent, struct fname, rb_hash); -+ -+ /* -+ * If the hash and minor hash match up, then we put -+ * them on a linked list. This rarely happens... -+ */ -+ if ((new_fn->hash == fname->hash) && -+ (new_fn->minor_hash == fname->minor_hash)) { -+ new_fn->next = fname->next; -+ fname->next = new_fn; -+ return; -+ } -+ -+ if (new_fn->hash < fname->hash) -+ p = &(*p)->rb_left; -+ else if (new_fn->hash > fname->hash) -+ p = &(*p)->rb_right; -+ else if (new_fn->minor_hash < fname->minor_hash) -+ p = &(*p)->rb_left; -+ else /* if (new_fn->minor_hash > fname->minor_hash) */ -+ p = &(*p)->rb_right; -+ } -+ -+ rb_link_node(&new_fn->rb_hash, parent, p); -+ rb_insert_color(&new_fn->rb_hash, &info->root); -+} -+ -+ -+ -+/* -+ * This is a helper function for ext3_dx_readdir. It calls filldir -+ * for all entres on the fname linked list. (Normally there is only -+ * one entry on the linked list, unless there are 62 bit hash collisions.) 
-+ */ -+static int call_filldir(struct file * filp, void * dirent, -+ filldir_t filldir, struct fname *fname) -+{ -+ struct dir_private_info *info = filp->private_data; -+ loff_t curr_pos; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct super_block * sb; -+ int error; -+ -+ sb = inode->i_sb; -+ -+ if (!fname) { -+ printk("call_filldir: called with null fname?!?\n"); -+ return 0; -+ } -+ curr_pos = hash2pos(fname->hash, fname->minor_hash); -+ while (fname) { -+ error = filldir(dirent, fname->name, -+ fname->name_len, curr_pos, -+ fname->inode, -+ get_dtype(sb, fname->file_type)); -+ if (error) { -+ filp->f_pos = curr_pos; -+ info->extra_fname = fname->next; -+ return error; -+ } -+ fname = fname->next; -+ } -+ return 0; -+} -+ -+static int ext3_dx_readdir(struct file * filp, -+ void * dirent, filldir_t filldir) -+{ -+ struct dir_private_info *info = filp->private_data; -+ struct inode *inode = filp->f_dentry->d_inode; -+ struct fname *fname; -+ int ret; -+ -+ if (!info) { -+ info = create_dir_info(filp->f_pos); -+ if (!info) -+ return -ENOMEM; -+ filp->private_data = info; -+ } -+ -+ /* Some one has messed with f_pos; reset the world */ -+ if (info->last_pos != filp->f_pos) { -+ free_rb_tree_fname(&info->root); -+ info->curr_node = 0; -+ info->extra_fname = 0; -+ info->curr_hash = pos2maj_hash(filp->f_pos); -+ info->curr_minor_hash = pos2min_hash(filp->f_pos); -+ } -+ -+ /* -+ * If there are any leftover names on the hash collision -+ * chain, return them first. -+ */ -+ if (info->extra_fname && -+ call_filldir(filp, dirent, filldir, info->extra_fname)) -+ goto finished; -+ -+ if (!info->curr_node) -+ info->curr_node = rb_get_first(&info->root); -+ -+ while (1) { -+ /* -+ * Fill the rbtree if we have no more entries, -+ * or the inode has changed since we last read in the -+ * cached entries. 
-+ */ -+ if ((!info->curr_node) || -+ (filp->f_version != inode->i_version)) { -+ info->curr_node = 0; -+ free_rb_tree_fname(&info->root); -+ filp->f_version = inode->i_version; -+ ret = ext3_htree_fill_tree(filp, info->curr_hash, -+ info->curr_minor_hash, -+ &info->next_hash); -+ if (ret < 0) -+ return ret; -+ if (ret == 0) -+ break; -+ info->curr_node = rb_get_first(&info->root); -+ } -+ -+ fname = rb_entry(info->curr_node, struct fname, rb_hash); -+ info->curr_hash = fname->hash; -+ info->curr_minor_hash = fname->minor_hash; -+ if (call_filldir(filp, dirent, filldir, fname)) -+ break; -+ -+ info->curr_node = rb_get_next(info->curr_node); -+ if (!info->curr_node) { -+ info->curr_hash = info->next_hash; -+ info->curr_minor_hash = 0; -+ } -+ } -+finished: -+ info->last_pos = filp->f_pos; -+ UPDATE_ATIME(inode); -+ return 0; -+} -+#endif ---- linux-2.4.20/fs/ext3/file.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/file.c Sat Apr 5 03:56:31 2003 -@@ -35,6 +35,9 @@ static int ext3_release_file (struct ino - { - if (filp->f_mode & FMODE_WRITE) - ext3_discard_prealloc (inode); -+ if (is_dx(inode) && filp->private_data) -+ ext3_htree_free_dir_info(filp->private_data); -+ - return 0; - } - ---- /dev/null Fri Aug 30 17:31:37 2002 -+++ linux-2.4.20-braam/fs/ext3/hash.c Sat Apr 5 03:56:31 2003 -@@ -0,0 +1,215 @@ -+/* -+ * linux/fs/ext3/hash.c -+ * -+ * Copyright (C) 2002 by Theodore Ts'o -+ * -+ * This file is released under the GPL v2. -+ * -+ * This file may be redistributed under the terms of the GNU Public -+ * License. 
-+ */ -+ -+#include -+#include -+#include -+#include -+ -+#define DELTA 0x9E3779B9 -+ -+static void TEA_transform(__u32 buf[4], __u32 const in[]) -+{ -+ __u32 sum = 0; -+ __u32 b0 = buf[0], b1 = buf[1]; -+ __u32 a = in[0], b = in[1], c = in[2], d = in[3]; -+ int n = 16; -+ -+ do { -+ sum += DELTA; -+ b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); -+ b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); -+ } while(--n); -+ -+ buf[0] += b0; -+ buf[1] += b1; -+} -+ -+/* F, G and H are basic MD4 functions: selection, majority, parity */ -+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -+#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -+#define H(x, y, z) ((x) ^ (y) ^ (z)) -+ -+/* -+ * The generic round function. The application is so specific that -+ * we don't bother protecting all the arguments with parens, as is generally -+ * good macro practice, in favor of extra legibility. -+ * Rotation is separate from addition to prevent recomputation -+ */ -+#define ROUND(f, a, b, c, d, x, s) \ -+ (a += f(b, c, d) + x, a = (a << s) | (a >> (32-s))) -+#define K1 0 -+#define K2 013240474631UL -+#define K3 015666365641UL -+ -+/* -+ * Basic cut-down MD4 transform. Returns only 32 bits of result. 
-+ */ -+static void halfMD4Transform (__u32 buf[4], __u32 const in[]) -+{ -+ __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; -+ -+ /* Round 1 */ -+ ROUND(F, a, b, c, d, in[0] + K1, 3); -+ ROUND(F, d, a, b, c, in[1] + K1, 7); -+ ROUND(F, c, d, a, b, in[2] + K1, 11); -+ ROUND(F, b, c, d, a, in[3] + K1, 19); -+ ROUND(F, a, b, c, d, in[4] + K1, 3); -+ ROUND(F, d, a, b, c, in[5] + K1, 7); -+ ROUND(F, c, d, a, b, in[6] + K1, 11); -+ ROUND(F, b, c, d, a, in[7] + K1, 19); -+ -+ /* Round 2 */ -+ ROUND(G, a, b, c, d, in[1] + K2, 3); -+ ROUND(G, d, a, b, c, in[3] + K2, 5); -+ ROUND(G, c, d, a, b, in[5] + K2, 9); -+ ROUND(G, b, c, d, a, in[7] + K2, 13); -+ ROUND(G, a, b, c, d, in[0] + K2, 3); -+ ROUND(G, d, a, b, c, in[2] + K2, 5); -+ ROUND(G, c, d, a, b, in[4] + K2, 9); -+ ROUND(G, b, c, d, a, in[6] + K2, 13); -+ -+ /* Round 3 */ -+ ROUND(H, a, b, c, d, in[3] + K3, 3); -+ ROUND(H, d, a, b, c, in[7] + K3, 9); -+ ROUND(H, c, d, a, b, in[2] + K3, 11); -+ ROUND(H, b, c, d, a, in[6] + K3, 15); -+ ROUND(H, a, b, c, d, in[1] + K3, 3); -+ ROUND(H, d, a, b, c, in[5] + K3, 9); -+ ROUND(H, c, d, a, b, in[0] + K3, 11); -+ ROUND(H, b, c, d, a, in[4] + K3, 15); -+ -+ buf[0] += a; -+ buf[1] += b; -+ buf[2] += c; -+ buf[3] += d; -+} -+ -+#undef ROUND -+#undef F -+#undef G -+#undef H -+#undef K1 -+#undef K2 -+#undef K3 -+ -+/* The old legacy hash */ -+static __u32 dx_hack_hash (const char *name, int len) -+{ -+ __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) { -+ __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return (hash0 << 1); -+} -+ -+static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) -+{ -+ __u32 pad, val; -+ int i; -+ -+ pad = (__u32)len | ((__u32)len << 8); -+ pad |= pad << 16; -+ -+ val = pad; -+ if (len > num*4) -+ len = num * 4; -+ for (i=0; i < len; i++) { -+ if ((i % 4) == 0) -+ val = pad; -+ val = msg[i] + (val << 8); -+ if ((i % 4) == 3) { -+ 
*buf++ = val; -+ val = pad; -+ num--; -+ } -+ } -+ if (--num >= 0) -+ *buf++ = val; -+ while (--num >= 0) -+ *buf++ = pad; -+} -+ -+/* -+ * Returns the hash of a filename. If len is 0 and name is NULL, then -+ * this function can be used to test whether or not a hash version is -+ * supported. -+ * -+ * The seed is an 4 longword (32 bits) "secret" which can be used to -+ * uniquify a hash. If the seed is all zero's, then some default seed -+ * may be used. -+ * -+ * A particular hash version specifies whether or not the seed is -+ * represented, and whether or not the returned hash is 32 bits or 64 -+ * bits. 32 bit hashes will return 0 for the minor hash. -+ */ -+int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) -+{ -+ __u32 hash; -+ __u32 minor_hash = 0; -+ const char *p; -+ int i; -+ __u32 in[8], buf[4]; -+ -+ /* Initialize the default seed for the hash checksum functions */ -+ buf[0] = 0x67452301; -+ buf[1] = 0xefcdab89; -+ buf[2] = 0x98badcfe; -+ buf[3] = 0x10325476; -+ -+ /* Check to see if the seed is all zero's */ -+ if (hinfo->seed) { -+ for (i=0; i < 4; i++) { -+ if (hinfo->seed[i]) -+ break; -+ } -+ if (i < 4) -+ memcpy(buf, hinfo->seed, sizeof(buf)); -+ } -+ -+ switch (hinfo->hash_version) { -+ case DX_HASH_LEGACY: -+ hash = dx_hack_hash(name, len); -+ break; -+ case DX_HASH_HALF_MD4: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 8); -+ halfMD4Transform(buf, in); -+ len -= 32; -+ p += 32; -+ } -+ minor_hash = buf[2]; -+ hash = buf[1]; -+ break; -+ case DX_HASH_TEA: -+ p = name; -+ while (len > 0) { -+ str2hashbuf(p, len, in, 4); -+ TEA_transform(buf, in); -+ len -= 16; -+ p += 16; -+ } -+ hash = buf[0]; -+ minor_hash = buf[1]; -+ break; -+ default: -+ hinfo->hash = 0; -+ return -1; -+ } -+ hinfo->hash = hash & ~1; -+ hinfo->minor_hash = minor_hash; -+ return 0; -+} ---- linux-2.4.20/fs/ext3/namei.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/namei.c Sat Apr 5 03:56:31 2003 -@@ -16,6 
+16,12 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 -+ * Hash Tree Directory indexing cleanup -+ * Theodore Ts'o, 2002 - */ - - #include -@@ -38,6 +44,630 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#ifdef DX_DEBUG -+#define dxtrace(command) command -+#else -+#define dxtrace(command) -+#endif -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. 
-+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+#ifdef CONFIG_EXT3_INDEX -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static struct dx_frame *dx_probe(struct dentry *dentry, -+ struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct dx_frame *frame, -+ int *err); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (struct ext3_dir_entry_2 *de, int size, -+ struct dx_hash_info *hinfo, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, -+ struct dx_map_entry *offsets, int count); -+static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ 
struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash); -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err); -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode); -+ -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. -+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 
22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, -+ int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ printk(":%x.%u ", h.hash, -+ ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, -+ struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? 
-+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#endif /* DX_DEBUG */ -+ -+/* -+ * Probe for a directory leaf block to search. -+ * -+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format -+ * error in the directory index, and the caller should fall back to -+ * searching the directory normally. The callers of dx_probe **MUST** -+ * check for this error code, and make sure it never gets reflected -+ * back to userspace. -+ */ -+static struct dx_frame * -+dx_probe(struct dentry *dentry, struct inode *dir, -+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ u32 hash; -+ -+ frame->bh = NULL; -+ if (dentry) -+ dir = dentry->d_parent->d_inode; -+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version != DX_HASH_TEA && -+ root->info.hash_version != DX_HASH_HALF_MD4 && -+ root->info.hash_version != DX_HASH_LEGACY) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unrecognised inode hash code %d", -+ root->info.hash_version); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ hinfo->hash_version = root->info.hash_version; -+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed; -+ if (dentry) -+ ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); -+ hash = hinfo->hash; -+ -+ if (root->info.unused_flags & 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash flags: 
%#06x", -+ root->info.unused_flags); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ if ((indirect = root->info.indirect_levels) > 1) { -+ ext3_warning(dir->i_sb, __FUNCTION__, -+ "Unimplemented inode hash depth: %#06x", -+ root->info.indirect_levels); -+ brelse(bh); -+ *err = ERR_BAD_DX_DIR; -+ goto fail; -+ } -+ -+ entries = (struct dx_entry *) (((char *)&root->info) + -+ root->info.info_length); -+ assert(dx_get_limit(entries) == dx_root_limit(dir, -+ root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; -+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 
0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels) -+ brelse(frames[1].bh); -+ brelse(frames[0].bh); -+} -+ -+/* -+ * This function increments the frame pointer to search the next leaf -+ * block, and reads in the necessary intervening nodes if the search -+ * should be necessary. Whether or not the search is necessary is -+ * controlled by the hash parameter. If the hash value is even, then -+ * the search is only continued if the next block starts with that -+ * hash value. This is used if we are searching for a specific file. -+ * -+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block. -+ * -+ * This function returns 1 if the caller should continue to search, -+ * or 0 if it should not. If there is an error reading one of the -+ * index blocks, it will return -1. -+ * -+ * If start_hash is non-null, it will be filled in with the starting -+ * hash of the next page. -+ */ -+static int ext3_htree_next_block(struct inode *dir, __u32 hash, -+ struct dx_frame *frame, -+ struct dx_frame *frames, int *err, -+ __u32 *start_hash) -+{ -+ struct dx_frame *p; -+ struct buffer_head *bh; -+ int num_frames = 0; -+ __u32 bhash; -+ -+ *err = ENOENT; -+ p = frame; -+ /* -+ * Find the next leaf page by incrementing the frame pointer. -+ * If we run out of entries in the interior node, loop around and -+ * increment pointer in the parent node. 
When we break out of -+ * this loop, num_frames indicates the number of interior -+ * nodes need to be read. -+ */ -+ while (1) { -+ if (++(p->at) < p->entries + dx_get_count(p->entries)) -+ break; -+ if (p == frames) -+ return 0; -+ num_frames++; -+ p--; -+ } -+ -+ /* -+ * If the hash is 1, then continue only if the next page has a -+ * continuation hash of any value. This is used for readdir -+ * handling. Otherwise, check to see if the hash matches the -+ * desired contiuation hash. If it doesn't, return since -+ * there's no point to read in the successive index pages. -+ */ -+ bhash = dx_get_hash(p->at); -+ if (start_hash) -+ *start_hash = bhash; -+ if ((hash & 1) == 0) { -+ if ((bhash & ~1) != hash) -+ return 0; -+ } -+ /* -+ * If the hash is HASH_NB_ALWAYS, we always go to the next -+ * block so no check is necessary -+ */ -+ while (num_frames--) { -+ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), -+ 0, err))) -+ return -1; /* Failure */ -+ p++; -+ brelse (p->bh); -+ p->bh = bh; -+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; -+ } -+ return 1; -+} -+ -+ -+/* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) -+{ -+ return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+/* -+ * This function fills a red-black tree with information from a -+ * directory. We start scanning the directory in hash order, starting -+ * at start_hash and start_minor_hash. -+ * -+ * This function returns the number of entries inserted into the tree, -+ * or a negative error code. 
-+ */ -+int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash) -+{ -+ struct dx_hash_info hinfo; -+ struct buffer_head *bh; -+ struct ext3_dir_entry_2 *de, *top; -+ static struct dx_frame frames[2], *frame; -+ struct inode *dir; -+ int block, err; -+ int count = 0; -+ int ret; -+ __u32 hashval; -+ -+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, -+ start_minor_hash)); -+ dir = dir_file->f_dentry->d_inode; -+ hinfo.hash = start_hash; -+ hinfo.minor_hash = 0; -+ frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ -+ while (1) { -+ block = dx_get_block(frame->at); -+ dxtrace(printk("Reading block %d\n", block)); -+ if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) -+ goto errout; -+ -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) { -+ ext3fs_dirhash(de->name, de->name_len, &hinfo); -+ if ((hinfo.hash < start_hash) || -+ ((hinfo.hash == start_hash) && -+ (hinfo.minor_hash < start_minor_hash))) -+ continue; -+ ext3_htree_store_dirent(dir_file, hinfo.hash, -+ hinfo.minor_hash, de); -+ count++; -+ } -+ brelse (bh); -+ hashval = ~1; -+ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, -+ frame, frames, &err, &hashval); -+ if (next_hash) -+ *next_hash = hashval; -+ if (ret == -1) -+ goto errout; -+ /* -+ * Stop if: (a) there are no more entries, or -+ * (b) we have inserted at least one entry and the -+ * next hash value is not a continuation -+ */ -+ if ((ret == 0) || -+ (count && ((hashval & 1) == 0))) -+ break; -+ } -+ dx_release(frames); -+ dxtrace(printk("Fill tree: returned %d entries\n", count)); -+ return count; -+errout: -+ dx_release(frames); -+ return (err); -+} -+ -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (struct ext3_dir_entry_2 *de, int 
size, -+ struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ struct dx_hash_info h = *hinfo; -+ -+ while ((char *) de < base + size) -+ { -+ if (de->name_len && de->inode) { -+ ext3fs_dirhash(de->name, de->name_len, &h); -+ map_tail--; -+ map_tail->hash = h.hash; -+ map_tail->offs = (u32) ((char *) de - base); -+ count++; -+ } -+ /* XXX: do we need to check rec_len == 0 case? -Chris */ -+ de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, -+ EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. 
- * -@@ -94,6 +724,7 @@ static int inline search_dirblock(struct - return 0; - } - -+ - /* - * ext3_find_entry() - * -@@ -105,6 +736,8 @@ static int inline search_dirblock(struct - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. - */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -119,12 +752,32 @@ static struct buffer_head * ext3_find_en - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ int namelen; -+ const u8 *name; -+ unsigned blocksize; - - *res_dir = NULL; - sb = dir->i_sb; -- -+ blocksize = sb->s_blocksize; -+ namelen = dentry->d_name.len; -+ name = dentry->d_name.name; -+ if (namelen > EXT3_NAME_LEN) -+ return NULL; -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ bh = ext3_dx_find_entry(dentry, res_dir, &err); -+ /* -+ * On success, or if the error was file not found, -+ * return. Otherwise, fall back to doing a search the -+ * old fashioned way. 
-+ */ -+ if (bh || (err != ERR_BAD_DX_DIR)) -+ return bh; -+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); -+ } -+#endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -165,7 +818,7 @@ restart: - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -196,6 +849,66 @@ cleanup_and_exit: - return ret; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, -+ struct ext3_dir_entry_2 **res_dir, int *err) -+{ -+ struct super_block * sb; -+ struct dx_hash_info hinfo; -+ u32 hash; -+ struct dx_frame frames[2], *frame; -+ struct ext3_dir_entry_2 *de, *top; -+ struct buffer_head *bh; -+ unsigned long block; -+ int retval; -+ int namelen = dentry->d_name.len; -+ const u8 *name = dentry->d_name.name; -+ struct inode *dir = dentry->d_parent->d_inode; -+ -+ sb = dir->i_sb; -+ if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err))) -+ return NULL; -+ hash = hinfo.hash; -+ do { -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, err))) -+ goto errout; -+ de = (struct ext3_dir_entry_2 *) bh->b_data; -+ top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match (namelen, name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto errout; -+ } -+ *res_dir = de; -+ dx_release (frames); -+ return bh; -+ } -+ brelse (bh); -+ /* Check to see if we should continue to search */ -+ retval = ext3_htree_next_block(dir, hash, frame, -+ frames, err, 0); -+ if (retval == -1) { -+ ext3_warning(sb, 
__FUNCTION__, -+ "error reading index page in directory #%lu", -+ dir->i_ino); -+ goto errout; -+ } -+ } while (retval == 1); -+ -+ *err = -ENOENT; -+errout: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+} -+#endif -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -212,8 +925,9 @@ static struct dentry *ext3_lookup(struct - brelse (bh); - inode = iget(dir->i_sb, ino); - -- if (!inode) -+ if (!inode) { - return ERR_PTR(-EACCES); -+ } - } - d_add(dentry, inode); - return NULL; -@@ -237,6 +951,300 @@ static inline void ext3_set_de_type(stru - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+#ifdef CONFIG_EXT3_INDEX -+static struct ext3_dir_entry_2 * -+dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len; -+ de->inode = 0; -+ map++; -+ to += rec_len; -+ } -+ return (struct ext3_dir_entry_2 *) (to - rec_len); -+} -+ -+static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) -+{ -+ struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; -+ unsigned rec_len = 0; -+ -+ prev = to = de; -+ while ((char*)de < base + size) { -+ next = (struct ext3_dir_entry_2 *) ((char *) de + -+ le16_to_cpu(de->rec_len)); -+ if (de->inode && de->name_len) { -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ if (de > to) -+ memmove(to, de, rec_len); -+ to->rec_len = rec_len; -+ prev = to; -+ to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); -+ } -+ de = next; -+ } -+ return prev; -+} -+ -+static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ struct dx_hash_info *hinfo, 
int *error) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count, continued; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2; -+ unsigned split; -+ struct ext3_dir_entry_2 *de = NULL, *de2; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) { -+ brelse(*bh); -+ *bh = NULL; -+ goto errout; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, *bh); -+ if (err) { -+ journal_error: -+ brelse(*bh); -+ brelse(bh2); -+ *bh = NULL; -+ ext3_std_error(dir->i_sb, err); -+ goto errout; -+ } -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ -+ data2 = bh2->b_data; -+ -+ /* create map in the end of data2 block */ -+ map = (struct dx_map_entry *) (data2 + blocksize); -+ count = dx_make_map ((struct ext3_dir_entry_2 *) data1, -+ blocksize, hinfo, map); -+ map -= count; -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ continued = hash2 == map[split - 1].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_move_dirents(data1, data2, map + split, count - split); -+ de = dx_pack_dirents(data1,blocksize); -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); -+ dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); -+ -+ /* Which block gets the new entry? 
*/ -+ if (hinfo->hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block (frame, hash2 + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, frame->bh); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+errout: -+ return de; -+} -+#endif -+ -+ -+/* -+ * Add a new entry into a directory (leaf) block. If de is non-NULL, -+ * it points to a directory entry which is guaranteed to be large -+ * enough for new directory entry. If de is NULL, then -+ * add_dirent_to_buf will attempt search the directory block for -+ * space. It will return -ENOSPC if no space is available, and -EIO -+ * and -EEXIST if directory entry already exists. -+ * -+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In -+ * all other cases bh is released. -+ */ -+static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct ext3_dir_entry_2 *de, -+ struct buffer_head * bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ unsigned long offset = 0; -+ unsigned short reclen; -+ int nlen, rlen, err; -+ char *top; -+ -+ reclen = EXT3_DIR_REC_LEN(namelen); -+ if (!de) { -+ de = (struct ext3_dir_entry_2 *)bh->b_data; -+ top = bh->b_data + dir->i_sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match (namelen, name, de)) { -+ brelse (bh); -+ return -EEXIST; -+ } -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? 
rlen - nlen: rlen) >= reclen) -+ break; -+ de = (struct ext3_dir_entry_2 *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if ((char *) de > top) -+ return -ENOSPC; -+ } -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) { -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return err; -+ } -+ -+ /* By now the buffer is marked for journaling */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if (de->inode) { -+ struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = namelen; -+ memcpy (de->name, name, namelen); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. -+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) -+ ext3_std_error(dir->i_sb, err); -+ brelse(bh); -+ return 0; -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * This converts a one block unindexed directory to a 3 block indexed -+ * directory, and adds the dentry to the indexed directory. 
-+ */ -+static int make_indexed_dir(handle_t *handle, struct dentry *dentry, -+ struct inode *inode, struct buffer_head *bh) -+{ -+ struct inode *dir = dentry->d_parent->d_inode; -+ const char *name = dentry->d_name.name; -+ int namelen = dentry->d_name.len; -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ struct ext3_dir_entry_2 *de, *de2; -+ char *data1, *top; -+ unsigned len; -+ int retval; -+ unsigned blocksize; -+ struct dx_hash_info hinfo; -+ u32 block; -+ -+ blocksize = dir->i_sb->s_blocksize; -+ dxtrace(printk("Creating index\n")); -+ retval = ext3_journal_get_write_access(handle, bh); -+ if (retval) { -+ ext3_std_error(dir->i_sb, retval); -+ brelse(bh); -+ return retval; -+ } -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) { -+ brelse(bh); -+ return retval; -+ } -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (struct ext3_dir_entry_2 *) &root->info; -+ len = ((char *) root) + blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (struct ext3_dir_entry_2 *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (struct ext3_dir_entry_2 *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ root->info.hash_version = dir->i_sb->u.ext3_sb.s_def_hash_version; -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hinfo.hash_version = root->info.hash_version; -+ hinfo.seed = 
dir->i_sb->u.ext3_sb.s_hash_seed; -+ ext3fs_dirhash(name, namelen, &hinfo); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, &hinfo, &retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} -+#endif -+ - /* - * ext3_add_entry() - * -@@ -247,127 +1255,198 @@ static inline void ext3_set_de_type(stru - * may not sleep between calling this and putting something into - * the entry, as someone else might have used it while you slept. - */ -- --/* -- * AKPM: the journalling code here looks wrong on the error paths -- */ - static int ext3_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -+ struct ext3_dir_entry_2 *de; - struct super_block * sb; - int retval; -+#ifdef CONFIG_EXT3_INDEX -+ int dx_fallback=0; -+#endif -+ unsigned blocksize; -+ unsigned nlen, rlen; -+ u32 block, blocks; - - sb = dir->i_sb; -- -- if (!namelen) -+ blocksize = sb->s_blocksize; -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -+#ifdef CONFIG_EXT3_INDEX -+ if (is_dx(dir)) { -+ retval = ext3_dx_add_entry(handle, dentry, inode); -+ if (!retval || (retval != ERR_BAD_DX_DIR)) -+ return retval; -+ EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; -+ dx_fallback++; -+ ext3_mark_inode_dirty(handle, dir); -+ } -+#endif -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (retval != -ENOSPC) -+ return retval; -+ -+#ifdef 
CONFIG_EXT3_INDEX -+ if (blocks == 1 && !dx_fallback && -+ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ return make_indexed_dir(handle, dentry, inode, bh); -+#endif -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); - if (!bh) - return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; - de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -- } -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = blocksize); -+ nlen = 0; -+ return add_dirent_to_buf(handle, dentry, inode, de, bh); -+} - -- ext3_debug ("creating next block\n"); -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Returns 0 for success, or a negative error value -+ */ -+static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, -+ struct inode *inode) -+{ -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ struct dx_hash_info hinfo; -+ struct buffer_head * bh; -+ struct inode *dir = dentry->d_parent->d_inode; -+ struct super_block * sb = dir->i_sb; -+ struct ext3_dir_entry_2 *de; -+ int err; - -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -- } else { -+ frame = dx_probe(dentry, 0, &hinfo, frames, &err); -+ if (!frame) -+ return err; -+ entries = frame->entries; -+ at = frame->at; - -- ext3_debug ("skipping to next block\n"); -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) -+ goto cleanup; - -- de = (struct 
ext3_dir_entry_2 *) bh->b_data; -- } -- } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -- brelse (bh); -- return -EEXIST; -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) -+ goto journal_error; -+ -+ err = add_dirent_to_buf(handle, dentry, inode, 0, bh); -+ if (err != -ENOSPC) { -+ bh = 0; -+ goto cleanup; -+ } -+ -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ -+ if (levels && (dx_get_count(frames->entries) == -+ dx_get_limit(frames->entries))) { -+ ext3_warning(sb, __FUNCTION__, -+ "Directory index full!\n"); -+ err = -ENOSPC; -+ goto cleanup; - } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ goto cleanup; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ 
BUFFER_TRACE(frame->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, frame->bh); -+ if (err) -+ goto journal_error; -+ if (levels) { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ err = ext3_journal_get_write_access(handle, -+ frames[0].bh); -+ if (err) -+ goto journal_error; -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? */ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. 
-- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -- brelse(bh); -- return 0; -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ err = ext3_journal_dirty_metadata(handle, bh2); -+ if (err) -+ goto journal_error; -+ brelse (bh2); -+ } else { -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ err = ext3_journal_get_write_access(handle, -+ frame->bh); -+ if (err) -+ goto journal_error; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- brelse (bh); -- return -ENOSPC; -+ de = do_split(handle, dir, &bh, frame, &hinfo, &err); -+ if (!de) -+ goto cleanup; -+ err = add_dirent_to_buf(handle, dentry, inode, de, bh); -+ bh = 0; -+ goto cleanup; -+ -+journal_error: -+ ext3_std_error(dir->i_sb, err); -+cleanup: -+ if (bh) -+ brelse(bh); -+ dx_release(frames); -+ return err; - } -+#endif - - /* - * ext3_delete_entry deletes a directory entry by merging it with the -@@ -451,9 +1530,11 @@ static int ext3_create (struct inode * d - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if 
(IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -478,9 +1559,11 @@ static int ext3_mknod (struct inode * di - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -507,9 +1590,11 @@ static int ext3_mkdir(struct inode * dir - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -521,7 +1606,7 @@ static int ext3_mkdir(struct inode * dir - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { -@@ -554,21 +1639,19 @@ static int ext3_mkdir(struct inode * dir - inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); -- if (err) -- goto out_no_entry; -+ if (err) { -+ inode->i_nlink = 0; -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- inode->i_nlink = 
0; -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - /* -@@ -655,7 +1738,7 @@ int ext3_orphan_add(handle_t *handle, st - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -696,7 +1779,7 @@ int ext3_orphan_add(handle_t *handle, st - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -714,25 +1797,26 @@ out_unlock: - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - ino_t ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del(&ei->i_orphan); -+ INIT_LIST_HEAD(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -793,8 +1877,9 @@ static int ext3_rmdir (struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); -@@ -832,7 +1917,7 @@ 
static int ext3_rmdir (struct inode * di - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -850,8 +1935,9 @@ static int ext3_unlink(struct inode * di - handle_t *handle; - - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -878,7 +1964,7 @@ static int ext3_unlink(struct inode * di - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -904,9 +1990,11 @@ static int ext3_symlink (struct inode * - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -916,7 +2004,7 @@ static int ext3_symlink (struct inode * - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* -@@ -925,25 +2013,23 @@ static int ext3_symlink (struct inode * - * i_size in generic_commit_write(). 
- */ - err = block_symlink(inode, symname, l); -- if (err) -- goto out_no_entry; -+ if (err) { -+ ext3_dec_count(handle, inode); -+ ext3_mark_inode_dirty(handle, inode); -+ iput (inode); -+ goto out_stop; -+ } - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - ext3_mark_inode_dirty(handle, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -- --out_no_entry: -- ext3_dec_count(handle, inode); -- ext3_mark_inode_dirty(handle, inode); -- iput (inode); -- goto out_stop; - } - - static int ext3_link (struct dentry * old_dentry, -@@ -956,12 +2042,15 @@ static int ext3_link (struct dentry * ol - if (S_ISDIR(inode->i_mode)) - return -EPERM; - -- if (inode->i_nlink >= EXT3_LINK_MAX) -+ if (inode->i_nlink >= EXT3_LINK_MAX) { - return -EMLINK; -+ } - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(dir)) - handle->h_sync = 1; -@@ -995,9 +2084,11 @@ static int ext3_rename (struct inode * o - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -- if (IS_ERR(handle)) -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); -+ if (IS_ERR(handle)) { - return PTR_ERR(handle); -+ } - - if (IS_SYNC(old_dir) || IS_SYNC(new_dir)) - handle->h_sync = 1; -@@ -1077,7 +2168,7 @@ static int ext3_rename (struct inode * o - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ 
ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +2180,7 @@ static int ext3_rename (struct inode * o - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } ---- linux-2.4.20/fs/ext3/super.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/fs/ext3/super.c Sat Apr 5 03:56:31 2003 -@@ -707,6 +707,7 @@ static int ext3_setup_super(struct super - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO -@@ -717,6 +718,7 @@ static int ext3_setup_super(struct super - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), - sbi->s_mount_opt); -+ - printk(KERN_INFO "EXT3 FS " EXT3FS_VERSION ", " EXT3FS_DATE " on %s, ", - bdevname(sb->s_dev)); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { -@@ -890,6 +892,7 @@ static loff_t ext3_max_size(int bits) - return res; - } - -+ - struct super_block * ext3_read_super (struct super_block * sb, void * data, - int silent) - { -@@ -1066,6 +1069,9 @@ struct super_block * ext3_read_super (st - sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); -+ for (i=0; i < 4; i++) -+ sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); -+ sbi->s_def_hash_version = es->s_def_hash_version; - - if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR -@@ -1769,6 +1775,7 @@ static void __exit exit_ext3_fs(void) - unregister_filesystem(&ext3_fs_type); - } - -+EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and 
others"); ---- linux-2.4.20/include/linux/ext3_fs.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/ext3_fs.h Sat Apr 5 03:56:31 2003 -@@ -40,6 +40,11 @@ - #define EXT3FS_VERSION "2.4-0.9.19" - - /* -+ * Always enable hashed directories -+ */ -+#define CONFIG_EXT3_INDEX -+ -+/* - * Debug code - */ - #ifdef EXT3FS_DEBUG -@@ -437,8 +442,11 @@ struct ext3_super_block { - /*E0*/ __u32 s_journal_inum; /* inode number of journal file */ - __u32 s_journal_dev; /* device number of journal file */ - __u32 s_last_orphan; /* start of list of inodes to delete */ -- --/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ -+ __u32 s_hash_seed[4]; /* HTREE hash seed */ -+ __u8 s_def_hash_version; /* Default hash version to use */ -+ __u8 s_reserved_char_pad; -+ __u16 s_reserved_word_pad; -+ __u32 s_reserved[192]; /* Padding to the end of the block */ - }; - - #ifdef __KERNEL__ -@@ -575,9 +583,46 @@ struct ext3_dir_entry_2 { - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ -+#ifdef CONFIG_EXT3_INDEX -+ #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ -+ EXT3_FEATURE_COMPAT_DIR_INDEX) && \ -+ (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif -+ -+/* Legal values for the dx_root hash_version field: */ -+ -+#define DX_HASH_LEGACY 0 -+#define DX_HASH_HALF_MD4 1 -+#define DX_HASH_TEA 2 -+ -+/* hash info structure used by the directory hash */ -+struct dx_hash_info -+{ -+ u32 hash; -+ u32 minor_hash; -+ int hash_version; -+ u32 *seed; -+}; - - #ifdef 
__KERNEL__ - /* -+ * Control parameters used by ext3_htree_next_block -+ */ -+#define HASH_NB_ALWAYS 1 -+ -+ -+/* - * Describe an inode's exact location on disk and in memory - */ - struct ext3_iloc -@@ -587,6 +632,27 @@ struct ext3_iloc - unsigned long block_group; - }; - -+ -+/* -+ * This structure is stuffed into the struct file's private_data field -+ * for directories. It is where we put information so that we can do -+ * readdir operations in hash tree order. -+ */ -+struct dir_private_info { -+ rb_root_t root; -+ rb_node_t *curr_node; -+ struct fname *extra_fname; -+ loff_t last_pos; -+ __u32 curr_hash; -+ __u32 curr_minor_hash; -+ __u32 next_hash; -+}; -+ -+/* -+ * Special error return code only used by dx_probe() and its callers. -+ */ -+#define ERR_BAD_DX_DIR -75000 -+ - /* - * Function prototypes - */ -@@ -614,11 +680,20 @@ extern struct ext3_group_desc * ext3_get - - /* dir.c */ - extern int ext3_check_dir_entry(const char *, struct inode *, -- struct ext3_dir_entry_2 *, struct buffer_head *, -- unsigned long); -+ struct ext3_dir_entry_2 *, -+ struct buffer_head *, unsigned long); -+extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+ __u32 minor_hash, -+ struct ext3_dir_entry_2 *dirent); -+extern void ext3_htree_free_dir_info(struct dir_private_info *p); -+ - /* fsync.c */ - extern int ext3_sync_file (struct file *, struct dentry *, int); - -+/* hash.c */ -+extern int ext3fs_dirhash(const char *name, int len, struct -+ dx_hash_info *hinfo); -+ - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); -@@ -650,6 +725,8 @@ extern int ext3_ioctl (struct inode *, s - /* namei.c */ - extern int ext3_orphan_add(handle_t *, struct inode *); - extern int ext3_orphan_del(handle_t *, struct inode *); -+extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, -+ __u32 start_minor_hash, __u32 *next_hash); - - /* super.c */ - 
extern void ext3_error (struct super_block *, const char *, const char *, ...) ---- linux-2.4.20/include/linux/ext3_fs_sb.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/ext3_fs_sb.h Sat Apr 5 03:56:31 2003 -@@ -62,6 +62,8 @@ struct ext3_sb_info { - int s_inode_size; - int s_first_ino; - u32 s_next_generation; -+ u32 s_hash_seed[4]; -+ int s_def_hash_version; - - /* Journaling */ - struct inode * s_journal_inode; ---- linux-2.4.20/include/linux/ext3_jbd.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/ext3_jbd.h Sat Apr 5 03:56:31 2003 -@@ -63,6 +63,8 @@ extern int ext3_writepage_trans_blocks(s - - #define EXT3_RESERVE_TRANS_BLOCKS 12 - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, ---- linux-2.4.20/include/linux/rbtree.h~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/include/linux/rbtree.h Sat Apr 5 03:56:31 2003 -@@ -120,6 +120,8 @@ rb_root_t; - - extern void rb_insert_color(rb_node_t *, rb_root_t *); - extern void rb_erase(rb_node_t *, rb_root_t *); -+extern rb_node_t *rb_get_first(rb_root_t *root); -+extern rb_node_t *rb_get_next(rb_node_t *n); - - static inline void rb_link_node(rb_node_t * node, rb_node_t * parent, rb_node_t ** rb_link) - { ---- linux-2.4.20/lib/rbtree.c~ext-2.4-patch-1 Sat Apr 5 03:56:31 2003 -+++ linux-2.4.20-braam/lib/rbtree.c Sat Apr 5 03:56:31 2003 -@@ -17,6 +17,8 @@ - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -+ -+ rb_get_first and rb_get_next written by Theodore Ts'o, 9/8/2002 - */ - - #include -@@ -294,3 +296,43 @@ void rb_erase(rb_node_t * node, rb_root_ - __rb_erase_color(child, parent, root); - } - EXPORT_SYMBOL(rb_erase); -+ -+/* -+ * This function returns the first node (in sort order) of the tree. 
-+ */ -+rb_node_t *rb_get_first(rb_root_t *root) -+{ -+ rb_node_t *n; -+ -+ n = root->rb_node; -+ if (!n) -+ return 0; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+} -+EXPORT_SYMBOL(rb_get_first); -+ -+/* -+ * Given a node, this function will return the next node in the tree. -+ */ -+rb_node_t *rb_get_next(rb_node_t *n) -+{ -+ rb_node_t *parent; -+ -+ if (n->rb_right) { -+ n = n->rb_right; -+ while (n->rb_left) -+ n = n->rb_left; -+ return n; -+ } else { -+ while ((parent = n->rb_parent)) { -+ if (n == parent->rb_left) -+ return parent; -+ n = parent; -+ } -+ return 0; -+ } -+} -+EXPORT_SYMBOL(rb_get_next); -+ - -_ diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-2.patch b/lustre/kernel_patches/patches/ext-2.4-patch-2.patch deleted file mode 100644 index 689d33b..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-2.patch +++ /dev/null @@ -1,34 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# -# namei.c | 9 +++++++++ -# 1 files changed, 9 insertions(+) -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/11/07 tytso@snap.thunk.org 1.777 -# Add '.' and '..' entries to be returned by readdir of htree directories -# -# This patch from Chris Li adds '.' and '..' to the rbtree so that they -# are properly returned by readdir. -# -------------------------------------------- -# -diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c ---- a/fs/ext3/namei.c Thu Nov 7 10:57:30 2002 -+++ b/fs/ext3/namei.c Thu Nov 7 10:57:30 2002 -@@ -546,6 +546,15 @@ - if (!frame) - return err; - -+ /* Add '.' and '..' 
from the htree header */ -+ if (!start_hash && !start_minor_hash) { -+ de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -+ ext3_htree_store_dirent(dir_file, 0, 0, de); -+ de = ext3_next_entry(de); -+ ext3_htree_store_dirent(dir_file, 0, 0, de); -+ count += 2; -+ } -+ - while (1) { - block = dx_get_block(frame->at); - dxtrace(printk("Reading block %d\n", block)); diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-3.patch b/lustre/kernel_patches/patches/ext-2.4-patch-3.patch deleted file mode 100644 index 2600b02..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-3.patch +++ /dev/null @@ -1,96 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# -# fs/ext3/dir.c | 7 +++++-- -# fs/ext3/namei.c | 11 +++++++---- -# include/linux/ext3_fs.h | 2 +- -# 3 files changed, 13 insertions(+), 7 deletions(-) -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/11/07 tytso@snap.thunk.org 1.778 -# Check for failed kmalloc() in ext3_htree_store_dirent() -# -# This patch checks for a failed kmalloc() in ext3_htree_store_dirent(), -# and passes the error up to its caller, ext3_htree_fill_tree(). -# -------------------------------------------- -# -diff -Nru a/fs/ext3/dir.c b/fs/ext3/dir.c ---- a/fs/ext3/dir.c Thu Nov 7 10:57:34 2002 -+++ b/fs/ext3/dir.c Thu Nov 7 10:57:34 2002 -@@ -308,7 +308,7 @@ - /* - * Given a directory entry, enter it into the fname rb tree. 
- */ --void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext3_dir_entry_2 *dirent) - { -@@ -323,6 +323,8 @@ - /* Create and allocate the fname structure */ - len = sizeof(struct fname) + dirent->name_len + 1; - new_fn = kmalloc(len, GFP_KERNEL); -+ if (!new_fn) -+ return -ENOMEM; - memset(new_fn, 0, len); - new_fn->hash = hash; - new_fn->minor_hash = minor_hash; -@@ -344,7 +346,7 @@ - (new_fn->minor_hash == fname->minor_hash)) { - new_fn->next = fname->next; - fname->next = new_fn; -- return; -+ return 0; - } - - if (new_fn->hash < fname->hash) -@@ -359,6 +361,7 @@ - - rb_link_node(&new_fn->rb_hash, parent, p); - rb_insert_color(&new_fn->rb_hash, &info->root); -+ return 0; - } - - -diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c ---- a/fs/ext3/namei.c Thu Nov 7 10:57:34 2002 -+++ b/fs/ext3/namei.c Thu Nov 7 10:57:34 2002 -@@ -549,9 +549,11 @@ - /* Add '.' and '..' from the htree header */ - if (!start_hash && !start_minor_hash) { - de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; -- ext3_htree_store_dirent(dir_file, 0, 0, de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; - de = ext3_next_entry(de); -- ext3_htree_store_dirent(dir_file, 0, 0, de); -+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) -+ goto errout; - count += 2; - } - -@@ -570,8 +572,9 @@ - ((hinfo.hash == start_hash) && - (hinfo.minor_hash < start_minor_hash))) - continue; -- ext3_htree_store_dirent(dir_file, hinfo.hash, -- hinfo.minor_hash, de); -+ if ((err = ext3_htree_store_dirent(dir_file, -+ hinfo.hash, hinfo.minor_hash, de)) != 0) -+ goto errout; - count++; - } - brelse (bh); -diff -Nru a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h ---- a/include/linux/ext3_fs.h Thu Nov 7 10:57:34 2002 -+++ b/include/linux/ext3_fs.h Thu Nov 7 10:57:34 2002 -@@ -682,7 +682,7 @@ - extern int ext3_check_dir_entry(const char *, struct inode *, - 
struct ext3_dir_entry_2 *, - struct buffer_head *, unsigned long); --extern void ext3_htree_store_dirent(struct file *dir_file, __u32 hash, -+extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, - __u32 minor_hash, - struct ext3_dir_entry_2 *dirent); - extern void ext3_htree_free_dir_info(struct dir_private_info *p); diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-4.patch b/lustre/kernel_patches/patches/ext-2.4-patch-4.patch deleted file mode 100644 index 67f5afa..0000000 --- a/lustre/kernel_patches/patches/ext-2.4-patch-4.patch +++ /dev/null @@ -1,48 +0,0 @@ -# This is a BitKeeper generated patch for the following project: -# Project Name: Linux kernel tree -# -# namei.c | 21 ++++++++++++++++++++- -# 1 files changed, 20 insertions(+), 1 deletion(-) -# -# The following is the BitKeeper ChangeSet Log -# -------------------------------------------- -# 02/11/07 tytso@snap.thunk.org 1.779 -# Fix ext3 htree rename bug. -# -# This fixes an ext3 htree bug pointed out by Christopher Li; if -# adding the new name to the directory causes a split, this can cause -# the directory entry containing the old name to move to another -# block, and then the removal of the old name will fail. -# -------------------------------------------- -# -diff -Nru a/fs/ext3/namei.c b/fs/ext3/namei.c ---- a/fs/ext3/namei.c Thu Nov 7 10:57:49 2002 -+++ b/fs/ext3/namei.c Thu Nov 7 10:57:49 2002 -@@ -2173,7 +2173,26 @@ - /* - * ok, that's it - */ -- ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ retval = ext3_delete_entry(handle, old_dir, old_de, old_bh); -+ if (retval == -ENOENT) { -+ /* -+ * old_de could have moved out from under us. 
-+ */ -+ struct buffer_head *old_bh2; -+ struct ext3_dir_entry_2 *old_de2; -+ -+ old_bh2 = ext3_find_entry(old_dentry, &old_de2); -+ if (old_bh2) { -+ retval = ext3_delete_entry(handle, old_dir, -+ old_de2, old_bh2); -+ brelse(old_bh2); -+ } -+ } -+ if (retval) { -+ ext3_warning(old_dir->i_sb, "ext3_rename", -+ "Deleting old file (%lu), %d, error=%d", -+ old_dir->i_ino, old_dir->i_nlink, retval); -+ } - - if (new_inode) { - new_inode->i_nlink--; diff --git a/lustre/kernel_patches/patches/ext3-2.4-ino_t.patch b/lustre/kernel_patches/patches/ext3-2.4-ino_t.patch deleted file mode 100644 index 1786d0f..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4-ino_t.patch +++ /dev/null @@ -1,144 +0,0 @@ - fs/ext3/ialloc.c | 20 ++++++++++---------- - fs/ext3/namei.c | 16 ++++++++-------- - include/linux/ext3_fs.h | 2 +- - 3 files changed, 19 insertions(+), 19 deletions(-) - ---- linux-2.4.20/fs/ext3/ialloc.c~ext3-2.4-ino_t 2003-04-08 23:35:24.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/ialloc.c 2003-04-08 23:35:24.000000000 -0600 -@@ -65,8 +65,8 @@ static int read_inode_bitmap (struct sup - if (!bh) { - ext3_error (sb, "read_inode_bitmap", - "Cannot read inode bitmap - " -- "block_group = %lu, inode_bitmap = %lu", -- block_group, (unsigned long) gdp->bg_inode_bitmap); -+ "block_group = %lu, inode_bitmap = %u", -+ block_group, gdp->bg_inode_bitmap); - retval = -EIO; - } - /* -@@ -533,19 +533,19 @@ out: - } - - /* Verify that we are loading a valid orphan from disk */ --struct inode *ext3_orphan_get (struct super_block * sb, ino_t ino) -+struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) - { -- ino_t max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); -+ unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); - unsigned long block_group; - int bit; - int bitmap_nr; - struct buffer_head *bh; - struct inode *inode = NULL; -- -+ - /* Error cases - e2fsck has already cleaned up for us */ - if (ino > max_ino) { - ext3_warning(sb, 
__FUNCTION__, -- "bad orphan ino %ld! e2fsck was run?\n", ino); -+ "bad orphan ino %lu! e2fsck was run?\n", ino); - return NULL; - } - -@@ -554,7 +554,7 @@ struct inode *ext3_orphan_get (struct su - if ((bitmap_nr = load_inode_bitmap(sb, block_group)) < 0 || - !(bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr])) { - ext3_warning(sb, __FUNCTION__, -- "inode bitmap error for orphan %ld\n", ino); -+ "inode bitmap error for orphan %lu\n", ino); - return NULL; - } - -@@ -565,16 +565,16 @@ struct inode *ext3_orphan_get (struct su - if (!ext3_test_bit(bit, bh->b_data) || !(inode = iget(sb, ino)) || - is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) { - ext3_warning(sb, __FUNCTION__, -- "bad orphan inode %ld! e2fsck was run?\n", ino); -+ "bad orphan inode %lu! e2fsck was run?\n", ino); - printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%ld) = %d\n", - bit, bh->b_blocknr, ext3_test_bit(bit, bh->b_data)); - printk(KERN_NOTICE "inode=%p\n", inode); - if (inode) { - printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", - is_bad_inode(inode)); -- printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%d\n", -+ printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", - NEXT_ORPHAN(inode)); -- printk(KERN_NOTICE "max_ino=%ld\n", max_ino); -+ printk(KERN_NOTICE "max_ino=%lu\n", max_ino); - } - /* Avoid freeing blocks if we got a bad deleted inode */ - if (inode && inode->i_nlink == 0) ---- linux-2.4.20/fs/ext3/namei.c~ext3-2.4-ino_t 2003-04-08 23:35:24.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-04-08 23:35:24.000000000 -0600 -@@ -1808,10 +1808,10 @@ int ext3_orphan_del(handle_t *handle, st - struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; -- ino_t ino_next; -+ unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; -- -+ - lock_super(inode->i_sb); - if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); -@@ -1822,7 +1822,7 @@ int ext3_orphan_del(handle_t *handle, st - prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - -- 
jbd_debug(4, "remove inode %ld from orphan list\n", inode->i_ino); -+ jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - - list_del(&ei->i_orphan); - INIT_LIST_HEAD(&ei->i_orphan); -@@ -1833,13 +1833,13 @@ int ext3_orphan_del(handle_t *handle, st - * list in memory. */ - if (!handle) - goto out; -- -+ - err = ext3_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_err; - - if (prev == &sbi->s_orphan) { -- jbd_debug(4, "superblock will point to %ld\n", ino_next); -+ jbd_debug(4, "superblock will point to %lu\n", ino_next); - BUFFER_TRACE(sbi->s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, sbi->s_sbh); - if (err) -@@ -1850,8 +1850,8 @@ int ext3_orphan_del(handle_t *handle, st - struct ext3_iloc iloc2; - struct inode *i_prev = - list_entry(prev, struct inode, u.ext3_i.i_orphan); -- -- jbd_debug(4, "orphan inode %ld will point to %ld\n", -+ -+ jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, ino_next); - err = ext3_reserve_inode_write(handle, i_prev, &iloc2); - if (err) -@@ -1866,7 +1866,7 @@ int ext3_orphan_del(handle_t *handle, st - if (err) - goto out_brelse; - --out_err: -+out_err: - ext3_std_error(inode->i_sb, err); - out: - unlock_super(inode->i_sb); ---- linux-2.4.20/include/linux/ext3_fs.h~ext3-2.4-ino_t 2003-04-08 23:35:24.000000000 -0600 -+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-04-08 23:35:24.000000000 -0600 -@@ -673,7 +673,7 @@ extern int ext3fs_dirhash(const char *na - /* ialloc.c */ - extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); - extern void ext3_free_inode (handle_t *, struct inode *); --extern struct inode * ext3_orphan_get (struct super_block *, ino_t); -+extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); - extern void ext3_check_inodes_bitmap (struct super_block *); - extern unsigned long ext3_count_free (struct buffer_head *, 
unsigned); - -_ diff --git a/lustre/kernel_patches/patches/ext3-2.4.18-fixes.patch b/lustre/kernel_patches/patches/ext3-2.4.18-fixes.patch deleted file mode 100644 index 56e841e..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4.18-fixes.patch +++ /dev/null @@ -1,353 +0,0 @@ -diff -ru lum-2.4.18-um30/fs/ext3/balloc.c uml-2.4.18-12.5/fs/ext3/balloc.c ---- lum-2.4.18-um30/fs/ext3/balloc.c Mon Feb 25 12:38:08 2002 -+++ uml-2.4.18-12.5/fs/ext3/balloc.c Thu Sep 19 13:40:11 2002 -@@ -276,7 +276,8 @@ - } - lock_super (sb); - es = sb->u.ext3_sb.s_es; -- if (block < le32_to_cpu(es->s_first_data_block) || -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || - (block + count) > le32_to_cpu(es->s_blocks_count)) { - ext3_error (sb, "ext3_free_blocks", - "Freeing blocks not in datazone - " -@@ -309,17 +310,6 @@ - if (!gdp) - goto error_return; - -- if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -- in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -- in_range (block, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group) || -- in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group)) -- ext3_error (sb, "ext3_free_blocks", -- "Freeing blocks in system zones - " -- "Block = %lu, count = %lu", -- block, count); -- - /* - * We are about to start releasing blocks in the bitmap, - * so we need undo access. -@@ -345,14 +335,24 @@ - if (err) - goto error_return; - -- for (i = 0; i < count; i++) { -+ for (i = 0; i < count; i++, block++) { -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ sb->u.ext2_sb.s_itb_per_group)) { -+ ext3_error(sb, __FUNCTION__, -+ "Freeing block in system zone - block = %lu", -+ block); -+ continue; -+ } -+ - /* - * An HJ special. This is expensive... 
- */ - #ifdef CONFIG_JBD_DEBUG - { - struct buffer_head *debug_bh; -- debug_bh = sb_get_hash_table(sb, block + i); -+ debug_bh = sb_get_hash_table(sb, block); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "Deleted!"); - if (!bh2jh(bitmap_bh)->b_committed_data) -@@ -365,9 +365,8 @@ - #endif - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) { -- ext3_error (sb, __FUNCTION__, -- "bit already cleared for block %lu", -- block + i); -+ ext3_error(sb, __FUNCTION__, -+ "bit already cleared for block %lu", block); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - dquot_freed_blocks++; -@@ -415,7 +417,6 @@ - if (!err) err = ret; - - if (overflow && !err) { -- block += count; - count = overflow; - goto do_more; - } -@@ -542,6 +543,7 @@ - int i, j, k, tmp, alloctmp; - int bitmap_nr; - int fatal = 0, err; -+ int performed_allocation = 0; - struct super_block * sb; - struct ext3_group_desc * gdp; - struct ext3_super_block * es; -@@ -575,6 +577,7 @@ - - ext3_debug ("goal=%lu.\n", goal); - -+repeat: - /* - * First, test whether the goal block is free. - */ -@@ -644,8 +647,7 @@ - } - - /* No space left on the device */ -- unlock_super (sb); -- return 0; -+ goto out; - - search_back: - /* -@@ -684,16 +686,28 @@ - if (tmp == le32_to_cpu(gdp->bg_block_bitmap) || - tmp == le32_to_cpu(gdp->bg_inode_bitmap) || - in_range (tmp, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group)) -- ext3_error (sb, "ext3_new_block", -- "Allocating block in system zone - " -- "block = %u", tmp); -+ EXT3_SB(sb)->s_itb_per_group)) { -+ ext3_error(sb, __FUNCTION__, -+ "Allocating block in system zone - block = %u", tmp); -+ -+ /* Note: This will potentially use up one of the handle's -+ * buffer credits. Normally we have way too many credits, -+ * so that is OK. In _very_ rare cases it might not be OK. 
-+ * We will trigger an assertion if we run out of credits, -+ * and we will have to do a full fsck of the filesystem - -+ * better than randomly corrupting filesystem metadata. -+ */ -+ ext3_set_bit(j, bh->b_data); -+ goto repeat; -+ } -+ - - /* The superblock lock should guard against anybody else beating - * us to this point! */ - J_ASSERT_BH(bh, !ext3_test_bit(j, bh->b_data)); - BUFFER_TRACE(bh, "setting bitmap bit"); - ext3_set_bit(j, bh->b_data); -+ performed_allocation = 1; - - #ifdef CONFIG_JBD_DEBUG - { -@@ -815,6 +829,11 @@ - ext3_std_error(sb, fatal); - } - unlock_super (sb); -+ /* -+ * Undo the block allocation -+ */ -+ if (!performed_allocation) -+ DQUOT_FREE_BLOCK(inode, 1); - return 0; - - } -diff -ru lum-2.4.18-um30/fs/ext3/file.c uml-2.4.18-12.5/fs/ext3/file.c ---- lum-2.4.18-um30/fs/ext3/file.c Thu Nov 15 14:37:55 2001 -+++ uml-2.4.18-12.5/fs/ext3/file.c Thu Sep 19 13:40:11 2002 -@@ -61,19 +61,52 @@ - static ssize_t - ext3_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) - { -+ int ret, err; - struct inode *inode = file->f_dentry->d_inode; - -- /* -- * Nasty: if the file is subject to synchronous writes then we need -- * to force generic_osync_inode() to call ext3_write_inode(). -- * We do that by marking the inode dirty. This adds much more -- * computational expense than we need, but we're going to sync -- * anyway. -- */ -- if (IS_SYNC(inode) || (file->f_flags & O_SYNC)) -- mark_inode_dirty(inode); -+ ret = generic_file_write(file, buf, count, ppos); - -- return generic_file_write(file, buf, count, ppos); -+ /* Skip file flushing code if there was an error, or if nothing -+ was written. */ -+ if (ret <= 0) -+ return ret; -+ -+ /* If the inode is IS_SYNC, or is O_SYNC and we are doing -+ data-journaling, then we need to make sure that we force the -+ transaction to disk to keep all metadata uptodate -+ synchronously. 
*/ -+ -+ if (file->f_flags & O_SYNC) { -+ /* If we are non-data-journaled, then the dirty data has -+ already been flushed to backing store by -+ generic_osync_inode, and the inode has been flushed -+ too if there have been any modifications other than -+ mere timestamp updates. -+ -+ Open question --- do we care about flushing -+ timestamps too if the inode is IS_SYNC? */ -+ if (!ext3_should_journal_data(inode)) -+ return ret; -+ -+ goto force_commit; -+ } -+ -+ /* So we know that there has been no forced data flush. If the -+ inode is marked IS_SYNC, we need to force one ourselves. */ -+ if (!IS_SYNC(inode)) -+ return ret; -+ -+ /* Open question #2 --- should we force data to disk here too? -+ If we don't, the only impact is that data=writeback -+ filesystems won't flush data to disk automatically on -+ IS_SYNC, only metadata (but historically, that is what ext2 -+ has done.) */ -+ -+force_commit: -+ err = ext3_force_commit(inode->i_sb); -+ if (err) -+ return err; -+ return ret; - } - - struct file_operations ext3_file_operations = { -diff -ru lum-2.4.18-um30/fs/ext3/fsync.c uml-2.4.18-12.5/fs/ext3/fsync.c ---- lum-2.4.18-um30/fs/ext3/fsync.c Tue Nov 20 22:34:13 2001 -+++ uml-2.4.18-12.5/fs/ext3/fsync.c Thu Sep 19 13:40:11 2002 -@@ -62,7 +62,12 @@ - * we'll end up waiting on them in commit. - */ - ret = fsync_inode_buffers(inode); -- ret |= fsync_inode_data_buffers(inode); -+ -+ /* In writeback mode, we need to force out data buffers too. In -+ * the other modes, ext3_force_commit takes care of forcing out -+ * just the right data blocks. 
*/ -+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) -+ ret |= fsync_inode_data_buffers(inode); - - ext3_force_commit(inode->i_sb); - -diff -ru lum-2.4.18-um30/fs/ext3/ialloc.c uml-2.4.18-12.5/fs/ext3/ialloc.c ---- lum-2.4.18-um30/fs/ext3/ialloc.c Mon Feb 25 12:38:08 2002 -+++ uml-2.4.18-12.5/fs/ext3/ialloc.c Thu Sep 19 13:40:11 2002 -@@ -392,7 +392,7 @@ - - err = -ENOSPC; - if (!gdp) -- goto fail; -+ goto out; - - err = -EIO; - bitmap_nr = load_inode_bitmap (sb, i); -@@ -523,9 +523,10 @@ - return inode; - - fail: -+ ext3_std_error(sb, err); -+out: - unlock_super(sb); - iput(inode); -- ext3_std_error(sb, err); - return ERR_PTR(err); - } - -diff -ru lum-2.4.18-um30/fs/ext3/inode.c uml-2.4.18-12.5/fs/ext3/inode.c ---- lum-2.4.18-um30/fs/ext3/inode.c Mon Feb 25 12:38:08 2002 -+++ uml-2.4.18-12.5/fs/ext3/inode.c Thu Sep 19 13:40:11 2002 -@@ -412,6 +412,7 @@ - return NULL; - - changed: -+ brelse(bh); - *err = -EAGAIN; - goto no_block; - failure: -@@ -581,8 +582,6 @@ - - parent = nr; - } -- if (IS_SYNC(inode)) -- handle->h_sync = 1; - } - if (n == num) - return 0; -@@ -1015,8 +1018,8 @@ - unsigned from, unsigned to) - { - struct inode *inode = page->mapping->host; -- handle_t *handle = ext3_journal_current_handle(); - int ret, needed_blocks = ext3_writepage_trans_blocks(inode); -+ handle_t *handle; - - lock_kernel(); - handle = ext3_journal_start(inode, needed_blocks); -diff -ru lum-2.4.18-um30/fs/ext3/namei.c uml-2.4.18-12.5/fs/ext3/namei.c ---- lum-2.4.18-um30/fs/ext3/namei.c Fri Nov 9 15:25:04 2001 -+++ uml-2.4.18-12.5/fs/ext3/namei.c Thu Sep 19 13:40:11 2002 -@@ -354,8 +355,8 @@ - */ - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); - dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, bh); - brelse(bh); -@@ -464,8 +465,8 @@ - inode->i_op = 
&ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -- ext3_mark_inode_dirty(handle, inode); - err = ext3_add_nondir(handle, dentry, inode); -+ ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -489,8 +490,8 @@ - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, mode, rdev); -- ext3_mark_inode_dirty(handle, inode); - err = ext3_add_nondir(handle, dentry, inode); -+ ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -933,8 +934,8 @@ - inode->i_size = l-1; - } - inode->u.ext3_i.i_disksize = inode->i_size; -- ext3_mark_inode_dirty(handle, inode); - err = ext3_add_nondir(handle, dentry, inode); -+ ext3_mark_inode_dirty(handle, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -@@ -970,8 +971,8 @@ - ext3_inc_count(handle, inode); - atomic_inc(&inode->i_count); - -- ext3_mark_inode_dirty(handle, inode); - err = ext3_add_nondir(handle, dentry, inode); -+ ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle, dir); - return err; - } -diff -ru lum-2.4.18-um30/fs/ext3/super.c uml-2.4.18-12.5/fs/ext3/super.c ---- lum-2.4.18-um30/fs/ext3/super.c Fri Jul 12 17:59:37 2002 -+++ uml-2.4.18-12.5/fs/ext3/super.c Thu Sep 19 13:40:11 2002 -@@ -1589,8 +1589,10 @@ - journal_t *journal = EXT3_SB(sb)->s_journal; - - /* Now we set up the journal barrier. */ -+ unlock_super(sb); - journal_lock_updates(journal); - journal_flush(journal); -+ lock_super(sb); - - /* Journal blocked and flushed, clear needs_recovery flag. 
*/ - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); diff --git a/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch b/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch deleted file mode 100644 index 2ddff7d..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro.patch +++ /dev/null @@ -1,1540 +0,0 @@ ---- ./fs/ext3/balloc.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/balloc.c Tue May 7 15:35:59 2002 -@@ -46,18 +46,18 @@ struct ext3_group_desc * ext3_get_group_ - unsigned long desc; - struct ext3_group_desc * gdp; - -- if (block_group >= sb->u.ext3_sb.s_groups_count) { -+ if (block_group >= EXT3_SB(sb)->s_groups_count) { - ext3_error (sb, "ext3_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - - return NULL; - } - - group_desc = block_group / EXT3_DESC_PER_BLOCK(sb); - desc = block_group % EXT3_DESC_PER_BLOCK(sb); -- if (!sb->u.ext3_sb.s_group_desc[group_desc]) { -+ if (!EXT3_SB(sb)->s_group_desc[group_desc]) { - ext3_error (sb, "ext3_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", -@@ -66,9 +66,9 @@ struct ext3_group_desc * ext3_get_group_ - } - - gdp = (struct ext3_group_desc *) -- sb->u.ext3_sb.s_group_desc[group_desc]->b_data; -+ EXT3_SB(sb)->s_group_desc[group_desc]->b_data; - if (bh) -- *bh = sb->u.ext3_sb.s_group_desc[group_desc]; -+ *bh = EXT3_SB(sb)->s_group_desc[group_desc]; - return gdp + desc; - } - -@@ -104,8 +104,8 @@ static int read_block_bitmap (struct sup - * this group. The IO will be retried next time. 
- */ - error_out: -- sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_block_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_block_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -128,16 +128,17 @@ static int __load_block_bitmap (struct s - int i, j, retval = 0; - unsigned long block_bitmap_number; - struct buffer_head * block_bitmap; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - -- if (block_group >= sb->u.ext3_sb.s_groups_count) -+ if (block_group >= sbi->s_groups_count) - ext3_panic (sb, "load_block_bitmap", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", -- block_group, sb->u.ext3_sb.s_groups_count); -+ block_group, EXT3_SB(sb)->s_groups_count); - -- if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) { -- if (sb->u.ext3_sb.s_block_bitmap[block_group]) { -- if (sb->u.ext3_sb.s_block_bitmap_number[block_group] == -+ if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) { -+ if (sbi->s_block_bitmap[block_group]) { -+ if (sbi->s_block_bitmap_number[block_group] == - block_group) - return block_group; - ext3_error (sb, "__load_block_bitmap", -@@ -149,21 +150,20 @@ static int __load_block_bitmap (struct s - return block_group; - } - -- for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps && -- sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++) -+ for (i = 0; i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] != block_group; i++) - ; -- if (i < sb->u.ext3_sb.s_loaded_block_bitmaps && -- sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) { -- block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i]; -- block_bitmap = sb->u.ext3_sb.s_block_bitmap[i]; -+ if (i < sbi->s_loaded_block_bitmaps && -+ sbi->s_block_bitmap_number[i] == block_group) { -+ block_bitmap_number = sbi->s_block_bitmap_number[i]; -+ block_bitmap = sbi->s_block_bitmap[i]; - for (j = i; j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- 
sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } -- sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number; -- sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap; -+ sbi->s_block_bitmap_number[0] = block_bitmap_number; -+ sbi->s_block_bitmap[0] = block_bitmap; - - /* - * There's still one special case here --- if block_bitmap == 0 -@@ -173,17 +173,14 @@ static int __load_block_bitmap (struct s - if (!block_bitmap) - retval = read_block_bitmap (sb, block_group, 0); - } else { -- if (sb->u.ext3_sb.s_loaded_block_bitmapsu.ext3_sb.s_loaded_block_bitmaps++; -+ if (sbi->s_loaded_block_bitmapss_loaded_block_bitmaps++; - else -- brelse (sb->u.ext3_sb.s_block_bitmap -- [EXT3_MAX_GROUP_LOADED - 1]); -- for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1; -- j > 0; j--) { -- sb->u.ext3_sb.s_block_bitmap_number[j] = -- sb->u.ext3_sb.s_block_bitmap_number[j - 1]; -- sb->u.ext3_sb.s_block_bitmap[j] = -- sb->u.ext3_sb.s_block_bitmap[j - 1]; -+ brelse(sbi->s_block_bitmap[EXT3_MAX_GROUP_LOADED - 1]); -+ for (j = sbi->s_loaded_block_bitmaps - 1; j > 0; j--) { -+ sbi->s_block_bitmap_number[j] = -+ sbi->s_block_bitmap_number[j - 1]; -+ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1]; - } - retval = read_block_bitmap (sb, block_group, 0); - } -@@ -206,24 +203,25 @@ static int __load_block_bitmap (struct s - static inline int load_block_bitmap (struct super_block * sb, - unsigned int block_group) - { -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - int slot; -- -+ - /* - * Do the lookup for the slot. First of all, check if we're asking - * for the same slot as last time, and did we succeed that last time? 
- */ -- if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 && -- sb->u.ext3_sb.s_block_bitmap_number[0] == block_group && -- sb->u.ext3_sb.s_block_bitmap[0]) { -+ if (sbi->s_loaded_block_bitmaps > 0 && -+ sbi->s_block_bitmap_number[0] == block_group && -+ sbi->s_block_bitmap[0]) { - return 0; - } - /* - * Or can we do a fast lookup based on a loaded group on a filesystem - * small enough to be mapped directly into the superblock? - */ -- else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && -- sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group -- && sb->u.ext3_sb.s_block_bitmap[block_group]) { -+ else if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED && -+ sbi->s_block_bitmap_number[block_group] == block_group -+ && sbi->s_block_bitmap[block_group]) { - slot = block_group; - } - /* -@@ -243,7 +241,7 @@ static inline int load_block_bitmap (str - * If it's a valid slot, we may still have cached a previous IO error, - * in which case the bh in the superblock cache will be zero. 
- */ -- if (!sb->u.ext3_sb.s_block_bitmap[slot]) -+ if (!sbi->s_block_bitmap[slot]) - return -EIO; - - /* -@@ -275,7 +273,7 @@ void ext3_free_blocks (handle_t *handle, - return; - } - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (block < le32_to_cpu(es->s_first_data_block) || - block + count < block || - (block + count) > le32_to_cpu(es->s_blocks_count)) { -@@ -304,7 +302,7 @@ do_more: - if (bitmap_nr < 0) - goto error_return; - -- bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bitmap_bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - gdp = ext3_get_group_desc (sb, block_group, &gd_bh); - if (!gdp) - goto error_return; -@@ -330,8 +328,8 @@ do_more: - if (err) - goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto error_return; - -@@ -341,7 +339,7 @@ - if (block == le32_to_cpu(gdp->bg_block_bitmap) || - block == le32_to_cpu(gdp->bg_inode_bitmap) || - in_range(block, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext2_sb.s_itb_per_group)) { -+ EXT3_SB(sb)->s_itb_per_group)) { - ext3_error(sb, __FUNCTION__, - "Freeing block in system zone - block = %lu", - block); -@@ -410,8 +407,8 @@ do_more: - if (!err) err = ret; - - /* And the superblock */ -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock"); -- ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock"); -+ ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!err) err = ret; - - if (overflow && !err) { -@@ -564,12 +560,12 @@ int ext3_new_block (handle_t *handle, st - } - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (le32_to_cpu(es->s_free_blocks_count) <= - le32_to_cpu(es->s_r_blocks_count) && -- ((sb->u.ext3_sb.s_resuid != 
current->fsuid) && -- (sb->u.ext3_sb.s_resgid == 0 || -- !in_group_p (sb->u.ext3_sb.s_resgid)) && -+ ((EXT3_SB(sb)->s_resuid != current->fsuid) && -+ (EXT3_SB(sb)->s_resgid == 0 || -+ !in_group_p (EXT3_SB(sb)->s_resgid)) && - !capable(CAP_SYS_RESOURCE))) - goto out; - -@@ -598,7 +595,7 @@ int ext3_new_block (handle_t *handle, st - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - - ext3_debug ("goal is at %d:%d.\n", i, j); - -@@ -621,9 +618,9 @@ int ext3_new_block (handle_t *handle, st - * Now search the rest of the groups. We assume that - * i and gdp correctly point to the last group visited. - */ -- for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) { -+ for (k = 0; k < EXT3_SB(sb)->s_groups_count; k++) { - i++; -- if (i >= sb->u.ext3_sb.s_groups_count) -+ if (i >= EXT3_SB(sb)->s_groups_count) - i = 0; - gdp = ext3_get_group_desc (sb, i, &bh2); - if (!gdp) { -@@ -635,7 +632,7 @@ int ext3_new_block (handle_t *handle, st - if (bitmap_nr < 0) - goto io_error; - -- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr]; - j = find_next_usable_block(-1, bh, - EXT3_BLOCKS_PER_GROUP(sb)); - if (j >= 0) -@@ -674,8 +671,8 @@ got_block: - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto out; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) goto out; - - tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb) -@@ -796,7 +804,7 @@ got_block: - if (!fatal) fatal = err; - - BUFFER_TRACE(bh, "journal_dirty_metadata for superblock"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - - sb->s_dirt = 1; -@@ -829,11 +837,11 @@ 
unsigned long ext3_count_free_blocks (st - int i; - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -842,7 +850,7 @@ unsigned long ext3_count_free_blocks (st - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr], -+ x = ext3_count_free (EXT3_SB(sb)->s_block_bitmap[bitmap_nr], - sb->s_blocksize); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); -@@ -853,7 +861,7 @@ unsigned long ext3_count_free_blocks (st - unlock_super (sb); - return bitmap_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count); -+ return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count); - #endif - } - -@@ -862,7 +870,7 @@ static inline int block_in_use (unsigned - unsigned char * map) - { - return ext3_test_bit ((block - -- le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) % -+ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb), map); - } - -@@ -930,11 +938,11 @@ void ext3_check_blocks_bitmap (struct su - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -968,7 +976,7 @@ void ext3_check_blocks_bitmap (struct su - "Inode bitmap for group %d is marked free", - i); - -- for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++) -+ for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++) - if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j, - sb, bh->b_data)) - ext3_error (sb, "ext3_check_blocks_bitmap", ---- 
./fs/ext3/dir.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/dir.c Tue May 7 14:54:13 2002 -@@ -52,7 +52,7 @@ int ext3_check_dir_entry (const char * f - else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) - error_msg = "directory entry across blocks"; - else if (le32_to_cpu(de->inode) > -- le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) - error_msg = "inode out of bounds"; - - if (error_msg != NULL) ---- ./fs/ext3/ialloc.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/ialloc.c Tue May 7 15:39:26 2002 -@@ -73,8 +73,8 @@ static int read_inode_bitmap (struct sup - * this group. The IO will be retried next time. - */ - error_out: -- sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group; -- sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh; -+ EXT3_SB(sb)->s_inode_bitmap_number[bitmap_nr] = block_group; -+ EXT3_SB(sb)->s_inode_bitmap[bitmap_nr] = bh; - return retval; - } - -@@ -225,7 +225,7 @@ void ext3_free_inode (handle_t *handle, - clear_inode (inode); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; - if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_free_inode", - "reserved or nonexistent inode %lu", ino); -@@ -237,7 +237,7 @@ void ext3_free_inode (handle_t *handle, - if (bitmap_nr < 0) - goto error_return; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; - - BUFFER_TRACE(bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bh); -@@ -255,8 +255,8 @@ void ext3_free_inode (handle_t *handle, - fatal = ext3_journal_get_write_access(handle, bh2); - if (fatal) goto error_return; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access"); -- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access"); -+ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (fatal) 
goto error_return; - - if (gdp) { -@@ -271,9 +271,9 @@ void ext3_free_inode (handle_t *handle, - if (!fatal) fatal = err; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, - "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - if (!fatal) fatal = err; - } - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -@@ -305,6 +305,8 @@ struct inode * ext3_new_inode (handle_t - int i, j, avefreei; - struct inode * inode; - int bitmap_nr; -+ struct ext3_inode_info *ei; -+ struct ext3_sb_info *sbi; - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -@@ -318,7 +320,9 @@ struct inode * ext3_new_inode (handle_t - inode = new_inode(sb); - if (!inode) - return ERR_PTR(-ENOMEM); -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ sbi = EXT3_SB(sb); -+ ei = EXT3_I(inode); -+ init_rwsem(&ei->truncate_sem); - - lock_super (sb); - es = sb->u.ext3_sb.s_es; -@@ -328,9 +332,9 @@ struct inode * ext3_new_inode (handle_t - - if (S_ISDIR(mode)) { - avefreei = le32_to_cpu(es->s_free_inodes_count) / -- sb->u.ext3_sb.s_groups_count; -+ sbi->s_groups_count; - if (!gdp) { -- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { -+ for (j = 0; j < sbi->s_groups_count; j++) { - struct buffer_head *temp_buffer; - tmp = ext3_get_group_desc (sb, j, &temp_buffer); - if (tmp && -@@ -350,7 +354,7 @@ repeat: - /* - * Try to place the inode in its parent directory - */ -- i = dir->u.ext3_i.i_block_group; -+ i = EXT3_I(dir)->i_block_group; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) - gdp = tmp; -@@ -360,10 +364,10 @@ repeat: - * Use a quadratic hash to find a group with a - * free inode - */ -- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { -+ for (j = 1; j < sbi->s_groups_count; 
j <<= 1) { - i += j; -- if (i >= sb->u.ext3_sb.s_groups_count) -- i -= sb->u.ext3_sb.s_groups_count; -+ if (i >= sbi->s_groups_count) -+ i -= sbi->s_groups_count; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && - le16_to_cpu(tmp->bg_free_inodes_count)) { -@@ -376,9 +380,9 @@ repeat: - /* - * That failed: try linear search for a free inode - */ -- i = dir->u.ext3_i.i_block_group + 1; -- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { -- if (++i >= sb->u.ext3_sb.s_groups_count) -+ i = EXT3_I(dir)->i_block_group + 1; -+ for (j = 2; j < sbi->s_groups_count; j++) { -+ if (++i >= sbi->s_groups_count) - i = 0; - tmp = ext3_get_group_desc (sb, i, &bh2); - if (tmp && -@@ -399,11 +403,11 @@ repeat: - if (bitmap_nr < 0) - goto fail; - -- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; -+ bh = sbi->s_inode_bitmap[bitmap_nr]; - - if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, -- EXT3_INODES_PER_GROUP(sb))) < -- EXT3_INODES_PER_GROUP(sb)) { -+ sbi->s_inodes_per_group)) < -+ sbi->s_inodes_per_group) { - BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); - if (err) goto fail; -@@ -457,13 +461,13 @@ repeat: - err = ext3_journal_dirty_metadata(handle, bh2); - if (err) goto fail; - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, sbi->s_sbh); - if (err) goto fail; - es->s_free_inodes_count = - cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata"); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbi->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - sb->s_dirt = 1; - if (err) goto fail; - -@@ -483,31 +487,31 @@ repeat: - inode->i_blksize = PAGE_SIZE; - inode->i_blocks = 0; - 
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; -- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; -+ ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; - if (S_ISLNK(mode)) -- inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); -+ ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = 0; -- inode->u.ext3_i.i_frag_no = 0; -- inode->u.ext3_i.i_frag_size = 0; -+ ei->i_faddr = 0; -+ ei->i_frag_no = 0; -+ ei->i_frag_size = 0; - #endif -- inode->u.ext3_i.i_file_acl = 0; -- inode->u.ext3_i.i_dir_acl = 0; -- inode->u.ext3_i.i_dtime = 0; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_file_acl = 0; -+ ei->i_dir_acl = 0; -+ ei->i_dtime = 0; -+ INIT_LIST_HEAD(&ei->i_orphan); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = i; -+ ei->i_block_group = i; - -- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) -+ if (ei->i_flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; - if (IS_SYNC(inode)) - handle->h_sync = 1; - insert_inode_hash(inode); -- inode->i_generation = sb->u.ext3_sb.s_next_generation++; -+ inode->i_generation = sbi->s_next_generation++; - -- inode->u.ext3_i.i_state = EXT3_STATE_NEW; -+ ei->i_state = EXT3_STATE_NEW; - err = ext3_mark_inode_dirty(handle, inode); - if (err) goto fail; - -@@ -585,19 +589,19 @@ struct inode *ext3_orphan_get (struct su - - unsigned long ext3_count_free_inodes (struct super_block * sb) - { -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct ext3_super_block *es = sbi->s_es; - #ifdef EXT3FS_DEBUG -- struct ext3_super_block * es; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < sbi->s_groups_count; i++) { - gdp = 
ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -606,8 +610,8 @@ unsigned long ext3_count_free_inodes (st - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -- EXT3_INODES_PER_GROUP(sb) / 8); -+ x = ext3_count_free(sbi->s_inode_bitmap[bitmap_nr], -+ sbi->s_inodes_per_group / 8); - printk ("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_inodes_count), x); - bitmap_count += x; -@@ -617,7 +621,7 @@ unsigned long ext3_count_free_inodes (st - unlock_super (sb); - return desc_count; - #else -- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count); -+ return le32_to_cpu(es->s_free_inodes_count); - #endif - } - -@@ -626,16 +630,18 @@ unsigned long ext3_count_free_inodes (st - void ext3_check_inodes_bitmap (struct super_block * sb) - { - struct ext3_super_block * es; -+ struct ext3_sb_info *sbi; - unsigned long desc_count, bitmap_count, x; - int bitmap_nr; - struct ext3_group_desc * gdp; - int i; - -- es = sb->u.ext3_sb.s_es; -+ sbi = EXT3_SB(sb); -+ es = sbi->s_es; - desc_count = 0; - bitmap_count = 0; - gdp = NULL; -- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { -+ for (i = 0; i < sbi->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); - if (!gdp) - continue; -@@ -644,7 +650,7 @@ void ext3_check_inodes_bitmap (struct su - if (bitmap_nr < 0) - continue; - -- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], -+ x = ext3_count_free (sbi->s_inode_bitmap[bitmap_nr], - EXT3_INODES_PER_GROUP(sb) / 8); - if (le16_to_cpu(gdp->bg_free_inodes_count) != x) - ext3_error (sb, "ext3_check_inodes_bitmap", ---- ./fs/ext3/inode.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/inode.c Tue May 7 15:41:23 2002 -@@ -196,7 +196,7 @@ void ext3_delete_inode (struct inode * i - * (Well, we could do this if we need to, but heck - it works) - */ - ext3_orphan_del(handle, inode); -- inode->u.ext3_i.i_dtime = CURRENT_TIME; -+ EXT3_I(inode)->i_dtime = CURRENT_TIME; - - /* - * 
One subtle ordering requirement: if anything has gone wrong -@@ -220,13 +220,14 @@ no_delete: - void ext3_discard_prealloc (struct inode * inode) - { - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); - lock_kernel(); - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count) { -- unsigned short total = inode->u.ext3_i.i_prealloc_count; -- unsigned long block = inode->u.ext3_i.i_prealloc_block; -- inode->u.ext3_i.i_prealloc_count = 0; -- inode->u.ext3_i.i_prealloc_block = 0; -+ if (ei->i_prealloc_count) { -+ unsigned short total = ei->i_prealloc_count; -+ unsigned long block = ei->i_prealloc_block; -+ ei->i_prealloc_count = 0; -+ ei->i_prealloc_block = 0; - /* Writer: end */ - ext3_free_blocks (inode, block, total); - } -@@ -243,13 +244,15 @@ static int ext3_alloc_block (handle_t *h - unsigned long result; - - #ifdef EXT3_PREALLOCATE -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ - /* Writer: ->i_prealloc* */ -- if (inode->u.ext3_i.i_prealloc_count && -- (goal == inode->u.ext3_i.i_prealloc_block || -- goal + 1 == inode->u.ext3_i.i_prealloc_block)) -+ if (ei->i_prealloc_count && -+ (goal == ei->i_prealloc_block || -+ goal + 1 == ei->i_prealloc_block)) - { -- result = inode->u.ext3_i.i_prealloc_block++; -- inode->u.ext3_i.i_prealloc_count--; -+ result = ei->i_prealloc_block++; -+ ei->i_prealloc_count--; - /* Writer: end */ - ext3_debug ("preallocation hit (%lu/%lu).\n", - ++alloc_hits, ++alloc_attempts); -@@ -259,8 +262,8 @@ static int ext3_alloc_block (handle_t *h - alloc_hits, ++alloc_attempts); - if (S_ISREG(inode->i_mode)) - result = ext3_new_block (inode, goal, -- &inode->u.ext3_i.i_prealloc_count, -- &inode->u.ext3_i.i_prealloc_block, err); -+ &ei->i_prealloc_count, -+ &ei->i_prealloc_block, err); - else - result = ext3_new_block (inode, goal, 0, 0, err); - /* -@@ -394,7 +397,7 @@ static Indirect *ext3_get_branch(struct - - *err = 0; - /* i_data is not going away, no lock needed */ -- add_chain (chain, NULL, 
inode->u.ext3_i.i_data + *offsets); -+ add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); - if (!p->key) - goto no_block; - while (--depth) { -@@ -437,7 +440,8 @@ no_block: - - static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind) - { -- u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data; - u32 *p; - - /* Try to find previous block */ -@@ -453,9 +456,8 @@ static inline unsigned long ext3_find_ne - * It is going to be refered from inode itself? OK, just put it into - * the same cylinder group then. - */ -- return (inode->u.ext3_i.i_block_group * -- EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -- le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block); -+ return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); - } - - /** -@@ -474,14 +477,15 @@ - static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4], - Indirect *partial, unsigned long *goal) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - /* Writer: ->i_next_alloc* */ -- if (block == inode->u.ext3_i.i_next_alloc_block + 1) { -- inode->u.ext3_i.i_next_alloc_block++; -- inode->u.ext3_i.i_next_alloc_goal++; -+ if (block == ei->i_next_alloc_block + 1) { -+ ei->i_next_alloc_block++; -+ ei->i_next_alloc_goal++; - } - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - /* Reader: pointers, ->i_next_alloc* */ -@@ -490,8 +493,8 @@ static int ext3_find_goal(struct inode * - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. 
- */ -- if (block == inode->u.ext3_i.i_next_alloc_block) -- *goal = inode->u.ext3_i.i_next_alloc_goal; -+ if (block == ei->i_next_alloc_block) -+ *goal = ei->i_next_alloc_goal; - if (!*goal) - *goal = ext3_find_near(inode, partial); - #ifdef SEARCH_FROM_ZERO -@@ -619,6 +621,7 @@ - { - int i; - int err = 0; -+ struct ext3_inode_info *ei = EXT3_I(inode); - - /* - * If we're splicing into a [td]indirect block (as opposed to the -@@ -641,11 +644,11 @@ static int ext3_splice_branch(handle_t * - /* That's it */ - - *where->p = where->key; -- inode->u.ext3_i.i_next_alloc_block = block; -- inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key); -+ ei->i_next_alloc_block = block; -+ ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); - #ifdef SEARCH_FROM_ZERO -- inode->u.ext3_i.i_next_alloc_block = 0; -- inode->u.ext3_i.i_next_alloc_goal = 0; -+ ei->i_next_alloc_block = 0; -+ ei->i_next_alloc_goal = 0; - #endif - /* Writer: end */ - -@@ -729,6 +732,7 @@ - unsigned long goal; - int left; - int depth = ext3_block_to_path(inode, iblock, offsets); -+ struct ext3_inode_info *ei = EXT3_I(inode); - loff_t new_size; - - J_ASSERT(handle != NULL || create == 0); -@@ -782,7 +785,7 @@ out: - /* - * Block out ext3_truncate while we alter the tree - */ -- down_read(&inode->u.ext3_i.truncate_sem); -+ down_read(&ei->truncate_sem); - err = ext3_alloc_branch(handle, inode, left, goal, - offsets+(partial-chain), partial); - -@@ -794,7 +797,7 @@ out: - if (!err) - err = ext3_splice_branch(handle, inode, iblock, chain, - partial, left); -- up_read(&inode->u.ext3_i.truncate_sem); -+ up_read(&ei->truncate_sem); - if (err == -EAGAIN) - goto changed; - if (err) -@@ -807,8 +810,8 @@ out: - * truncate is in progress. It is racy between multiple parallel - * instances of get_block, but we have the BKL. 
- */ -- if (new_size > inode->u.ext3_i.i_disksize) -- inode->u.ext3_i.i_disksize = new_size; -+ if (new_size > ei->i_disksize) -+ ei->i_disksize = new_size; - - bh_result->b_state |= (1UL << BH_New); - goto got_it; -@@ -921,7 +924,7 @@ struct buffer_head *ext3_bread(handle_t - struct buffer_head *tmp_bh; - - for (i = 1; -- inode->u.ext3_i.i_prealloc_count && -+ EXT3_I(inode)->i_prealloc_count && - i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks; - i++) { - /* -@@ -1131,8 +1134,8 @@ static int ext3_commit_write(struct file - kunmap(page); - } - } -- if (inode->i_size > inode->u.ext3_i.i_disksize) { -- inode->u.ext3_i.i_disksize = inode->i_size; -+ if (inode->i_size > EXT3_I(inode)->i_disksize) { -+ EXT3_I(inode)->i_disksize = inode->i_size; - ret2 = ext3_mark_inode_dirty(handle, inode); - if (!ret) - ret = ret2; -@@ -1832,7 +1835,8 @@ static void ext3_free_branches(handle_t - void ext3_truncate(struct inode * inode) - { - handle_t *handle; -- u32 *i_data = inode->u.ext3_i.i_data; -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ u32 *i_data = EXT3_I(inode)->i_data; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int offsets[4]; - Indirect chain[4]; -@@ -1884,13 +1887,13 @@ void ext3_truncate(struct inode * inode) - * on-disk inode. We do this via i_disksize, which is the value which - * ext3 *really* writes onto the disk inode. - */ -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - - /* - * From here we block out all ext3_get_block() callers who want to - * modify the block allocation tree. 
- */ -- down_write(&inode->u.ext3_i.truncate_sem); -+ down_write(&ei->truncate_sem); - - if (n == 1) { /* direct blocks */ - ext3_free_data(handle, inode, NULL, i_data+offsets[0], -@@ -1954,7 +1957,7 @@ do_indirects: - case EXT3_TIND_BLOCK: - ; - } -- up_write(&inode->u.ext3_i.truncate_sem); -+ up_write(&ei->truncate_sem); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); - -@@ -1983,6 +1986,8 @@ out_stop: - - int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) - { -+ struct super_block *sb = inode->i_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); - struct buffer_head *bh = 0; - unsigned long block; - unsigned long block_group; -@@ -1997,23 +2010,19 @@ int ext3_get_inode_loc (struct inode *in - inode->i_ino != EXT3_JOURNAL_INO && -- inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || -- inode->i_ino > le32_to_cpu( -- inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "bad inode number: %lu", inode->i_ino); -+ inode->i_ino < EXT3_FIRST_INO(sb)) || -+ inode->i_ino > le32_to_cpu(sbi->s_es->s_inodes_count)) { -+ ext3_error (sb, __FUNCTION__, "bad inode #%lu", inode->i_ino); - goto bad_inode; - } -- block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); -- if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "group >= groups count"); -+ block_group = (inode->i_ino - 1) / sbi->s_inodes_per_group; -+ if (block_group >= sbi->s_groups_count) { -+ ext3_error(sb, __FUNCTION__, "group >= groups count"); - goto bad_inode; - } -- group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); -- desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; -+ group_desc = block_group >> sbi->s_desc_per_block_bits; -+ desc = block_group & (sbi->s_desc_per_block - 1); -+ bh = sbi->s_group_desc[group_desc]; - if (!bh) { -- ext3_error (inode->i_sb, 
"ext3_get_inode_loc", -- "Descriptor not loaded"); -+ ext3_error(sb, __FUNCTION__, "Descriptor not loaded"); - goto bad_inode; - } - -@@ -2021,17 +2022,17 @@ int ext3_get_inode_loc (struct inode *in - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) * -+ sbi->s_inode_size; - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -+ (offset >> EXT3_BLOCK_SIZE_BITS(sb)); -+ if (!(bh = sb_bread(sb, block))) { -+ ext3_error (sb, __FUNCTION__, - "unable to read inode block - " - "inode=%lu, block=%lu", inode->i_ino, block); - goto bad_inode; - } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ offset &= (EXT3_BLOCK_SIZE(sb) - 1); - - iloc->bh = bh; - iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -@@ -2047,6 +2048,7 @@ void ext3_read_inode(struct inode * inod - { - struct ext3_iloc iloc; - struct ext3_inode *raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh; - int block; - -@@ -2054,7 +2056,7 @@ void ext3_read_inode(struct inode * inod - goto bad_inode; - bh = iloc.bh; - raw_inode = iloc.raw_inode; -- init_rwsem(&inode->u.ext3_i.truncate_sem); -+ init_rwsem(&ei->truncate_sem); - inode->i_mode = le16_to_cpu(raw_inode->i_mode); - inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); - inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); -@@ -2067,7 +2069,7 @@ void ext3_read_inode(struct inode * inod - inode->i_atime = le32_to_cpu(raw_inode->i_atime); - inode->i_ctime = le32_to_cpu(raw_inode->i_ctime); - inode->i_mtime = le32_to_cpu(raw_inode->i_mtime); -- inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime); -+ ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); - /* We now have enough fields to 
check if the inode was active or not. - * This is needed because nfsd might try to access dead inodes - * the test is that same one that e2fsck uses -@@ -2075,7 +2077,7 @@ void ext3_read_inode(struct inode * inod - */ - if (inode->i_nlink == 0) { - if (inode->i_mode == 0 || -- !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) { -+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { - /* this inode is deleted */ - brelse (bh); - goto bad_inode; -@@ -2090,33 +2092,33 @@ void ext3_read_inode(struct inode * inod - * size */ - inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); - inode->i_version = ++event; -- inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags); -+ ei->i_flags = le32_to_cpu(raw_inode->i_flags); - #ifdef EXT3_FRAGMENTS -- inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr); -- inode->u.ext3_i.i_frag_no = raw_inode->i_frag; -- inode->u.ext3_i.i_frag_size = raw_inode->i_fsize; -+ ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); -+ ei->i_frag_no = raw_inode->i_frag; -+ ei->i_frag_size = raw_inode->i_fsize; - #endif -- inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl); -+ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); -+ ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); - } else { - inode->i_size |= - ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ ei->i_disksize = inode->i_size; - inode->i_generation = le32_to_cpu(raw_inode->i_generation); - #ifdef EXT3_PREALLOCATE -- inode->u.ext3_i.i_prealloc_count = 0; -+ ei->i_prealloc_count = 0; - #endif -- inode->u.ext3_i.i_block_group = iloc.block_group; -+ ei->i_block_group = iloc.block_group; - - /* - * NOTE! The in-memory inode i_data array is in little-endian order - * even on big-endian machines: we do NOT byteswap the block numbers! 
- */ - for (block = 0; block < EXT3_N_BLOCKS; block++) -- inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ ei->i_data[block] = iloc.raw_inode->i_block[block]; -+ INIT_LIST_HEAD(&ei->i_orphan); - - brelse (iloc.bh); - -@@ -2143,17 +2145,17 @@ void ext3_read_inode(struct inode * inod - /* inode->i_attr_flags = 0; unused */ -- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { -+ if (ei->i_flags & EXT3_SYNC_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ - inode->i_flags |= S_SYNC; - } -- if (inode->u.ext3_i.i_flags & EXT3_APPEND_FL) { -+ if (ei->i_flags & EXT3_APPEND_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_APPEND; unused */ - inode->i_flags |= S_APPEND; - } -- if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_FL) { -+ if (ei->i_flags & EXT3_IMMUTABLE_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE; unused */ - inode->i_flags |= S_IMMUTABLE; - } -- if (inode->u.ext3_i.i_flags & EXT3_NOATIME_FL) { -+ if (ei->i_flags & EXT3_NOATIME_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_NOATIME; unused */ - inode->i_flags |= S_NOATIME; - } -@@ -2175,6 +2177,7 @@ static int ext3_do_update_inode(handle_t - struct ext3_iloc *iloc) - { - struct ext3_inode *raw_inode = iloc->raw_inode; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct buffer_head *bh = iloc->bh; - int err = 0, rc, block; - -@@ -2192,7 +2195,7 @@ static int ext3_do_update_inode(handle_t - * Fix up interoperability with old kernels. 
Otherwise, old inodes get - * re-used with the upper 16 bits of the uid/gid intact - */ -- if(!inode->u.ext3_i.i_dtime) { -+ if(!ei->i_dtime) { - raw_inode->i_uid_high = - cpu_to_le16(high_16_bits(inode->i_uid)); - raw_inode->i_gid_high = -@@ -2210,34 +2213,33 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_gid_high = 0; - } - raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); -- raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize); -+ raw_inode->i_size = cpu_to_le32(ei->i_disksize); - raw_inode->i_atime = cpu_to_le32(inode->i_atime); - raw_inode->i_ctime = cpu_to_le32(inode->i_ctime); - raw_inode->i_mtime = cpu_to_le32(inode->i_mtime); - raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); -- raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime); -- raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags); -+ raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); -+ raw_inode->i_flags = cpu_to_le32(ei->i_flags); - #ifdef EXT3_FRAGMENTS -- raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr); -- raw_inode->i_frag = inode->u.ext3_i.i_frag_no; -- raw_inode->i_fsize = inode->u.ext3_i.i_frag_size; -+ raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); -+ raw_inode->i_frag = ei->i_frag_no; -+ raw_inode->i_fsize = ei->i_frag_size; - #else - /* If we are not tracking these fields in the in-memory inode, - * then preserve them on disk, but still initialise them to zero - * for new inodes. 
*/ -- if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { -+ if (ei->i_state & EXT3_STATE_NEW) { - raw_inode->i_faddr = 0; - raw_inode->i_frag = 0; - raw_inode->i_fsize = 0; - } - #endif -- raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl); -+ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); - if (!S_ISREG(inode->i_mode)) { -- raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl); -+ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); - } else { -- raw_inode->i_size_high = -- cpu_to_le32(inode->u.ext3_i.i_disksize >> 32); -- if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) { -+ raw_inode->i_size_high = cpu_to_le32(ei->i_disksize >> 32); -+ if (ei->i_disksize > MAX_NON_LFS) { - struct super_block *sb = inode->i_sb; - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || -@@ -2247,7 +2249,7 @@ static int ext3_do_update_inode(handle_t - * created, add a flag to the superblock. - */ - err = ext3_journal_get_write_access(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - if (err) - goto out_brelse; - ext3_update_dynamic_rev(sb); -@@ -2256,7 +2258,7 @@ static int ext3_do_update_inode(handle_t - sb->s_dirt = 1; - handle->h_sync = 1; - err = ext3_journal_dirty_metadata(handle, -- sb->u.ext3_sb.s_sbh); -+ EXT3_SB(sb)->s_sbh); - } - } - } -@@ -2265,13 +2267,13 @@ static int ext3_do_update_inode(handle_t - raw_inode->i_block[0] = - cpu_to_le32(kdev_t_to_nr(inode->i_rdev)); - else for (block = 0; block < EXT3_N_BLOCKS; block++) -- raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; -+ raw_inode->i_block[block] = ei->i_data[block]; - - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); - if (!err) - err = rc; -- EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; -+ ei->i_state &= ~EXT3_STATE_NEW; - - out_brelse: - brelse (bh); -@@ -2379,7 +2381,7 @@ int ext3_setattr(struct dentry *dentry, - } - - error = ext3_orphan_add(handle, inode); -- inode->u.ext3_i.i_disksize = 
attr->ia_size; -+ EXT3_I(inode)->i_disksize = attr->ia_size; - rc = ext3_mark_inode_dirty(handle, inode); - if (!error) - error = rc; -@@ -2622,9 +2624,9 @@ int ext3_change_inode_journal_flag(struc - */ - - if (val) -- inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; - else -- inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL; -+ EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; - - journal_unlock_updates(journal); - ---- ./fs/ext3/ioctl.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/ioctl.c Tue May 7 15:20:52 2002 -@@ -18,13 +18,14 @@ - int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, - unsigned long arg) - { -+ struct ext3_inode_info *ei = EXT3_I(inode); - unsigned int flags; - - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { - case EXT3_IOC_GETFLAGS: -- flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE; -+ flags = ei->i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); - case EXT3_IOC_SETFLAGS: { - handle_t *handle = NULL; -@@ -42,7 +42,7 @@ int ext3_ioctl (struct inode * inode, st - if (get_user(flags, (int *) arg)) - return -EFAULT; - -- oldflags = inode->u.ext3_i.i_flags; -+ oldflags = ei->i_flags; - - /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT3_JOURNAL_DATA_FL; -@@ -79,7 +79,7 @@ int ext3_ioctl (struct inode * inode, st - - flags = flags & EXT3_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; -- inode->u.ext3_i.i_flags = flags; -+ ei->i_flags = flags; - - if (flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; -@@ -155,12 +155,12 @@ flags_err: - int ret = 0; - - set_current_state(TASK_INTERRUPTIBLE); -- add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -- if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) { -+ add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); -+ if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) { - schedule(); - ret = 1; - } -- 
remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); -+ remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); - return ret; - } - #endif ---- ./fs/ext3/namei.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/namei.c Tue May 7 16:05:51 2002 -@@ -636,7 +636,7 @@ static struct buffer_head * ext3_find_en - } - - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); -- start = dir->u.ext3_i.i_dir_start_lookup; -+ start = EXT3_I(dir)->i_dir_start_lookup; - if (start >= nblocks) - start = 0; - block = start; -@@ -677,7 +677,7 @@ restart: - i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); - if (i == 1) { -- dir->u.ext3_i.i_dir_start_lookup = block; -+ EXT3_I(dir)->i_dir_start_lookup = block; - ret = bh; - goto cleanup_and_exit; - } else { -@@ -1419,7 +1419,7 @@ int ext3_orphan_add(handle_t *handle, st - int err = 0, rc; - - lock_super(sb); -- if (!list_empty(&inode->u.ext3_i.i_orphan)) -+ if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - - /* Orphan handling is only valid for files with data blocks -@@ -1430,8 +1430,8 @@ int ext3_orphan_add(handle_t *handle, st - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); -- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); - if (err) - goto out_unlock; - -@@ -1442,7 +1442,7 @@ int ext3_orphan_add(handle_t *handle, st - /* Insert this inode at the head of the on-disk orphan list... 
*/ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); - EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); -- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - rc = ext3_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; -@@ -1456,7 +1456,7 @@ int ext3_orphan_add(handle_t *handle, st - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ - if (!err) -- list_add(&inode->u.ext3_i.i_orphan, &EXT3_SB(sb)->s_orphan); -+ list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); - - jbd_debug(4, "superblock will point to %ld\n", inode->i_ino); - jbd_debug(4, "orphan inode %ld will point to %d\n", -@@ -714,25 +770,25 @@ - int ext3_orphan_del(handle_t *handle, struct inode *inode) - { - struct list_head *prev; -+ struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - - lock_super(inode->i_sb); -- if (list_empty(&inode->u.ext3_i.i_orphan)) { -+ if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); -- prev = inode->u.ext3_i.i_orphan.prev; -+ prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -- list_del(&inode->u.ext3_i.i_orphan); -- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); -+ list_del_init(&ei->i_orphan); - - /* If we're on an error path, we may not have a valid - * transaction handle with which to update the orphan list on -@@ -1520,8 +1520,7 @@ int ext3_orphan_del(handle_t *handle, st - err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); - } else { - struct ext3_iloc iloc2; -- struct inode *i_prev = -- list_entry(prev, struct inode, u.ext3_i.i_orphan); -+ struct inode *i_prev = orphan_list_entry(prev); - - jbd_debug(4, "orphan inode %lu will point to %lu\n", - i_prev->i_ino, 
ino_next); -@@ -1695,10 +1695,10 @@ static int ext3_symlink (struct inode * - goto out_no_entry; - } else { - inode->i_op = &ext3_fast_symlink_inode_operations; -- memcpy((char*)&inode->u.ext3_i.i_data,symname,l); -+ memcpy((char*)&EXT3_I(inode)->i_data,symname,l); - inode->i_size = l-1; - } -- inode->u.ext3_i.i_disksize = inode->i_size; -+ EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); - ext3_mark_inode_dirty(handle, inode); - out_stop: ---- ./fs/ext3/super.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/super.c Tue May 7 16:05:44 2002 -@@ -121,7 +121,7 @@ static int ext3_error_behaviour(struct s - /* If no overrides were specified on the mount, then fall back - * to the default behaviour set in the filesystem's superblock - * on disk. */ -- switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) { -+ switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) { - case EXT3_ERRORS_PANIC: - return EXT3_ERRORS_PANIC; - case EXT3_ERRORS_RO: -@@ -269,9 +269,9 @@ void ext3_abort (struct super_block * sb - return; - - printk (KERN_CRIT "Remounting filesystem read-only\n"); -- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - sb->s_flags |= MS_RDONLY; -- sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT; -+ EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; - journal_abort(EXT3_SB(sb)->s_journal, -EIO); - } - -@@ -377,8 +377,6 @@ static int ext3_blkdev_remove(struct ext3 - return ret; - } - --#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan) -- - static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) - { - struct list_head *l; -@@ -818,7 +818,7 @@ static void ext3_orphan_cleanup (struct - sb->s_flags &= ~MS_RDONLY; - } - -- if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) { -+ if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { - if (es->s_last_orphan) - jbd_debug(1, "Errors on filesystem, " - "clearing orphan list.\n"); -@@ -1463,12 +1463,14 @@ static void 
ext3_commit_super (struct su - struct ext3_super_block * es, - int sync) - { -+ struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; -+ - es->s_wtime = cpu_to_le32(CURRENT_TIME); -- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty"); -- mark_buffer_dirty(sb->u.ext3_sb.s_sbh); -+ BUFFER_TRACE(sbh, "marking dirty"); -+ mark_buffer_dirty(sbh); - if (sync) { -- ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh); -- wait_on_buffer(sb->u.ext3_sb.s_sbh); -+ ll_rw_block(WRITE, 1, &sbh); -+ wait_on_buffer(sbh); - } - } - -@@ -1519,7 +1521,7 @@ static void ext3_clear_journal_err(struc - ext3_warning(sb, __FUNCTION__, "Marking fs in need of " - "filesystem check."); - -- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS; -+ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); - ext3_commit_super (sb, es, 1); - ---- ./fs/ext3/symlink.c.orig Fri Apr 12 10:27:49 2002 -+++ ./fs/ext3/symlink.c Tue May 7 15:25:39 2002 -@@ -23,13 +23,13 @@ - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_readlink(dentry, buffer, buflen, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_readlink(dentry, buffer, buflen, (char *)ei->i_data); - } - - static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd) - { -- char *s = (char *)dentry->d_inode->u.ext3_i.i_data; -- return vfs_follow_link(nd, s); -+ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); -+ return vfs_follow_link(nd, (char*)ei->i_data); - } - ---- ./include/linux/ext3_fs.h.orig Tue Apr 16 14:27:25 2002 -+++ ./include/linux/ext3_fs.h Tue May 7 16:47:36 2002 -@@ -84,22 +84,25 @@ - #define EXT3_MIN_BLOCK_SIZE 1024 - #define EXT3_MAX_BLOCK_SIZE 4096 - #define EXT3_MIN_BLOCK_LOG_SIZE 10 -+ - #ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) --#else --# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) --#endif --#define EXT3_ADDR_PER_BLOCK(s) 
(EXT3_BLOCK_SIZE(s) / sizeof (__u32)) --#ifdef __KERNEL__ --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) --#else --# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) --#endif --#ifdef __KERNEL__ --#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits) --#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size) --#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino) -+#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) -+#define EXT3_I(inode) (&((inode)->u.ext3_i)) -+ -+#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -+#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) -+#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) -+#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) - #else -+ -+/* Assume that user mode programs are passing in an ext3fs superblock, not -+ * a kernel struct super_block. This will allow us to call the feature-test -+ * macros from user land. */ -+#define EXT3_SB(sb) (sb) -+ -+#define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) -+#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) - #define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? 
\ - EXT3_GOOD_OLD_INODE_SIZE : \ - (s)->s_inode_size) -@@ -108,6 +110,7 @@ - EXT3_GOOD_OLD_FIRST_INO : \ - (s)->s_first_ino) - #endif -+#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - - /* - * Macro-instructions used to manage fragments -@@ -116,8 +120,8 @@ - #define EXT3_MAX_FRAG_SIZE 4096 - #define EXT3_MIN_FRAG_LOG_SIZE 10 - #ifdef __KERNEL__ --# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size) --# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block) -+# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) -+# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) - #else - # define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) - # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) -@@ -163,15 +167,13 @@ - /* - * Macro-instructions used to manage group descriptors - */ -+# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) -+# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) - #ifdef __KERNEL__ --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group) --# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block) --# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group) --# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits) -+# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block) -+# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) - #else --# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) - # define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) --# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) - #endif - - /* -@@ -344,7 +347,7 @@ - #ifndef _LINUX_EXT2_FS_H - #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt - #define set_opt(o, opt) o |= EXT3_MOUNT_##opt --#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \ -+#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) - #else - #define 
EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD -@@ -441,17 +443,11 @@ - /*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ - }; - --#ifdef __KERNEL__ --#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) --#define EXT3_I(inode) (&((inode)->u.ext3_i)) --#else --/* Assume that user mode programs are passing in an ext3fs superblock, not -- * a kernel struct super_block. This will allow us to call the feature-test -- * macros from user land. */ --#define EXT3_SB(sb) (sb) --#endif -- --#define NEXT_ORPHAN(inode) (inode)->u.ext3_i.i_dtime -+#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime -+static inline struct inode *orphan_list_entry(struct list_head *l) -+{ -+ return list_entry(l, struct inode, u.ext3_i.i_orphan); -+} - - /* - * Codes for operating systems ---- ./include/linux/ext3_jbd.h.orig Tue May 7 14:44:08 2002 -+++ ./include/linux/ext3_jbd.h Tue May 7 14:44:43 2002 -@@ -291,7 +291,7 @@ - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) - return 1; -- if (inode->u.ext3_i.i_flags & EXT3_JOURNAL_DATA_FL) -+ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) - return 1; - return 0; - } diff --git a/lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch b/lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch deleted file mode 100644 index 5f566de..0000000 --- a/lustre/kernel_patches/patches/ext3-2.4.20-fixes.patch +++ /dev/null @@ -1,118 +0,0 @@ - - - - fs/ext3/balloc.c | 53 +++++++++++++++++++++++++++++++---------------------- - 1 files changed, 31 insertions(+), 22 deletions(-) - ---- linux-2.4.20/fs/ext3/balloc.c~ext3-2.4.20-fixes 2003-04-08 23:35:17.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/balloc.c 2003-04-08 23:35:17.000000000 -0600 -@@ -276,7 +276,8 @@ void ext3_free_blocks (handle_t *handle, - } - lock_super (sb); - es = sb->u.ext3_sb.s_es; -- if (block < le32_to_cpu(es->s_first_data_block) || -+ if (block < le32_to_cpu(es->s_first_data_block) || -+ block + count < block || - (block + count) > le32_to_cpu(es->s_blocks_count)) { - 
ext3_error (sb, "ext3_free_blocks", - "Freeing blocks not in datazone - " -@@ -309,17 +310,6 @@ do_more: - if (!gdp) - goto error_return; - -- if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) || -- in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) || -- in_range (block, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group) || -- in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group)) -- ext3_error (sb, "ext3_free_blocks", -- "Freeing blocks in system zones - " -- "Block = %lu, count = %lu", -- block, count); -- - /* - * We are about to start releasing blocks in the bitmap, - * so we need undo access. -@@ -345,14 +335,24 @@ do_more: - if (err) - goto error_return; - -- for (i = 0; i < count; i++) { -+ for (i = 0; i < count; i++, block++) { -+ if (block == le32_to_cpu(gdp->bg_block_bitmap) || -+ block == le32_to_cpu(gdp->bg_inode_bitmap) || -+ in_range(block, le32_to_cpu(gdp->bg_inode_table), -+ EXT3_SB(sb)->s_itb_per_group)) { -+ ext3_error(sb, __FUNCTION__, -+ "Freeing block in system zone - block = %lu", -+ block); -+ continue; -+ } -+ - /* - * An HJ special. This is expensive... 
- */ - #ifdef CONFIG_JBD_DEBUG - { - struct buffer_head *debug_bh; -- debug_bh = sb_get_hash_table(sb, block + i); -+ debug_bh = sb_get_hash_table(sb, block); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "Deleted!"); - if (!bh2jh(bitmap_bh)->b_committed_data) -@@ -365,9 +365,8 @@ do_more: - #endif - BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) { -- ext3_error (sb, __FUNCTION__, -- "bit already cleared for block %lu", -- block + i); -+ ext3_error(sb, __FUNCTION__, -+ "bit already cleared for block %lu", block); - BUFFER_TRACE(bitmap_bh, "bit already cleared"); - } else { - dquot_freed_blocks++; -@@ -415,7 +414,6 @@ do_more: - if (!err) err = ret; - - if (overflow && !err) { -- block += count; - count = overflow; - goto do_more; - } -@@ -576,6 +574,7 @@ int ext3_new_block (handle_t *handle, st - - ext3_debug ("goal=%lu.\n", goal); - -+repeat: - /* - * First, test whether the goal block is free. - */ -@@ -684,10 +683,20 @@ got_block: - if (tmp == le32_to_cpu(gdp->bg_block_bitmap) || - tmp == le32_to_cpu(gdp->bg_inode_bitmap) || - in_range (tmp, le32_to_cpu(gdp->bg_inode_table), -- sb->u.ext3_sb.s_itb_per_group)) -- ext3_error (sb, "ext3_new_block", -- "Allocating block in system zone - " -- "block = %u", tmp); -+ EXT3_SB(sb)->s_itb_per_group)) { -+ ext3_error(sb, __FUNCTION__, -+ "Allocating block in system zone - block = %u", tmp); -+ -+ /* Note: This will potentially use up one of the handle's -+ * buffer credits. Normally we have way too many credits, -+ * so that is OK. In _very_ rare cases it might not be OK. -+ * We will trigger an assertion if we run out of credits, -+ * and we will have to do a full fsck of the filesystem - -+ * better than randomly corrupting filesystem metadata. -+ */ -+ ext3_set_bit(j, bh->b_data); -+ goto repeat; -+ } - - /* The superblock lock should guard against anybody else beating - * us to this point! 
*/ - -_ diff --git a/lustre/kernel_patches/patches/ext3-2.5-noread.patch b/lustre/kernel_patches/patches/ext3-2.5-noread.patch deleted file mode 100644 index 1aa2578..0000000 --- a/lustre/kernel_patches/patches/ext3-2.5-noread.patch +++ /dev/null @@ -1,266 +0,0 @@ -===== fs/ext3/ialloc.c 1.26 vs edited ===== ---- 1.26/fs/ext3/ialloc.c Fri Feb 14 19:24:09 2003 -+++ edited/fs/ext3/ialloc.c Sat Mar 8 01:20:55 2003 -@@ -195,6 +195,36 @@ - } - - /* -+ * @block_group: block group of inode -+ * @offset: relative offset of inode within @block_group -+ * -+ * Check whether any of the inodes in this disk block are in use. -+ * -+ * Caller must be holding superblock lock (group/bitmap read lock in -+ * future). -+ */ -+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group, -+ int offset) -+{ -+ struct buffer_head *ibitmap = read_inode_bitmap(sb, block_group); -+ int inodes_per_block; -+ unsigned long inum, iend; -+ -+ if (!ibitmap) -+ return 1; -+ -+ inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size; -+ inum = offset & ~(inodes_per_block - 1); -+ iend = inum + inodes_per_block; -+ for (; inum < iend; inum++) { -+ if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* - * There are two policies for allocating an inode. 
If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of -@@ -422,8 +452,9 @@ - struct ext3_group_desc * gdp; - struct ext3_super_block * es; - struct ext3_inode_info *ei; -- int err = 0; -+ struct ext3_iloc iloc; - struct inode *ret; -+ int err = 0; - - /* Cannot create files in a deleted directory */ - if (!dir || !dir->i_nlink) -@@ -587,16 +618,23 @@ - goto fail2; - } - err = ext3_init_acl(handle, inode, dir); -+ if (err) -+ goto fail3; -+ -+ err = ext3_get_inode_loc_new(inode, &iloc, 1); -+ if (err) -+ goto fail3; -+ -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, iloc.bh); - if (err) { -- DQUOT_FREE_INODE(inode); -- goto fail2; -- } -- err = ext3_mark_inode_dirty(handle, inode); -- if (err) { -- ext3_std_error(sb, err); -- DQUOT_FREE_INODE(inode); -- goto fail2; -- } -+ brelse(iloc.bh); -+ iloc.bh = NULL; -+ goto fail3; -+ } -+ err = ext3_mark_iloc_dirty(handle, inode, &iloc); -+ if (err) -+ goto fail3; - - ext3_debug("allocating inode %lu\n", inode->i_ino); - goto really_out; -@@ -610,6 +648,9 @@ - brelse(bitmap_bh); - return ret; - -+fail3: -+ ext3_std_error(sb, err); -+ DQUOT_FREE_INODE(inode); - fail2: - inode->i_flags |= S_NOQUOTA; - inode->i_nlink = 0; -===== fs/ext3/inode.c 1.62 vs edited ===== ---- 1.62/fs/ext3/inode.c Fri Feb 14 19:24:09 2003 -+++ edited/fs/ext3/inode.c Sat Mar 8 02:10:39 2003 -@@ -2144,69 +2144,118 @@ - unlock_kernel(); - } - --/* -- * ext3_get_inode_loc returns with an extra refcount against the -- * inode's underlying buffer_head on success. -- */ -+#define NUM_INODE_PREREAD 16 - --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+/* -+ * ext3_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. 
If this is for a new inode allocation -+ * (new is non-zero) then we may be able to optimize away the read if there -+ * are no other in-use inodes in this inode table block. If we need to do -+ * a read, then read in a whole chunk of blocks to avoid blocking again soon -+ * if we are doing lots of creates/updates. -+ */ -+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new) - { -- struct buffer_head *bh = 0; -+ struct buffer_head *bh[NUM_INODE_PREREAD]; -+ struct super_block *sb = inode->i_sb; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ unsigned long ino = inode->i_ino; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; - unsigned long desc; - unsigned long offset; - struct ext3_group_desc * gdp; -- -- if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_JOURNAL_INO && -- inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || -- inode->i_ino > le32_to_cpu( -- EXT3_SB(inode->i_sb)->s_es->s_inodes_count)) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "bad inode number: %lu", inode->i_ino); -+ -+ if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO && -+ ino < EXT3_FIRST_INO(sb)) || -+ ino > le32_to_cpu(sbi->s_es->s_inodes_count)) { -+ ext3_error(sb, "ext3_get_inode_loc", "bad inode number: %lu", -+ ino); - goto bad_inode; - } -- block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); -- if (block_group >= EXT3_SB(inode->i_sb)->s_groups_count) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "group >= groups count"); -+ block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); -+ if (block_group >= EXT3_SB(sb)->s_groups_count) { -+ ext3_error(sb, "ext3_get_inode_loc", "group >= groups count"); - goto bad_inode; - } -- group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); -- desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = EXT3_SB(inode->i_sb)->s_group_desc[group_desc]; -- if (!bh) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "Descriptor not 
loaded"); -+ group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); -+ desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); -+ if (!sbi->s_group_desc[group_desc]) { -+ ext3_error(sb, "ext3_get_inode_loc", "Descriptor not loaded"); - goto bad_inode; - } - -- gdp = (struct ext3_group_desc *) bh->b_data; -+ gdp = (struct ext3_group_desc *)(sbi->s_group_desc[group_desc]->b_data); - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)); - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "unable to read inode block - " -- "inode=%lu, block=%lu", inode->i_ino, block); -- goto bad_inode; -+ (offset * sbi->s_inode_size >> EXT3_BLOCK_SIZE_BITS(sb)); -+ bh[0] = sb_getblk(sb, block); -+ if (buffer_uptodate(bh[0])) -+ goto done; -+ -+ /* If we don't really need to read this block, and it isn't already -+ * in memory, then we just zero it out. Otherwise, we keep the -+ * current block contents (deleted inode data) for posterity. 
-+ */ -+ if (new && !ext3_itable_block_used(sb, block_group, offset)) { -+ lock_buffer(bh[0]); -+ memset(bh[0]->b_data, 0, bh[0]->b_size); -+ set_buffer_uptodate(bh[0]); -+ unlock_buffer(bh[0]); -+ } else { -+ unsigned long block_end, itable_end; -+ int count = 1; -+ -+ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) + -+ sbi->s_itb_per_group; -+ block_end = block + NUM_INODE_PREREAD; -+ if (block_end > itable_end) -+ block_end = itable_end; -+ -+ for (++block; block < block_end; block++) { -+ bh[count] = sb_getblk(sb, block); -+ if (count && (buffer_uptodate(bh[count]) || -+ buffer_locked(bh[count]))) { -+ __brelse(bh[count]); -+ } else -+ count++; -+ } -+ -+ ll_rw_block(READ, count, bh); -+ -+ /* Release all but the block we actually need (bh[0]) */ -+ while (--count > 0) -+ __brelse(bh[count]); -+ -+ wait_on_buffer(bh[0]); -+ if (!buffer_uptodate(bh[0])) { -+ ext3_error(sb, __FUNCTION__, -+ "unable to read inode block - " -+ "inode=%lu, block=%llu", ino, -+ (unsigned long long)bh[0]->b_blocknr); -+ goto bad_inode; -+ } - } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+done: -+ offset = (offset * sbi->s_inode_size) & (EXT3_BLOCK_SIZE(sb) - 1); - -- iloc->bh = bh; -- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -+ iloc->bh = bh[0]; -+ iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset); - iloc->block_group = block_group; -- -+ - return 0; -- -+ - bad_inode: - return -EIO; -+} -+ -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -+{ -+ return ext3_get_inode_loc_new(inode, iloc, 0); - } - - void ext3_read_inode(struct inode * inode) -===== include/linux/ext3_fs.h 1.22 vs edited ===== ---- 1.22/include/linux/ext3_fs.h Tue Jan 14 00:56:29 2003 -+++ edited/include/linux/ext3_fs.h Sat Mar 8 01:56:28 2003 -@@ -719,6 +719,8 @@ - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -+extern 
int ext3_itable_block_used(struct super_block *, unsigned int, int); -+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int); - extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); diff --git a/lustre/kernel_patches/patches/ext3-2.5.63.patch b/lustre/kernel_patches/patches/ext3-2.5.63.patch deleted file mode 100644 index fd28cd8..0000000 --- a/lustre/kernel_patches/patches/ext3-2.5.63.patch +++ /dev/null @@ -1,150 +0,0 @@ - fs/ext3/inode.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++- - fs/ext3/super.c | 5 ++- - fs/ext3/xattr.c | 5 +++ - fs/ext3/xattr.h | 2 - - 4 files changed, 92 insertions(+), 4 deletions(-) - ---- linux-2.5.63-nointent/fs/ext3/xattr.c~ext3-2.5.63 Fri Mar 21 18:47:19 2003 -+++ linux-2.5.63-nointent-root/fs/ext3/xattr.c Fri Mar 21 18:47:19 2003 -@@ -1181,3 +1181,8 @@ exit_ext3_xattr(void) - ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, - &ext3_xattr_user_handler); - } -+ -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_set); -+EXPORT_SYMBOL(ext3_xattr_set_handle); -+ ---- linux-2.5.63-nointent/fs/ext3/inode.c~ext3-2.5.63 Fri Mar 21 18:47:19 2003 -+++ linux-2.5.63-nointent-root/fs/ext3/inode.c Fri Mar 21 18:47:19 2003 -@@ -1019,7 +1019,7 @@ struct buffer_head *ext3_bread(handle_t - *err = -EIO; - return NULL; - } -- -+EXPORT_SYMBOL(ext3_bread); - static int walk_page_buffers( handle_t *handle, - struct buffer_head *head, - unsigned from, -@@ -2870,3 +2870,85 @@ int ext3_change_inode_journal_flag(struc - * here, in ext3_aops_journal_start() to ensure that the forthcoming "see if we - * need to extend" test in ext3_prepare_write() succeeds. 
- */ -+ -+/* for each block: 1 ind + 1 dind + 1 tind -+ * for each block: 3 bitmap blocks -+ * for each block: 3 group descriptor blocks -+ * i inode block -+ * 1 superblock -+ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files -+ * ((1+1+1) * 3 * nblocks) + 1 + 1 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ * -+ * XXX assuming: -+ * (1) fs logic block size == page size -+ * (2) ext3 in writeback mode -+ */ -+static inline int ext3_san_write_trans_blocks(int nblocks) -+{ -+ int ret; -+ -+ ret = (1 + 1 + 1) * 3 * nblocks + 1 + 1; -+ -+#ifdef CONFIG_QUOTA -+ ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return ret; -+} -+ -+/* Alloc blocks for an inode, while don't create any buffer/page -+ * for data I/O; set the inode size if file is extended. -+ * -+ * @inode: target inode -+ * @blocks: array of logic block number -+ * @nblocks: how many blocks need be alloced -+ * @newsize: new filesize we should set -+ * -+ * return: 0 success, otherwise failed -+ * (*blocks) contains physical block number alloced -+ * -+ * XXX this assume the fs block size == page size -+ */ -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize) -+{ -+ handle_t *handle; -+ struct buffer_head bh_tmp; -+ int needed_blocks; -+ int i, ret = 0, ret2; -+ -+ needed_blocks = ext3_san_write_trans_blocks(nblocks); -+ -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ if (IS_ERR(handle)) { -+ unlock_kernel(); -+ return PTR_ERR(handle); -+ } -+ unlock_kernel(); -+ -+ /* alloc blocks one by one */ -+ for (i = 0; i < nblocks; i++) { -+ ret = ext3_get_block_handle(handle, inode, blocks[i], -+ &bh_tmp, 1, 1); -+ if (ret) -+ break; -+ -+ blocks[i] = bh_tmp.b_blocknr; -+ } -+ -+ /* set inode size if needed */ -+ if (!ret && (newsize > inode->i_size)) { -+ inode->i_size = newsize; -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ -+ lock_kernel(); -+ ret2 = ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ -+ if (!ret) -+ ret = ret2; -+ 
return ret; -+} -+EXPORT_SYMBOL(ext3_prep_san_write); ---- linux-2.5.63-nointent/fs/ext3/super.c~ext3-2.5.63 Fri Mar 21 18:47:19 2003 -+++ linux-2.5.63-nointent-root/fs/ext3/super.c Fri Mar 21 18:47:19 2003 -@@ -1492,10 +1492,10 @@ static journal_t *ext3_get_dev_journal(s - printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); - goto out_journal; - } -- if (ntohl(journal->j_superblock->s_nr_users) != 1) { -+ if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT3-fs: External journal has more than one " - "user (unsupported) - %d\n", -- ntohl(journal->j_superblock->s_nr_users)); -+ be32_to_cpu(journal->j_superblock->s_nr_users)); - goto out_journal; - } - EXT3_SB(sb)->journal_bdev = bdev; -@@ -1703,6 +1703,7 @@ int ext3_force_commit(struct super_block - unlock_kernel(); - return ret; - } -+EXPORT_SYMBOL(ext3_force_commit); - - /* - * Ext3 always journals updates to the superblock itself, so we don't ---- linux-2.5.63-nointent/fs/ext3/xattr.h~ext3-2.5.63 Fri Mar 21 18:47:19 2003 -+++ linux-2.5.63-nointent-root/fs/ext3/xattr.h Fri Mar 21 18:47:19 2003 -@@ -5,7 +5,7 @@ - - (C) 2001 Andreas Gruenbacher, - */ -- -+#include - #include - #include - - -_ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch deleted file mode 100644 index e01feca..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18.patch +++ /dev/null @@ -1,326 +0,0 @@ - fs/ext3/super.c | 229 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 2 - include/linux/ext3_fs_sb.h | 10 + - 3 files changed, 241 insertions(+) - ---- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 -+++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jun 18 11:59:14 2003 -@@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be 
deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ 
spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); -+} -+ -+/* Instead of playing games with the inode flags, destruction, etc we just -+ * create a new inode locally and put it on a list for the truncate thread. -+ * We need large parts of the inode struct in order to complete the -+ * truncate and unlink, so we may as well just have a real inode to do it. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * truncate thread when we run out of space. 
-+ * -+ * In 2.5 this can be done much more cleanly by just registering a "drop" -+ * method in the super_operations struct. -+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) { -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ !sbi->s_delete_list.next) { -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || -+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. 
-+ */ -+ down(&sbi->s_orphan_lock); -+ -+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug("delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan)); -+ /* Ugh. We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * in the same order as the on-disk orphan list (badness otherwise). -+ */ -+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -403,6 +615,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_stop_delete_thread(sbi); - ext3_xattr_put_super(sb); - 
journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -451,7 +664,11 @@ static struct super_operations ext3_sops - write_inode: ext3_write_inode, /* BKL not held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else - delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */ -@@ -511,6 +728,14 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif -+ - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. 
- * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s - if (!parse_options(data, &tmp, sbi, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - ---- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:20 2003 -+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Tue Jun 17 12:36:56 2003 -@@ -190,6 +190,7 @@ struct ext3_group_desc - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -317,6 +318,7 @@ struct ext3_inode { - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H ---- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003 -+++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Tue Jun 17 12:36:56 2003 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 32 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -74,6 +76,14 @@ struct ext3_sb_info { - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ 
wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ - -_ diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch deleted file mode 100644 index 34c5158..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch +++ /dev/null @@ -1,321 +0,0 @@ -diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c ---- origin/fs/ext3/super.c 2003-05-04 17:23:52.000000000 +0400 -+++ linux/fs/ext3/super.c 2003-05-04 17:09:20.000000000 +0400 -@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. 
-+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ 
spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); -+} -+ -+/* Instead of playing games with the inode flags, destruction, etc we just -+ * create a new inode locally and put it on a list for the truncate thread. -+ * We need large parts of the inode struct in order to complete the -+ * truncate and unlink, so we may as well just have a real inode to do it. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * truncate thread when we run out of space. -+ * -+ * In 2.5 this can be done much more cleanly by just registering a "drop" -+ * method in the super_operations struct. 
-+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) { -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ !sbi->s_delete_list.next) { -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || -+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. -+ */ -+ down(&sbi->s_orphan_lock); -+ -+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug("delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan)); -+ /* Ugh. 
We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * in the same order as the on-disk orphan list (badness otherwise). -+ */ -+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_stop_delete_thread(sbi); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +660,11 @@ static struct super_operations ext3_sops - write_inode: ext3_write_inode, /* BKL not held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else - delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. 
Take it */ -@@ -514,6 +725,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif - #ifdef CONFIG_EXT3_FS_XATTR_USER - if (!strcmp (this_char, "user_xattr")) - set_opt (*mount_options, XATTR_USER); -@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s - if (!parse_options(data, &tmp, sbi, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h ---- origin/include/linux/ext3_fs.h 2003-05-04 17:22:49.000000000 +0400 -+++ linux/include/linux/ext3_fs.h 2003-05-04 15:06:10.000000000 +0400 -@@ -193,6 +193,7 @@ struct ext3_group_desc - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -321,6 +322,7 @@ struct ext3_inode { - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included 
at once */ - #ifndef _LINUX_EXT2_FS_H -diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h ---- origin/include/linux/ext3_fs_sb.h 2003-05-04 17:23:52.000000000 +0400 -+++ linux/include/linux/ext3_fs_sb.h 2003-05-04 11:37:04.000000000 +0400 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 8 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +78,14 @@ struct ext3_sb_info { - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ diff --git a/lustre/kernel_patches/patches/ext3-largefile.patch b/lustre/kernel_patches/patches/ext3-largefile.patch deleted file mode 100644 index aa7a2f2..0000000 --- a/lustre/kernel_patches/patches/ext3-largefile.patch +++ /dev/null @@ -1,16 +0,0 @@ - fs/ext3/inode.c | 2 +- - 1 files changed, 1 insertion(+), 1 deletion(-) - ---- linux-2.4.20/fs/ext3/inode.c~ext3-largefile 2003-04-08 23:35:36.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-04-08 23:35:36.000000000 -0600 -@@ -2562,7 +2562,7 @@ void ext3_dirty_inode(struct inode *inod - handle_t *handle; - - lock_kernel(); -- handle = ext3_journal_start(inode, 1); -+ handle = ext3_journal_start(inode, 2); - if (IS_ERR(handle)) - goto out; - if (current_handle && - -_ diff --git a/lustre/kernel_patches/patches/ext3-noread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-noread-2.4.20.patch deleted file mode 100644 index b14b869..0000000 --- a/lustre/kernel_patches/patches/ext3-noread-2.4.20.patch +++ /dev/null @@ -1,218 +0,0 @@ - fs/ext3/ialloc.c | 47 ++++++++++++++++++++++- - 
fs/ext3/inode.c | 96 +++++++++++++++++++++++++++++++++++++----------- - include/linux/ext3_fs.h | 2 + - 3 files changed, 121 insertions(+), 24 deletions(-) - ---- linux-2.4.20/fs/ext3/ialloc.c~ext3-noread-2.4.20 2003-05-16 12:21:39.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/ialloc.c 2003-05-16 12:21:46.000000000 +0800 -@@ -289,6 +289,37 @@ error_return: - } - - /* -+ * @block_group: block group of inode -+ * @offset: relative offset of inode within @block_group -+ * -+ * Check whether any of the inodes in this disk block are in use. -+ * -+ * Caller must be holding superblock lock (group/bitmap read lock in future). -+ */ -+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group, -+ int offset) -+{ -+ int bitmap_nr = load_inode_bitmap(sb, block_group); -+ int inodes_per_block; -+ unsigned long inum, iend; -+ struct buffer_head *ibitmap; -+ -+ if (bitmap_nr < 0) -+ return 1; -+ -+ inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size; -+ inum = offset & ~(inodes_per_block - 1); -+ iend = inum + inodes_per_block; -+ ibitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ for (; inum < iend; inum++) { -+ if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* - * There are two policies for allocating an inode. 
If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of -@@ -310,6 +341,7 @@ struct inode * ext3_new_inode (handle_t - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -+ struct ext3_iloc iloc; - int err = 0; - - /* Cannot create files in a deleted directory */ -@@ -510,8 +542,19 @@ repeat: - inode->i_generation = sb->u.ext3_sb.s_next_generation++; - - inode->u.ext3_i.i_state = EXT3_STATE_NEW; -- err = ext3_mark_inode_dirty(handle, inode); -- if (err) goto fail; -+ err = ext3_get_inode_loc_new(inode, &iloc, 1); -+ if (err) goto fail; -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, iloc.bh); -+ if (err) { -+ brelse(iloc.bh); -+ iloc.bh = NULL; -+ goto fail; -+ } -+ err = ext3_mark_iloc_dirty(handle, inode, &iloc); -+ if (err) goto fail; -+ -+ - - unlock_super (sb); - if(DQUOT_ALLOC_INODE(inode)) { ---- linux-2.4.20/fs/ext3/inode.c~ext3-noread-2.4.20 2003-05-16 12:21:41.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/inode.c 2003-05-16 12:22:15.000000000 +0800 -@@ -2013,14 +2013,19 @@ out_stop: - ext3_journal_stop(handle, inode); - } - --/* -- * ext3_get_inode_loc returns with an extra refcount against the -- * inode's underlying buffer_head on success. -- */ -- --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+#define NUM_INODE_PREREAD 16 -+ -+/* -+ * ext3_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. If this is for a new inode allocation -+ * (new is non-zero) then we may be able to optimize away the read if there -+ * are no other in-use inodes in this inode table block. If we need to do -+ * a read, then read in a whole chunk of blocks to avoid blocking again soon -+ * if we are doing lots of creates/updates. 
-+ */ -+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new) - { -- struct buffer_head *bh = 0; -+ struct buffer_head *bh[NUM_INODE_PREREAD]; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; -@@ -2045,31 +2050,73 @@ int ext3_get_inode_loc (struct inode *in - } - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); - desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); -- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; -- if (!bh) { -+ if (!(inode->i_sb->u.ext3_sb.s_group_desc[group_desc])) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", - "Descriptor not loaded"); - goto bad_inode; - } - -- gdp = (struct ext3_group_desc *) bh->b_data; -+ gdp = (struct ext3_group_desc *)(inode->i_sb->u.ext3_sb.s_group_desc[group_desc]->b_data); - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * -- EXT3_INODE_SIZE(inode->i_sb); -+ offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)); -+ - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); -- if (!(bh = sb_bread(inode->i_sb, block))) { -- ext3_error (inode->i_sb, "ext3_get_inode_loc", -- "unable to read inode block - " -- "inode=%lu, block=%lu", inode->i_ino, block); -- goto bad_inode; -- } -- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ (offset * EXT3_INODE_SIZE(inode->i_sb) >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); - -- iloc->bh = bh; -- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -+ bh[0] = sb_getblk(inode->i_sb, block); -+ if (buffer_uptodate(bh[0])) -+ goto done; -+ -+ /* If we don't really need to read this block, and it isn't already -+ * in memory, then we just zero it out. Otherwise, we keep the -+ * current block contents (deleted inode data) for posterity. 
-+ */ -+ if (new && !ext3_itable_block_used(inode->i_sb, block_group, offset)) { -+ lock_buffer(bh[0]); -+ memset(bh[0]->b_data, 0, bh[0]->b_size); -+ mark_buffer_uptodate(bh[0], 1); -+ unlock_buffer(bh[0]); -+ } else { -+ unsigned long block_end, itable_end; -+ int count = 1; -+ -+ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) + -+ inode->i_sb->u.ext3_sb.s_itb_per_group; -+ block_end = block + NUM_INODE_PREREAD; -+ if (block_end > itable_end) -+ block_end = itable_end; -+ -+ for (++block; block < block_end; block++) { -+ bh[count] = sb_getblk(inode->i_sb, block); -+ if (count && (buffer_uptodate(bh[count]) || -+ buffer_locked(bh[count]))) { -+ __brelse(bh[count]); -+ } else -+ count++; -+ } -+ -+ ll_rw_block(READ, count, bh); -+ -+ /* Release all but the block we actually need (bh[0]) */ -+ while (--count > 0) -+ __brelse(bh[count]); -+ -+ wait_on_buffer(bh[0]); -+ if (!buffer_uptodate(bh[0])) { -+ ext3_error(inode->i_sb, __FUNCTION__, -+ "unable to read inode block - " -+ "inode=%lu, block=%lu", inode->i_ino, -+ bh[0]->b_blocknr); -+ goto bad_inode; -+ } -+ } -+ done: -+ offset = (offset * EXT3_INODE_SIZE(inode->i_sb)) & (EXT3_BLOCK_SIZE(inode->i_sb) - 1); -+ -+ iloc->bh = bh[0]; -+ iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset); - iloc->block_group = block_group; - - return 0; -@@ -2078,6 +2125,11 @@ int ext3_get_inode_loc (struct inode *in - return -EIO; - } - -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -+{ -+ return ext3_get_inode_loc_new(inode, iloc, 0); -+} -+ - void ext3_read_inode(struct inode * inode) - { - struct ext3_iloc iloc; ---- linux-2.4.20/include/linux/ext3_fs.h~ext3-noread-2.4.20 2003-05-16 12:21:39.000000000 +0800 -+++ linux-2.4.20-root/include/linux/ext3_fs.h 2003-05-16 12:21:46.000000000 +0800 -@@ -683,6 +683,8 @@ extern int ext3_forget(handle_t *, int, - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, 
struct inode *, int, int, int *); - -+extern int ext3_itable_block_used(struct super_block *sb, unsigned int, int); -+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int); - extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); - -_ diff --git a/lustre/kernel_patches/patches/ext3-orphan_lock.patch b/lustre/kernel_patches/patches/ext3-orphan_lock.patch deleted file mode 100644 index d1e5c8d..0000000 --- a/lustre/kernel_patches/patches/ext3-orphan_lock.patch +++ /dev/null @@ -1,79 +0,0 @@ ---- linux/fs/ext3/namei.c.orig Fri Mar 14 14:11:58 2003 -+++ linux/fs/ext3/namei.c Fri Mar 14 14:39:48 2003 -@@ -1406,8 +1409,8 @@ - struct super_block *sb = inode->i_sb; - struct ext3_iloc iloc; - int err = 0, rc; -- -- lock_super(sb); -+ -+ down(&EXT3_SB(sb)->s_orphan_lock); - if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - -@@ -1455,7 +1458,7 @@ - jbd_debug(4, "orphan inode %ld will point to %d\n", - inode->i_ino, NEXT_ORPHAN(inode)); - out_unlock: -- unlock_super(sb); -+ up(&EXT3_SB(sb)->s_orphan_lock); - ext3_std_error(inode->i_sb, err); - return err; - } -@@ -1468,20 +1471,19 @@ - { - struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); -- struct ext3_sb_info *sbi; -+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - -- lock_super(inode->i_sb); -+ down(&sbi->s_orphan_lock); - if (list_empty(&ei->i_orphan)) { -- unlock_super(inode->i_sb); -+ up(&sbi->s_orphan_lock); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; -- sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -@@ -1525,10 +1527,10 @@ - if (err) - goto out_brelse; - --out_err: -+out_err: - ext3_std_error(inode->i_sb, err); - out: -- unlock_super(inode->i_sb); -+ up(&sbi->s_orphan_lock); - return err; - - 
out_brelse: ---- linux/fs/ext3/super.c.orig Fri Mar 14 14:11:58 2003 -+++ linux/fs/ext3/super.c Fri Mar 14 14:36:00 2003 -@@ -1134,6 +1314,7 @@ - */ - sb->s_op = &ext3_sops; - INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ -+ sema_init(&sbi->s_orphan_lock, 1); - - sb->s_root = 0; - ---- linux/include/linux/ext3_fs_sb.h.orig Tue Feb 11 16:34:33 2003 -+++ linux/include/linux/ext3_fs_sb.h Fri Mar 14 14:30:11 2003 -@@ -67,6 +69,7 @@ - struct inode * s_journal_inode; - struct journal_s * s_journal; - struct list_head s_orphan; -+ struct semaphore s_orphan_lock; - unsigned long s_commit_interval; - struct block_device *journal_bdev; - #ifdef CONFIG_JBD_DEBUG diff --git a/lustre/kernel_patches/patches/ext3-san-2.4.20.patch b/lustre/kernel_patches/patches/ext3-san-2.4.20.patch deleted file mode 100644 index 148f4e3..0000000 --- a/lustre/kernel_patches/patches/ext3-san-2.4.20.patch +++ /dev/null @@ -1,117 +0,0 @@ - fs/ext3/ext3-exports.c | 9 ++++- - fs/ext3/inode.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 89 insertions(+), 1 deletion(-) - ---- linux/fs/ext3/inode.c~ext3-san-2.4.20-hp Tue Apr 29 11:01:52 2003 -+++ linux-mmonroe/fs/ext3/inode.c Tue Apr 29 11:01:53 2003 -@@ -2734,3 +2734,84 @@ int ext3_change_inode_journal_flag(struc - * here, in ext3_aops_journal_start() to ensure that the forthcoming "see if we - * need to extend" test in ext3_prepare_write() succeeds. 
- */ -+ -+/* for each block: 1 ind + 1 dind + 1 tind -+ * for each block: 3 bitmap blocks -+ * for each block: 3 group descriptor blocks -+ * i inode block -+ * 1 superblock -+ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files -+ * ((1+1+1) * 3 * nblocks) + 1 + 1 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ * -+ * XXX assuming: -+ * (1) fs logic block size == page size -+ * (2) ext3 in writeback mode -+ */ -+static inline int ext3_san_write_trans_blocks(int nblocks) -+{ -+ int ret; -+ -+ ret = (1 + 1 + 1) * 3 * nblocks + 1 + 1; -+ -+#ifdef CONFIG_QUOTA -+ ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return ret; -+} -+ -+/* Alloc blocks for an inode, while don't create any buffer/page -+ * for data I/O; set the inode size if file is extended. -+ * -+ * @inode: target inode -+ * @blocks: array of logic block number -+ * @nblocks: how many blocks need be alloced -+ * @newsize: new filesize we should set -+ * -+ * return: 0 success, otherwise failed -+ * (*blocks) contains physical block number alloced -+ * -+ * XXX this assume the fs block size == page size -+ */ -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize) -+{ -+ handle_t *handle; -+ struct buffer_head bh_tmp; -+ int needed_blocks; -+ int i, ret = 0, ret2; -+ -+ needed_blocks = ext3_san_write_trans_blocks(nblocks); -+ -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ if (IS_ERR(handle)) { -+ unlock_kernel(); -+ return PTR_ERR(handle); -+ } -+ unlock_kernel(); -+ -+ /* alloc blocks one by one */ -+ for (i = 0; i < nblocks; i++) { -+ ret = ext3_get_block_handle(handle, inode, blocks[i], -+ &bh_tmp, 1); -+ if (ret) -+ break; -+ -+ blocks[i] = bh_tmp.b_blocknr; -+ } -+ -+ /* set inode size if needed */ -+ if (!ret && (newsize > inode->i_size)) { -+ inode->i_size = newsize; -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ -+ lock_kernel(); -+ ret2 = ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ -+ if (!ret) -+ ret = ret2; -+ 
return ret; -+} ---- linux/fs/ext3/ext3-exports.c~ext3-san-2.4.20-hp Tue Apr 29 11:01:51 2003 -+++ linux-mmonroe/fs/ext3/ext3-exports.c Tue Apr 29 11:07:19 2003 -@@ -1,9 +1,15 @@ - #include - #include --#include -+#include -+#include -+#include - #include -+#include - #include - -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize); -+ - EXPORT_SYMBOL(ext3_force_commit); - EXPORT_SYMBOL(ext3_bread); - EXPORT_SYMBOL(ext3_xattr_register); -@@ -11,3 +17,4 @@ EXPORT_SYMBOL(ext3_xattr_unregister); - EXPORT_SYMBOL(ext3_xattr_get); - EXPORT_SYMBOL(ext3_xattr_list); - EXPORT_SYMBOL(ext3_xattr_set); -+EXPORT_SYMBOL(ext3_prep_san_write); - -_ diff --git a/lustre/kernel_patches/patches/ext3-truncate_blocks-chaos.patch.patch b/lustre/kernel_patches/patches/ext3-truncate_blocks-chaos.patch.patch deleted file mode 100644 index ce3928d..0000000 --- a/lustre/kernel_patches/patches/ext3-truncate_blocks-chaos.patch.patch +++ /dev/null @@ -1,92 +0,0 @@ ---- ./fs/ext3/inode.c.orig Wed Mar 12 02:44:06 2003 -+++ ./fs/ext3/inode.c Wed Mar 12 11:55:20 2003 -@@ -99,7 +99,35 @@ int ext3_forget(handle_t *handle, int is - return err; - } - --/* -+/* -+ * Work out how many blocks we need to progress with the next chunk of a -+ * truncate transaction. -+ */ -+ -+static unsigned long blocks_for_truncate(struct inode *inode) -+{ -+ unsigned long needed; -+ -+ needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); -+ -+ /* Give ourselves just enough room to cope with inodes in which -+ * i_blocks is corrupt: we've seen disk corruptions in the past -+ * which resulted in random data in an inode which looked enough -+ * like a regular file for ext3 to try to delete it. Things -+ * will go a bit crazy if that happens, but at least we should -+ * try not to panic the whole kernel. */ -+ if (needed < 2) -+ needed = 2; -+ -+ /* But we need to bound the transaction so we don't overflow the -+ * journal. 
*/ -+ if (needed > EXT3_MAX_TRANS_DATA) -+ needed = EXT3_MAX_TRANS_DATA; -+ -+ return EXT3_DATA_TRANS_BLOCKS + needed; -+} -+ -+/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. -@@ -110,19 +138,14 @@ int ext3_forget(handle_t *handle, int is - * transaction in the top-level truncate loop. --sct - */ - --static handle_t *start_transaction(struct inode *inode) -+static handle_t *start_transaction(struct inode *inode) - { -- long needed; - handle_t *result; -- -- needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; -- -- result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed); -+ -+ result = ext3_journal_start(inode, blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; -- -+ - ext3_std_error(inode->i_sb, PTR_ERR(result)); - return result; - } -@@ -135,14 +158,9 @@ static handle_t *start_transaction(struc - */ - static int try_to_extend_transaction(handle_t *handle, struct inode *inode) - { -- long needed; -- - if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) - return 0; -- needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; -- if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed)) -+ if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) - return 0; - return 1; - } -@@ -154,11 +172,8 @@ static int try_to_extend_transaction(han - */ - static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) - { -- long needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; - jbd_debug(2, "restarting handle %p\n", handle); -- return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed); -+ return ext3_journal_restart(handle, blocks_for_truncate(inode)); - } - - /* diff --git a/lustre/kernel_patches/patches/ext3-truncate_blocks.patch 
b/lustre/kernel_patches/patches/ext3-truncate_blocks.patch deleted file mode 100644 index ce3928d..0000000 --- a/lustre/kernel_patches/patches/ext3-truncate_blocks.patch +++ /dev/null @@ -1,92 +0,0 @@ ---- ./fs/ext3/inode.c.orig Wed Mar 12 02:44:06 2003 -+++ ./fs/ext3/inode.c Wed Mar 12 11:55:20 2003 -@@ -99,7 +99,35 @@ int ext3_forget(handle_t *handle, int is - return err; - } - --/* -+/* -+ * Work out how many blocks we need to progress with the next chunk of a -+ * truncate transaction. -+ */ -+ -+static unsigned long blocks_for_truncate(struct inode *inode) -+{ -+ unsigned long needed; -+ -+ needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); -+ -+ /* Give ourselves just enough room to cope with inodes in which -+ * i_blocks is corrupt: we've seen disk corruptions in the past -+ * which resulted in random data in an inode which looked enough -+ * like a regular file for ext3 to try to delete it. Things -+ * will go a bit crazy if that happens, but at least we should -+ * try not to panic the whole kernel. */ -+ if (needed < 2) -+ needed = 2; -+ -+ /* But we need to bound the transaction so we don't overflow the -+ * journal. */ -+ if (needed > EXT3_MAX_TRANS_DATA) -+ needed = EXT3_MAX_TRANS_DATA; -+ -+ return EXT3_DATA_TRANS_BLOCKS + needed; -+} -+ -+/* - * Truncate transactions can be complex and absolutely huge. So we need to - * be able to restart the transaction at a conventient checkpoint to make - * sure we don't overflow the journal. -@@ -110,19 +138,14 @@ int ext3_forget(handle_t *handle, int is - * transaction in the top-level truncate loop. 
--sct - */ - --static handle_t *start_transaction(struct inode *inode) -+static handle_t *start_transaction(struct inode *inode) - { -- long needed; - handle_t *result; -- -- needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; -- -- result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed); -+ -+ result = ext3_journal_start(inode, blocks_for_truncate(inode)); - if (!IS_ERR(result)) - return result; -- -+ - ext3_std_error(inode->i_sb, PTR_ERR(result)); - return result; - } -@@ -135,14 +158,9 @@ static handle_t *start_transaction(struc - */ - static int try_to_extend_transaction(handle_t *handle, struct inode *inode) - { -- long needed; -- - if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) - return 0; -- needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; -- if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed)) -+ if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) - return 0; - return 1; - } -@@ -154,11 +172,8 @@ static int try_to_extend_transaction(han - */ - static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) - { -- long needed = inode->i_blocks; -- if (needed > EXT3_MAX_TRANS_DATA) -- needed = EXT3_MAX_TRANS_DATA; - jbd_debug(2, "restarting handle %p\n", handle); -- return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed); -+ return ext3_journal_restart(handle, blocks_for_truncate(inode)); - } - - /* diff --git a/lustre/kernel_patches/patches/ext3-unmount_sync.patch b/lustre/kernel_patches/patches/ext3-unmount_sync.patch deleted file mode 100644 index c57903c..0000000 --- a/lustre/kernel_patches/patches/ext3-unmount_sync.patch +++ /dev/null @@ -1,21 +0,0 @@ - fs/ext3/super.c | 7 ++++++- - 1 files changed, 6 insertions(+), 1 deletion(-) - ---- linux-2.4.20/fs/ext3/super.c~ext3-unmount_sync 2003-04-08 23:35:44.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/super.c 2003-04-08 23:35:44.000000000 
-0600 -@@ -1612,7 +1612,12 @@ void ext3_write_super (struct super_bloc - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - -- if (do_sync_supers) { -+ /* -+ * Tricky --- if we are unmounting, the write really does need -+ * to be synchronous. We can detect that by looking for NULL in -+ * sb->s_root. -+ */ -+ if (do_sync_supers || !sb->s_root) { - unlock_super(sb); - log_wait_commit(EXT3_SB(sb)->s_journal, target); - lock_super(sb); - -_ diff --git a/lustre/kernel_patches/patches/ext3-use-after-free.patch b/lustre/kernel_patches/patches/ext3-use-after-free.patch deleted file mode 100644 index dd999bf..0000000 --- a/lustre/kernel_patches/patches/ext3-use-after-free.patch +++ /dev/null @@ -1,53 +0,0 @@ - ./fs/ext3/namei.c | 11 +++++------ - 1 files changed, 5 insertions(+), 6 deletions(-) - ---- linux-2.4.20/./fs/ext3/namei.c~ext3-use-after-free 2003-04-08 23:35:51.000000000 -0600 -+++ linux-2.4.20-braam/./fs/ext3/namei.c 2003-04-08 23:35:51.000000000 -0600 -@@ -1521,8 +1521,11 @@ static int ext3_add_nondir(handle_t *han - { - int err = ext3_add_entry(handle, dentry, inode); - if (!err) { -- d_instantiate(dentry, inode); -- return 0; -+ err = ext3_mark_inode_dirty(handle, inode); -+ if (err == 0) { -+ d_instantiate(dentry, inode); -+ return 0; -+ } - } - ext3_dec_count(handle, inode); - iput(inode); -@@ -1559,7 +1562,6 @@ static int ext3_create (struct inode * d - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -1586,7 +1588,6 @@ static int ext3_mknod (struct inode * di - if (!IS_ERR(inode)) { - init_special_inode(inode, mode, rdev); - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - } - ext3_journal_stop(handle, dir); - return err; -@@ -2035,7 +2036,6 @@ static int ext3_symlink (struct inode * - } - EXT3_I(inode)->i_disksize = 
inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - out_stop: - ext3_journal_stop(handle, dir); - return err; -@@ -2069,7 +2069,6 @@ static int ext3_link (struct dentry * ol - atomic_inc(&inode->i_count); - - err = ext3_add_nondir(handle, dentry, inode); -- ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle, dir); - return err; - } - -_ diff --git a/lustre/kernel_patches/patches/ext3_orphan_lock-2.4.20-rh.patch b/lustre/kernel_patches/patches/ext3_orphan_lock-2.4.20-rh.patch deleted file mode 100644 index d029650..0000000 --- a/lustre/kernel_patches/patches/ext3_orphan_lock-2.4.20-rh.patch +++ /dev/null @@ -1,82 +0,0 @@ - fs/ext3/namei.c | 15 +++++++-------- - fs/ext3/super.c | 1 + - include/linux/ext3_fs_sb.h | 1 + - 3 files changed, 9 insertions(+), 8 deletions(-) - ---- linux-rh-2.4.20-8/fs/ext3/namei.c~ext3_orphan_lock-2.4.20-rh 2003-05-05 19:49:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/namei.c 2003-05-05 20:01:28.000000000 +0800 -@@ -1747,8 +1747,8 @@ int ext3_orphan_add(handle_t *handle, st - struct super_block *sb = inode->i_sb; - struct ext3_iloc iloc; - int err = 0, rc; -- -- lock_super(sb); -+ -+ down(&EXT3_SB(sb)->s_orphan_lock); - if (!list_empty(&EXT3_I(inode)->i_orphan)) - goto out_unlock; - -@@ -1796,7 +1796,7 @@ int ext3_orphan_add(handle_t *handle, st - jbd_debug(4, "orphan inode %ld will point to %d\n", - inode->i_ino, NEXT_ORPHAN(inode)); - out_unlock: -- unlock_super(sb); -+ up(&EXT3_SB(sb)->s_orphan_lock); - ext3_std_error(inode->i_sb, err); - return err; - } -@@ -1809,20 +1809,19 @@ int ext3_orphan_del(handle_t *handle, st - { - struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); -- struct ext3_sb_info *sbi; -+ struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb); - unsigned long ino_next; - struct ext3_iloc iloc; - int err = 0; - -- lock_super(inode->i_sb); -+ down(&sbi->s_orphan_lock); - if (list_empty(&ei->i_orphan)) { -- unlock_super(inode->i_sb); 
-+ up(&sbi->s_orphan_lock); - return 0; - } - - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; -- sbi = EXT3_SB(inode->i_sb); - - jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); - -@@ -1871,7 +1870,7 @@ int ext3_orphan_del(handle_t *handle, st - out_err: - ext3_std_error(inode->i_sb, err); - out: -- unlock_super(inode->i_sb); -+ up(&sbi->s_orphan_lock); - return err; - - out_brelse: ---- linux-rh-2.4.20-8/fs/ext3/super.c~ext3_orphan_lock-2.4.20-rh 2003-05-05 19:49:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/super.c 2003-05-05 19:54:09.000000000 +0800 -@@ -1151,6 +1151,7 @@ struct super_block * ext3_read_super (st - */ - sb->s_op = &ext3_sops; - INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ -+ sema_init(&sbi->s_orphan_lock, 1); - - sb->s_root = 0; - ---- linux-rh-2.4.20-8/include/linux/ext3_fs_sb.h~ext3_orphan_lock-2.4.20-rh 2003-05-05 19:49:07.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext3_fs_sb.h 2003-05-05 19:54:09.000000000 +0800 -@@ -69,6 +69,7 @@ struct ext3_sb_info { - struct inode * s_journal_inode; - struct journal_s * s_journal; - struct list_head s_orphan; -+ struct semaphore s_orphan_lock; - unsigned long s_commit_interval; - struct block_device *journal_bdev; - #ifdef CONFIG_JBD_DEBUG - -_ diff --git a/lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch b/lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch deleted file mode 100644 index df46643..0000000 --- a/lustre/kernel_patches/patches/extN-2.4.18-ino_sb_fixup.patch +++ /dev/null @@ -1,33 +0,0 @@ ---- ./include/linux/ext3_fs.h.orig Tue May 7 17:06:03 2002 -+++ ./include/linux/ext3_fs.h Tue May 7 17:07:11 2002 -@@ -17,6 +17,8 @@ - #define _LINUX_EXT3_FS_H - - #include -+#include -+#include - - /* - * The second extended filesystem constants/structures -@@ -86,8 +88,8 @@ - #define EXT3_MIN_BLOCK_LOG_SIZE 10 - - #ifdef __KERNEL__ --#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) --#define EXT3_I(inode) 
(&((inode)->u.ext3_i)) -+#define EXT3_SB(sb) ((struct ext3_sb_info *)&((sb)->u.generic_sbp)) -+#define EXT3_I(inode) ((struct ext3_inode_info *)&((inode)->u.generic_ip)) - - #define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) - #define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -447,7 +447,9 @@ - #define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime - static inline struct inode *orphan_list_entry(struct list_head *l) - { -- return list_entry(l, struct inode, u.ext3_i.i_orphan); -+ return ((struct inode *)((char *)l - -+ (unsigned long)(offsetof(struct inode, u.generic_ip) + -+ offsetof(struct ext3_inode_info, i_orphan)))); - } - - /* diff --git a/lustre/kernel_patches/patches/extN-delete_thread.patch b/lustre/kernel_patches/patches/extN-delete_thread.patch deleted file mode 100644 index 4248b5c..0000000 --- a/lustre/kernel_patches/patches/extN-delete_thread.patch +++ /dev/null @@ -1,278 +0,0 @@ - 0 files changed - ---- linux-2.4.18-p4smp-61chaos/include/linux/ext3_fs.h~extN-delete_thread 2003-05-29 10:19:15.000000000 +0800 -+++ linux-2.4.18-p4smp-61chaos-root/include/linux/ext3_fs.h 2003-05-29 10:50:04.000000000 +0800 -@@ -190,6 +190,7 @@ struct ext3_group_desc - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands ---- linux-2.4.18-p4smp-61chaos/include/linux/ext3_fs_sb.h~extN-delete_thread 2003-05-29 10:19:15.000000000 +0800 -+++ linux-2.4.18-p4smp-61chaos-root/include/linux/ext3_fs_sb.h 2003-05-29 10:50:04.000000000 +0800 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 32 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -74,6 +76,14 @@ struct ext3_sb_info { - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef 
EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */ ---- linux-2.4.18-p4smp-61chaos/fs/ext3/super.c~extN-delete_thread 2003-05-29 10:19:15.000000000 +0800 -+++ linux-2.4.18-p4smp-61chaos-root/fs/ext3/super.c 2003-05-29 10:50:04.000000000 +0800 -@@ -398,6 +398,207 @@ static void dump_orphan_list(struct supe - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ tsk->flags |= PF_KERNTHREAD; -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ printk(KERN_INFO "EXT3-fs: delete thread on %s started\n", -+ kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ sleep_on(&sbi->s_delete_thread_queue); -+ printk(KERN_DEBUG "%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm, sbi->s_delete_inodes, sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ 
spin_unlock(&sbi->s_delete_lock); -+ printk(KERN_DEBUG "ext3 delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ printk(KERN_DEBUG "%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) -+ printk(KERN_WARNING -+ "%lu blocks and %lu left on list?\n", -+ sbi->s_delete_blocks, sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list)); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); -+} -+ -+/* Instead of playing games with the inode flags, destruction, etc we just -+ * duplicate the inode data locally and put it on a list for the truncate -+ * 
thread. We need large parts of the inode struct in order to complete -+ * the truncate and unlink, so we may as well just copy the whole thing. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * truncate thread when we run out of space. -+ * -+ * One shouldn't consider this duplicate an "inode", as it isn't really -+ * visible to the VFS, but rather a data struct that holds truncate data. -+ * -+ * In 2.5 this can be done much more cleanly by just registering a "drop" -+ * method in the super_operations struct. -+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ !sbi->s_delete_list.next) { -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. 
-+ */ -+ down(&sbi->s_orphan_lock); -+ -+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug(KERN_DEBUG "delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ ext3_delete_inode(old_inode); -+ return; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan)); -+ /* Ugh. We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * the same as the on-disk orphan list. -+ */ -+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ printk(KERN_DEBUG "delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +606,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_stop_delete_thread(sbi); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); 
- if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +655,11 @@ static struct super_operations ext3_sops - write_inode: ext3_write_inode, /* BKL not held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else - delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -1209,6 +1415,7 @@ struct super_block * ext3_read_super (st - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock - -_ diff --git a/lustre/kernel_patches/patches/extN-iget-debug.patch b/lustre/kernel_patches/patches/extN-iget-debug.patch deleted file mode 100644 index dbe90c8..0000000 --- a/lustre/kernel_patches/patches/extN-iget-debug.patch +++ /dev/null @@ -1,48 +0,0 @@ ---- linux/fs/ext3/namei.c.orig Thu Jan 30 01:15:13 2003 -+++ linux/fs/ext3/namei.c Sat Feb 1 00:33:46 2003 -@@ -710,6 +710,24 @@ - return ret; - } - -+static int ext3_find_inode(struct inode *inode, unsigned long ino, -+ void *opaque) -+{ -+ const char *name = NULL; -+ int len = 0; -+ -+ if (opaque) { -+ struct dentry *dentry = opaque; -+ name = dentry->d_name.name; -+ len = dentry->d_name.len; -+ } -+ printk(KERN_INFO "finding inode %s:%lu (%p) count %d (%p = %*s)\n", -+ kdevname(inode->i_dev), ino, inode, atomic_read(&inode->i_count), -+ opaque, len, name ? 
name : ""); -+ -+ return 1; -+} -+ - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; -@@ -724,8 +742,8 @@ - if (bh) { - unsigned long ino = le32_to_cpu(de->inode); - brelse (bh); -- inode = iget(dir->i_sb, ino); -+ inode = iget4(dir->i_sb, ino, ext3_find_inode, dentry); - - if (!inode) - return ERR_PTR(-EACCES); ---- linux/fs/ext3/inode.c.orig Thu Jan 30 01:15:13 2003 -+++ linux/fs/ext3/inode.c Sat Feb 1 00:34:45 2003 -@@ -166,6 +166,9 @@ - */ - void ext3_put_inode (struct inode * inode) - { -+ printk(KERN_INFO "putting inode %s:%lu (%p) count %d\n", -+ kdevname(inode->i_dev), inode->i_ino, inode, -+ atomic_read(&inode->i_count)); - ext3_discard_prealloc (inode); - } - diff --git a/lustre/kernel_patches/patches/extN-misc-fixup.patch b/lustre/kernel_patches/patches/extN-misc-fixup.patch deleted file mode 100644 index 06ea72a..0000000 --- a/lustre/kernel_patches/patches/extN-misc-fixup.patch +++ /dev/null @@ -1,23 +0,0 @@ ---- linux-2.4.17/fs/ext3/super.c.orig Fri Dec 21 10:41:55 2001 -+++ linux-2.4.17/fs/ext3/super.c Fri Mar 22 11:00:41 2002 -@@ -1344,10 +1342,10 @@ - printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); - goto out_journal; - } -- if (ntohl(journal->j_superblock->s_nr_users) != 1) { -+ if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT3-fs: External journal has more than one " - "user (unsupported) - %d\n", -- ntohl(journal->j_superblock->s_nr_users)); -+ be32_to_cpu(journal->j_superblock->s_nr_users)); - goto out_journal; - } - EXT3_SB(sb)->journal_bdev = bdev; -@@ -1560,6 +1560,7 @@ - unlock_kernel(); - return ret; - } -+EXPORT_SYMBOL(ext3_force_commit); /* here to avoid potential patch collisions */ - - /* - * Ext3 always journals updates to the superblock itself, so we don't diff --git a/lustre/kernel_patches/patches/extN-noread.patch b/lustre/kernel_patches/patches/extN-noread.patch deleted file mode 100644 index 63f4463..0000000 --- 
a/lustre/kernel_patches/patches/extN-noread.patch +++ /dev/null @@ -1,237 +0,0 @@ - fs/ext3/ialloc.c | 47 +++++++++++++++++++++- - fs/ext3/inode.c | 99 ++++++++++++++++++++++++++++++++++++------------ - include/linux/ext3_fs.h | 2 - 3 files changed, 122 insertions(+), 26 deletions(-) - ---- linux-2.4.18-chaos52/fs/ext3/ialloc.c~extN-noread 2003-05-16 12:26:29.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/ialloc.c 2003-05-16 12:26:31.000000000 +0800 -@@ -289,6 +289,37 @@ error_return: - } - - /* -+ * @block_group: block group of inode -+ * @offset: relative offset of inode within @block_group -+ * -+ * Check whether any of the inodes in this disk block are in use. -+ * -+ * Caller must be holding superblock lock (group/bitmap read lock in future). -+ */ -+int ext3_itable_block_used(struct super_block *sb, unsigned int block_group, -+ int offset) -+{ -+ int bitmap_nr = load_inode_bitmap(sb, block_group); -+ int inodes_per_block; -+ unsigned long inum, iend; -+ struct buffer_head *ibitmap; -+ -+ if (bitmap_nr < 0) -+ return 1; -+ -+ inodes_per_block = sb->s_blocksize / EXT3_SB(sb)->s_inode_size; -+ inum = offset & ~(inodes_per_block - 1); -+ iend = inum + inodes_per_block; -+ ibitmap = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ for (; inum < iend; inum++) { -+ if (inum != offset && ext3_test_bit(inum, ibitmap->b_data)) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+/* - * There are two policies for allocating an inode. 
If the new inode is - * a directory, then a forward search is made for a block group with both - * free space and a low directory-to-inode ratio; if that fails, then of -@@ -312,6 +343,7 @@ struct inode * ext3_new_inode (handle_t - struct ext3_group_desc * gdp; - struct ext3_group_desc * tmp; - struct ext3_super_block * es; -+ struct ext3_iloc iloc; - int err = 0; - - /* Cannot create files in a deleted directory */ -@@ -505,7 +537,7 @@ repeat: - ei->i_prealloc_count = 0; - #endif - ei->i_block_group = i; -- -+ - if (ei->i_flags & EXT3_SYNC_FL) - inode->i_flags |= S_SYNC; - if (IS_SYNC(inode)) -@@ -514,9 +546,18 @@ repeat: - inode->i_generation = sbi->s_next_generation++; - - ei->i_state = EXT3_STATE_NEW; -- err = ext3_mark_inode_dirty(handle, inode); -+ err = ext3_get_inode_loc_new(inode, &iloc, 1); - if (err) goto fail; -- -+ BUFFER_TRACE(iloc->bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, iloc.bh); -+ if (err) { -+ brelse(iloc.bh); -+ iloc.bh = NULL; -+ goto fail; -+ } -+ err = ext3_mark_iloc_dirty(handle, inode, &iloc); -+ if (err) goto fail; -+ - unlock_super (sb); - if(DQUOT_ALLOC_INODE(inode)) { - DQUOT_DROP(inode); ---- linux-2.4.18-chaos52/fs/ext3/inode.c~extN-noread 2003-05-16 12:26:29.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c 2003-05-16 12:27:06.000000000 +0800 -@@ -2011,23 +2011,28 @@ out_stop: - ext3_journal_stop(handle, inode); - } - --/* -- * ext3_get_inode_loc returns with an extra refcount against the -- * inode's underlying buffer_head on success. -- */ -+#define NUM_INODE_PREREAD 16 - --int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) -+/* -+ * ext3_get_inode_loc returns with an extra refcount against the inode's -+ * underlying buffer_head on success. If this is for a new inode allocation -+ * (new is non-zero) then we may be able to optimize away the read if there -+ * are no other in-use inodes in this inode table block. 
If we need to do -+ * a read, then read in a whole chunk of blocks to avoid blocking again soon -+ * if we are doing lots of creates/updates. -+ */ -+int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new) - { - struct super_block *sb = inode->i_sb; - struct ext3_sb_info *sbi = EXT3_SB(sb); -- struct buffer_head *bh = 0; -+ struct buffer_head *bh[NUM_INODE_PREREAD]; - unsigned long block; - unsigned long block_group; - unsigned long group_desc; - unsigned long desc; - unsigned long offset; - struct ext3_group_desc * gdp; -- -+ - if ((inode->i_ino != EXT3_ROOT_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(sb)) || -@@ -2042,38 +2047,86 @@ int ext3_get_inode_loc (struct inode *in - } - group_desc = block_group >> sbi->s_desc_per_block_bits; - desc = block_group & (sbi->s_desc_per_block - 1); -- bh = sbi->s_group_desc[group_desc]; -- if (!bh) { -+ if (!sbi->s_group_desc[group_desc]) { - ext3_error(sb, __FUNCTION__, "Descriptor not loaded"); - goto bad_inode; - } - -- gdp = (struct ext3_group_desc *) bh->b_data; -+ gdp = (struct ext3_group_desc *)(sbi->s_group_desc[group_desc]->b_data); -+ - /* - * Figure out the offset within the block group inode table - */ -- offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) * -- sbi->s_inode_size; -+ offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group); -+ - block = le32_to_cpu(gdp[desc].bg_inode_table) + -- (offset >> EXT3_BLOCK_SIZE_BITS(sb)); -- if (!(bh = sb_bread(sb, block))) { -- ext3_error (sb, __FUNCTION__, -- "unable to read inode block - " -- "inode=%lu, block=%lu", inode->i_ino, block); -- goto bad_inode; -+ (offset * sbi->s_inode_size >> EXT3_BLOCK_SIZE_BITS(sb)); -+ -+ bh[0] = sb_getblk(sb, block); -+ if (buffer_uptodate(bh[0])) -+ goto done; -+ -+ /* If we don't really need to read this block, and it isn't already -+ * in memory, then we just zero it out. Otherwise, we keep the -+ * current block contents (deleted inode data) for posterity. 
-+ */ -+ if (new && !ext3_itable_block_used(sb, block_group, offset)) { -+ lock_buffer(bh[0]); -+ memset(bh[0]->b_data, 0, bh[0]->b_size); -+ mark_buffer_uptodate(bh[0], 1); -+ unlock_buffer(bh[0]); -+ } else { -+ unsigned long block_end, itable_end; -+ int count = 1; -+ -+ itable_end = le32_to_cpu(gdp[desc].bg_inode_table) + -+ sbi->s_itb_per_group; -+ block_end = block + NUM_INODE_PREREAD; -+ if (block_end > itable_end) -+ block_end = itable_end; -+ -+ for (++block; block < block_end; block++) { -+ bh[count] = sb_getblk(sb, block); -+ if (count && (buffer_uptodate(bh[count]) || -+ buffer_locked(bh[count]))) { -+ __brelse(bh[count]); -+ } else -+ count++; -+ } -+ -+ ll_rw_block(READ, count, bh); -+ -+ /* Release all but the block we actually need (bh[0]) */ -+ while (--count > 0) -+ __brelse(bh[count]); -+ -+ wait_on_buffer(bh[0]); -+ if (!buffer_uptodate(bh[0])) { -+ ext3_error(sb, __FUNCTION__, -+ "unable to read inode block - " -+ "inode=%lu, block=%lu", inode->i_ino, -+ bh[0]->b_blocknr); -+ goto bad_inode; -+ } - } -- offset &= (EXT3_BLOCK_SIZE(sb) - 1); -+ done: -+ offset = (offset * sbi->s_inode_size) & (EXT3_BLOCK_SIZE(sb) - 1); - -- iloc->bh = bh; -- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); -+ iloc->bh = bh[0]; -+ iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset); - iloc->block_group = block_group; -- -+ - return 0; -- -+ - bad_inode: - return -EIO; - } - -+int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) -+{ -+ return ext3_get_inode_loc_new(inode, iloc, 0); -+} -+ - void ext3_read_inode(struct inode * inode) - { - struct ext3_iloc iloc; ---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~extN-noread 2003-05-16 12:26:29.000000000 +0800 -+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h 2003-05-16 12:26:31.000000000 +0800 -@@ -640,6 +640,8 @@ extern int ext3_forget(handle_t *, int, - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * 
ext3_bread (handle_t *, struct inode *, int, int, int *); - -+extern int ext3_itable_block_used(struct super_block *sb, unsigned int, int); -+extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int); - extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); - extern void ext3_read_inode (struct inode *); - extern void ext3_write_inode (struct inode *, int); - -_ diff --git a/lustre/kernel_patches/patches/extN-san.patch b/lustre/kernel_patches/patches/extN-san.patch deleted file mode 100644 index d58fe8c..0000000 --- a/lustre/kernel_patches/patches/extN-san.patch +++ /dev/null @@ -1,106 +0,0 @@ - fs/ext3/inode.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/super.c | 4 ++ - 2 files changed, 85 insertions(+) - ---- linux-2.4.18-18.8.0-l18/fs/ext3/inode.c~extN-san Sun May 18 12:58:13 2003 -+++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/inode.c Sun May 18 13:24:49 2003 -@@ -2781,3 +2781,84 @@ int ext3_change_inode_journal_flag(struc - * here, in ext3_aops_journal_start() to ensure that the forthcoming "see if we - * need to extend" test in ext3_prepare_write() succeeds. - */ -+ -+/* for each block: 1 ind + 1 dind + 1 tind -+ * for each block: 3 bitmap blocks -+ * for each block: 3 group descriptor blocks -+ * i inode block -+ * 1 superblock -+ * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files -+ * ((1+1+1) * 3 * nblocks) + 1 + 1 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ * -+ * XXX assuming: -+ * (1) fs logic block size == page size -+ * (2) ext3 in writeback mode -+ */ -+static inline int ext3_san_write_trans_blocks(int nblocks) -+{ -+ int ret; -+ -+ ret = (1 + 1 + 1) * 3 * nblocks + 1 + 1; -+ -+#ifdef CONFIG_QUOTA -+ ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif -+ -+ return ret; -+} -+ -+/* Alloc blocks for an inode, while don't create any buffer/page -+ * for data I/O; set the inode size if file is extended. 
-+ * -+ * @inode: target inode -+ * @blocks: array of logic block number -+ * @nblocks: how many blocks need be alloced -+ * @newsize: new filesize we should set -+ * -+ * return: 0 success, otherwise failed -+ * (*blocks) contains physical block number alloced -+ * -+ * XXX this assume the fs block size == page size -+ */ -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize) -+{ -+ handle_t *handle; -+ struct buffer_head bh_tmp; -+ int needed_blocks; -+ int i, ret = 0, ret2; -+ -+ needed_blocks = ext3_san_write_trans_blocks(nblocks); -+ -+ lock_kernel(); -+ handle = ext3_journal_start(inode, needed_blocks); -+ if (IS_ERR(handle)) { -+ unlock_kernel(); -+ return PTR_ERR(handle); -+ } -+ unlock_kernel(); -+ -+ /* alloc blocks one by one */ -+ for (i = 0; i < nblocks; i++) { -+ ret = ext3_get_block_handle(handle, inode, blocks[i], -+ &bh_tmp, 1); -+ if (ret) -+ break; -+ -+ blocks[i] = bh_tmp.b_blocknr; -+ } -+ -+ /* set inode size if needed */ -+ if (!ret && (newsize > inode->i_size)) { -+ inode->i_size = newsize; -+ ext3_mark_inode_dirty(handle, inode); -+ } -+ -+ lock_kernel(); -+ ret2 = ext3_journal_stop(handle, inode); -+ unlock_kernel(); -+ -+ if (!ret) -+ ret = ret2; -+ return ret; -+} ---- linux-2.4.18-18.8.0-l18/fs/ext3/super.c~extN-san Sun May 18 13:24:35 2003 -+++ linux-2.4.18-18.8.0-l18-phil/fs/ext3/super.c Sun May 18 13:24:55 2003 -@@ -1774,6 +1774,10 @@ static int __init init_ext3_fs(void) - - EXPORT_SYMBOL(ext3_bread); - -+int ext3_prep_san_write(struct inode *inode, long *blocks, -+ int nblocks, loff_t newsize); -+EXPORT_SYMBOL(ext3_prep_san_write); -+ - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); - -_ diff --git a/lustre/kernel_patches/patches/extN-wantedi.patch b/lustre/kernel_patches/patches/extN-wantedi.patch deleted file mode 100644 index 
fc74c6b..0000000 --- a/lustre/kernel_patches/patches/extN-wantedi.patch +++ /dev/null @@ -1,171 +0,0 @@ - fs/ext3/ialloc.c | 38 ++++++++++++++++++++++++++++++++++++-- - fs/ext3/ioctl.c | 25 +++++++++++++++++++++++++ - fs/ext3/namei.c | 12 ++++++++---- - include/linux/ext3_fs.h | 5 ++++- - 4 files changed, 73 insertions(+), 7 deletions(-) - ---- linux-2.4.20/fs/ext3/namei.c~extN-wantedi 2003-04-08 23:35:55.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-04-08 23:35:55.000000000 -0600 -@@ -1555,7 +1555,8 @@ static int ext3_create (struct inode * d - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode (handle, dir, mode, -+ (unsigned long)dentry->d_fsdata); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; -@@ -1583,7 +1584,8 @@ static int ext3_mknod (struct inode * di - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, mode); -+ inode = ext3_new_inode (handle, dir, mode, -+ (unsigned long)dentry->d_fsdata); - err = PTR_ERR(inode); - if (!IS_ERR(inode)) { - init_special_inode(inode, mode, rdev); -@@ -1613,7 +1615,8 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR | mode); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode, -+ (unsigned long)dentry->d_fsdata); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -2009,7 +2012,8 @@ static int ext3_symlink (struct inode * - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); -+ inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO, -+ (unsigned long)dentry->d_fsdata); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; ---- linux-2.4.20/fs/ext3/ialloc.c~extN-wantedi 2003-04-08 23:35:55.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/ialloc.c 2003-04-08 23:35:55.000000000 -0600 -@@ -299,7 +299,8 @@ error_return: 
- * group to find a free inode. - */ - struct inode * ext3_new_inode (handle_t *handle, -- const struct inode * dir, int mode) -+ const struct inode * dir, int mode, -+ unsigned long goal) - { - struct super_block * sb; - struct buffer_head * bh; -@@ -323,7 +324,39 @@ struct inode * ext3_new_inode (handle_t - init_rwsem(&inode->u.ext3_i.truncate_sem); - - lock_super (sb); -- es = sb->u.ext3_sb.s_es; -+ es = EXT3_SB(sb)->s_es; -+ -+ if (goal) { -+ i = (goal - 1) / EXT3_INODES_PER_GROUP(sb); -+ j = (goal - 1) % EXT3_INODES_PER_GROUP(sb); -+ gdp = ext3_get_group_desc(sb, i, &bh2); -+ -+ bitmap_nr = load_inode_bitmap (sb, i); -+ if (bitmap_nr < 0) -+ goto fail; -+ -+ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr]; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, bh); -+ if (err) goto fail; -+ -+ if (ext3_set_bit(j, bh->b_data)) { -+ printk(KERN_ERR "goal inode %lu unavailable\n", goal); -+ /* Oh well, we tried. */ -+ goto repeat; -+ } -+ -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, bh); -+ if (err) goto fail; -+ -+ /* We've shortcircuited the allocation system successfully, -+ * now finish filling in the inode. 
-+ */ -+ goto have_bit_and_group; -+ } -+ - repeat: - gdp = NULL; - i = 0; -@@ -438,6 +471,7 @@ repeat: - } - goto repeat; - } -+ have_bit_and_group: - j += i * EXT3_INODES_PER_GROUP(sb) + 1; - if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", ---- linux-2.4.20/fs/ext3/ioctl.c~extN-wantedi 2003-04-08 23:35:55.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/ioctl.c 2003-04-08 23:35:55.000000000 -0600 -@@ -23,6 +23,31 @@ int ext3_ioctl (struct inode * inode, st - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); - - switch (cmd) { -+ case EXT3_IOC_CREATE_INUM: { -+ char name[32]; -+ struct dentry *dchild, *dparent; -+ int rc = 0; -+ -+ dparent = list_entry(inode->i_dentry.next, struct dentry, -+ d_alias); -+ snprintf(name, sizeof name, "%lu", arg); -+ dchild = lookup_one_len(name, dparent, strlen(name)); -+ if (dchild->d_inode) { -+ printk(KERN_ERR "%*s/%lu already exists (ino %lu)\n", -+ dparent->d_name.len, dparent->d_name.name, arg, -+ dchild->d_inode->i_ino); -+ rc = -EEXIST; -+ } else { -+ dchild->d_fsdata = (void *)arg; -+ rc = vfs_create(inode, dchild, 0644); -+ if (rc) -+ printk(KERN_ERR "vfs_create: %d\n", rc); -+ else if (dchild->d_inode->i_ino != arg) -+ rc = -EEXIST; -+ } -+ dput(dchild); -+ return rc; -+ } - case EXT3_IOC_GETFLAGS: - flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE; - return put_user(flags, (int *) arg); ---- linux-2.4.20/include/linux/ext3_fs.h~extN-wantedi 2003-04-08 23:35:55.000000000 -0600 -+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-04-08 23:35:55.000000000 -0600 -@@ -201,6 +201,7 @@ struct ext3_group_desc - #define EXT3_IOC_SETFLAGS _IOW('f', 2, long) - #define EXT3_IOC_GETVERSION _IOR('f', 3, long) - #define EXT3_IOC_SETVERSION _IOW('f', 4, long) -+/* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). 
*/ - #define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) - #define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) - #ifdef CONFIG_JBD_DEBUG -@@ -671,7 +672,8 @@ extern int ext3fs_dirhash(const char *na - dx_hash_info *hinfo); - - /* ialloc.c */ --extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); -+extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int, -+ unsigned long); - extern void ext3_free_inode (handle_t *, struct inode *); - extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); - extern unsigned long ext3_count_free_inodes (struct super_block *); -@@ -757,4 +759,5 @@ extern struct inode_operations ext3_fast - - #endif /* __KERNEL__ */ - -+#define EXT3_IOC_CREATE_INUM _IOW('f', 5, long) - #endif /* _LINUX_EXT3_FS_H */ - -_ diff --git a/lustre/kernel_patches/patches/htree-ext3-2.4.18.patch b/lustre/kernel_patches/patches/htree-ext3-2.4.18.patch deleted file mode 100644 index a54e9ca..0000000 --- a/lustre/kernel_patches/patches/htree-ext3-2.4.18.patch +++ /dev/null @@ -1,1201 +0,0 @@ ---- ./fs/ext3/super.c 2002/03/05 06:18:59 2.1 -+++ ./fs/ext3/super.c 2002/03/05 06:26:56 -@@ -529,6 +529,12 @@ - "EXT3 Check option not supported\n"); - #endif - } -+ else if (!strcmp (this_char, "index")) -+#ifdef CONFIG_EXT3_INDEX -+ set_opt (*mount_options, INDEX); -+#else -+ printk("EXT3 index option not supported\n"); -+#endif - else if (!strcmp (this_char, "debug")) - set_opt (*mount_options, DEBUG); - else if (!strcmp (this_char, "errors")) { -@@ -702,6 +708,12 @@ - es->s_mtime = cpu_to_le32(CURRENT_TIME); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -+ -+ if (test_opt(sb, INDEX)) -+ EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX); -+ else if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) -+ set_opt (EXT3_SB(sb)->s_mount_opt, INDEX); -+ - ext3_commit_super (sb, es, 1); - if (test_opt (sb, DEBUG)) - printk (KERN_INFO ---- 
./fs/ext3/namei.c 2002/03/05 06:18:59 2.1 -+++ ./fs/ext3/namei.c 2002/03/06 00:13:18 -@@ -16,6 +16,10 @@ - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 -+ * Hash Tree Directory indexing (c) -+ * Daniel Phillips, 2001 -+ * Hash Tree Directory indexing porting -+ * Christopher Li, 2002 - */ - - #include -@@ -33,7 +33,7 @@ - #include - #include - #include -- -+#include - - /* - * define how far ahead to read directories while searching them. -@@ -38,6 +42,437 @@ - #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) - #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) - -+static struct buffer_head *ext3_append(handle_t *handle, -+ struct inode *inode, -+ u32 *block, int *err) -+{ -+ struct buffer_head *bh; -+ -+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits; -+ -+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) { -+ inode->i_size += inode->i_sb->s_blocksize; -+ EXT3_I(inode)->i_disksize = inode->i_size; -+ ext3_journal_get_write_access(handle,bh); -+ } -+ return bh; -+} -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#ifndef swap -+#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -+#endif -+ -+typedef struct { u32 v; } le_u32; -+typedef struct { u16 v; } le_u16; -+ -+#define dxtrace_on(command) command -+#define dxtrace_off(command) -+ -+struct fake_dirent -+{ -+ /*le*/u32 inode; -+ /*le*/u16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; -+ -+struct dx_countlimit -+{ -+ le_u16 limit; -+ le_u16 count; -+}; -+ -+struct dx_entry -+{ -+ le_u32 hash; -+ le_u32 block; -+}; -+ -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. 
-+ */ -+ -+struct dx_root -+{ -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ le_u32 reserved_zero; -+ u8 hash_version; /* 0 now, 1 at release */ -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct dx_entry entries[0]; -+}; -+ -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct dx_entry entries[0]; -+}; -+ -+ -+struct dx_frame -+{ -+ struct buffer_head *bh; -+ struct dx_entry *entries; -+ struct dx_entry *at; -+}; -+ -+struct dx_map_entry -+{ -+ u32 hash; -+ u32 offs; -+}; -+ -+typedef struct ext3_dir_entry_2 ext3_dirent; -+static inline unsigned dx_get_block (struct dx_entry *entry); -+static void dx_set_block (struct dx_entry *entry, unsigned value); -+static inline unsigned dx_get_hash (struct dx_entry *entry); -+static void dx_set_hash (struct dx_entry *entry, unsigned value); -+static unsigned dx_get_count (struct dx_entry *entries); -+static unsigned dx_get_limit (struct dx_entry *entries); -+static void dx_set_count (struct dx_entry *entries, unsigned value); -+static void dx_set_limit (struct dx_entry *entries, unsigned value); -+static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -+static unsigned dx_node_limit (struct inode *dir); -+static unsigned dx_hack_hash (const u8 *name, int len); -+static struct dx_frame *dx_probe (struct inode *dir, u32 hash, struct dx_frame *frame); -+static void dx_release (struct dx_frame *frames); -+static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]); -+static void dx_sort_map(struct dx_map_entry *map, unsigned count); -+static ext3_dirent *dx_copy_dirents (char *from, char *to, -+ struct dx_map_entry *map, int count); -+static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -+ -+ -+#ifdef CONFIG_EXT3_INDEX -+/* -+ * Future: use high four bits of block for coalesce-on-delete flags -+ * Mask them off for now. 
-+ */ -+ -+static inline unsigned dx_get_block (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->block.v) & 0x00ffffff; -+} -+ -+static inline void dx_set_block (struct dx_entry *entry, unsigned value) -+{ -+ entry->block.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_hash (struct dx_entry *entry) -+{ -+ return le32_to_cpu(entry->hash.v); -+} -+ -+static inline void dx_set_hash (struct dx_entry *entry, unsigned value) -+{ -+ entry->hash.v = cpu_to_le32(value); -+} -+ -+static inline unsigned dx_get_count (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->count.v); -+} -+ -+static inline unsigned dx_get_limit (struct dx_entry *entries) -+{ -+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit.v); -+} -+ -+static inline void dx_set_count (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->count.v = cpu_to_le16(value); -+} -+ -+static inline void dx_set_limit (struct dx_entry *entries, unsigned value) -+{ -+ ((struct dx_countlimit *) entries)->limit.v = cpu_to_le16(value); -+} -+ -+static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - -+ EXT3_DIR_REC_LEN(2) - infosize; -+ return 0? 20: entry_space / sizeof(struct dx_entry); -+} -+ -+static inline unsigned dx_node_limit (struct inode *dir) -+{ -+ unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); -+ return 0? 
22: entry_space / sizeof(struct dx_entry); -+} -+ -+/* Hash function - not bad, but still looking for an ideal default */ -+ -+static unsigned dx_hack_hash (const u8 *name, int len) -+{ -+ u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; -+ while (len--) -+ { -+ u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); -+ if (hash & 0x80000000) hash -= 0x7fffffff; -+ hash1 = hash0; -+ hash0 = hash; -+ } -+ return hash0; -+} -+ -+#define dx_hash(s,n) (dx_hack_hash(s,n) << 1) -+ -+/* -+ * Debug -+ */ -+#ifdef DX_DEBUG -+#define dxtrace dxtrace_on -+static void dx_show_index (char * label, struct dx_entry *entries) -+{ -+ int i, n = dx_get_count (entries); -+ printk("%s index ", label); -+ for (i = 0; i < n; i++) -+ { -+ printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); -+ } -+ printk("\n"); -+} -+ -+struct stats -+{ -+ unsigned names; -+ unsigned space; -+ unsigned bcount; -+}; -+ -+static struct stats dx_show_leaf (ext3_dirent *de, int size, int show_names) -+{ -+ unsigned names = 0, space = 0; -+ char *base = (char *) de; -+ printk("names: "); -+ while ((char *) de < base + size) -+ { -+ if (de->inode) -+ { -+ if (show_names) -+ { -+ int len = de->name_len; -+ char *name = de->name; -+ while (len--) printk("%c", *name++); -+ printk(":%x.%u ", dx_hash (de->name, de->name_len), ((char *) de - base)); -+ } -+ space += EXT3_DIR_REC_LEN(de->name_len); -+ names++; -+ } -+ de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ printk("(%i)\n", names); -+ return (struct stats) { names, space, 1 }; -+} -+ -+struct stats dx_show_entries (struct inode *dir, struct dx_entry *entries, int levels) -+{ -+ unsigned blocksize = dir->i_sb->s_blocksize; -+ unsigned count = dx_get_count (entries), names = 0, space = 0, i; -+ unsigned bcount = 0; -+ struct buffer_head *bh; -+ int err; -+ printk("%i indexed blocks...\n", count); -+ for (i = 0; i < count; i++, entries++) -+ { -+ u32 block = dx_get_block(entries), hash = i? 
dx_get_hash(entries): 0; -+ u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; -+ struct stats stats; -+ printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); -+ if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; -+ stats = levels? -+ dx_show_entries (dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): -+ dx_show_leaf ((ext3_dirent *) bh->b_data, blocksize, 0); -+ names += stats.names; -+ space += stats.space; -+ bcount += stats.bcount; -+ brelse (bh); -+ } -+ if (bcount) -+ printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", -+ names, space/bcount,(space/bcount)*100/blocksize); -+ return (struct stats) { names, space, bcount}; -+} -+#else -+#define dxtrace dxtrace_off -+#endif -+ -+/* -+ * Probe for a directory leaf block to search -+ */ -+ -+static struct dx_frame * -+dx_probe(struct inode *dir, u32 hash, struct dx_frame *frame_in) -+{ -+ unsigned count, indirect; -+ struct dx_entry *at, *entries, *p, *q, *m; -+ struct dx_root *root; -+ struct buffer_head *bh; -+ struct dx_frame *frame = frame_in; -+ int err; -+ -+ frame->bh = NULL; -+ if (!(bh = ext3_bread(NULL, dir, 0, 0, &err))) -+ goto fail; -+ root = (struct dx_root *) bh->b_data; -+ if (root->info.hash_version > 0 || root->info.unused_flags & 1) { -+ brelse(bh); -+ goto fail; -+ } -+ if ((indirect = root->info.indirect_levels) > 1) { -+ brelse(bh); -+ goto fail; -+ } -+ entries = (struct dx_entry *) (((char *) &root->info) + root->info.info_length); -+ assert (dx_get_limit(entries) == dx_root_limit(dir, root->info.info_length)); -+ dxtrace (printk("Look up %x", hash)); -+ while (1) -+ { -+ count = dx_get_count(entries); -+ assert (count && count <= dx_get_limit(entries)); -+ p = entries + 1; -+ q = entries + count - 1; -+ while (p <= q) -+ { -+ m = p + (q - p)/2; -+ dxtrace(printk(".")); -+ if (dx_get_hash(m) > hash) -+ q = m - 1; -+ else -+ p = m + 1; -+ } -+ -+ if (0) // linear search cross check -+ { -+ unsigned n = count - 1; -+ at = entries; 
-+ while (n--) -+ { -+ dxtrace(printk(",")); -+ if (dx_get_hash(++at) > hash) -+ { -+ at--; -+ break; -+ } -+ } -+ assert (at == p - 1); -+ } -+ -+ at = p - 1; -+ dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); -+ frame->bh = bh; -+ frame->entries = entries; -+ frame->at = at; -+ if (!indirect--) return frame; -+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0,&err))) -+ goto fail2; -+ at = entries = ((struct dx_node *) bh->b_data)->entries; -+ assert (dx_get_limit(entries) == dx_node_limit (dir)); -+ frame++; -+ } -+fail2: -+ while (frame >= frame_in) { -+ brelse(frame->bh); -+ frame--; -+ } -+fail: -+ return NULL; -+} -+ -+static void dx_release (struct dx_frame *frames) -+{ -+ if (frames[0].bh == NULL) -+ return; -+ -+ if (((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels) -+ brelse (frames[1].bh); -+ brelse (frames[0].bh); -+} -+ -+/* -+ * Directory block splitting, compacting -+ */ -+ -+static int dx_make_map (ext3_dirent *de, int size, struct dx_map_entry map[]) -+{ -+ int count = 0; -+ char *base = (char *) de; -+ while ((char *) de < base + size) { -+ if (de->name_len && de->inode) { -+ map[count].hash = dx_hash (de->name, de->name_len); -+ map[count].offs = (u32) ((char *) de - base); -+ count++; -+ } -+ de = (ext3_dirent *) ((char *) de + le16_to_cpu(de->rec_len)); -+ } -+ return count; -+} -+ -+static void dx_sort_map (struct dx_map_entry *map, unsigned count) -+{ -+ struct dx_map_entry *p, *q, *top = map + count - 1; -+ int more; -+ /* Combsort until bubble sort doesn't suck */ -+ while (count > 2) -+ { -+ count = count*10/13; -+ if (count - 9 < 2) /* 9, 10 -> 11 */ -+ count = 11; -+ for (p = top, q = p - count; q >= map; p--, q--) -+ if (p->hash < q->hash) -+ swap(*p, *q); -+ } -+ /* Garden variety bubble sort */ -+ do { -+ more = 0; -+ q = top; -+ while (q-- > map) -+ { -+ if (q[1].hash >= q[0].hash) -+ continue; -+ swap(*(q+1), *q); -+ more = 1; -+ } -+ } while(more); -+} -+ -+static void 
dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) -+{ -+ struct dx_entry *entries = frame->entries; -+ struct dx_entry *old = frame->at, *new = old + 1; -+ int count = dx_get_count(entries); -+ -+ assert(count < dx_get_limit(entries)); -+ assert(old < entries + count); -+ memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); -+ dx_set_hash(new, hash); -+ dx_set_block(new, block); -+ dx_set_count(entries, count + 1); -+} -+#endif -+ -+static void ext3_update_dx_flag(struct inode *inode) -+{ -+ if (!test_opt(inode->i_sb, INDEX)) -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ - /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. - * -@@ -95,6 +529,15 @@ - } - - /* -+ * p is at least 6 bytes before the end of page -+ */ -+static inline ext3_dirent *ext3_next_entry(ext3_dirent *p) -+{ -+ return (ext3_dirent *)((char*)p + le16_to_cpu(p->rec_len)); -+} -+ -+ -+/* - * ext3_find_entry() - * - * finds an entry in the specified directory with the wanted name. It -@@ -105,6 +548,8 @@ - * The returned buffer_head has ->b_count elevated. The caller is expected - * to brelse() it when appropriate. 
- */ -+ -+ - static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) - { -@@ -119,10 +564,70 @@ - int num = 0; - int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; -+ ext3_dirent *de, *top; - - *res_dir = NULL; - sb = dir->i_sb; -+ if (dentry->d_name.len > EXT3_NAME_LEN) -+ return NULL; -+ if (ext3_dx && is_dx(dir)) { -+ u32 hash = dx_hash(dentry->d_name.name, dentry->d_name.len); -+ struct dx_frame frames[2], *frame; -+ if (!(frame = dx_probe (dir, hash, frames))) -+ return NULL; -+dxnext: -+ block = dx_get_block(frame->at); -+ if (!(bh = ext3_bread (NULL,dir, block, 0, &err))) -+ goto dxfail; -+ de = (ext3_dirent *) bh->b_data; -+ top = (ext3_dirent *) ((char *) de + sb->s_blocksize - -+ EXT3_DIR_REC_LEN(0)); -+ for (; de < top; de = ext3_next_entry(de)) -+ if (ext3_match(dentry->d_name.len, dentry->d_name.name, de)) { -+ if (!ext3_check_dir_entry("ext3_find_entry", -+ dir, de, bh, -+ (block<b_data))) { -+ brelse (bh); -+ goto dxfail; -+ } -+ *res_dir = de; -+ goto dxfound; -+ } -+ brelse (bh); -+ /* Same hash continues in next block? Search on. 
*/ -+ if (++(frame->at) == frame->entries + dx_get_count(frame->entries)) -+ { -+ struct buffer_head *bh2; -+ if (frame == frames) -+ goto dxfail; -+ if (++(frames->at) == frames->entries + dx_get_count(frames->entries)) -+ goto dxfail; -+ /* should omit read if not continued */ -+ if (!(bh2 = ext3_bread (NULL, dir, -+ dx_get_block(frames->at), -+ 0, &err))) -+ goto dxfail; -+ brelse (frame->bh); -+ frame->bh = bh2; -+ frame->at = frame->entries = ((struct dx_node *) bh2->b_data)->entries; -+ /* Subtle: the 0th entry has the count, find the hash in frame above */ -+ if ((dx_get_hash(frames->at) & -2) == hash) -+ goto dxnext; -+ goto dxfail; -+ } -+ if ((dx_get_hash(frame->at) & -2) == hash) -+ goto dxnext; -+dxfail: -+ dxtrace(printk("%s not found\n", name)); -+ dx_release (frames); -+ return NULL; -+dxfound: -+ dx_release (frames); -+ return bh; - -+ } -+ - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); - start = dir->u.ext3_i.i_dir_start_lookup; - if (start >= nblocks) -@@ -237,6 +748,90 @@ - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; - } - -+static ext3_dirent * -+dx_copy_dirents (char *from, char *to, struct dx_map_entry *map, int count) -+{ -+ unsigned rec_len = 0; -+ -+ while (count--) { -+ ext3_dirent *de = (ext3_dirent *) (from + map->offs); -+ rec_len = EXT3_DIR_REC_LEN(de->name_len); -+ memcpy (to, de, rec_len); -+ ((ext3_dirent *) to)->rec_len = rec_len; -+ to += rec_len; -+ map++; -+ } -+ return (ext3_dirent *) (to - rec_len); -+} -+ -+#ifdef CONFIG_EXT3_INDEX -+static ext3_dirent *do_split(handle_t *handle, struct inode *dir, -+ struct buffer_head **bh,struct dx_frame *frame, -+ u32 hash, int *error) -+{ -+ unsigned count; -+ struct buffer_head *bh2; -+ u32 newblock; -+ u32 hash2; -+ struct dx_map_entry *map; -+ char *data1 = (*bh)->b_data, *data2, *data3; -+ unsigned split; -+ ext3_dirent *de, *de2; -+ -+ bh2 = ext3_append (handle, dir, &newblock, error); -+ if (!(bh2)) -+ { -+ brelse(*bh); -+ *bh = NULL; -+ return (ext3_dirent 
*)bh2; -+ } -+ -+ BUFFER_TRACE(*bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, *bh); -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, frame->bh); -+ -+ data2 = bh2->b_data; -+ -+ map = kmalloc(sizeof(*map) * PAGE_CACHE_SIZE/EXT3_DIR_REC_LEN(1) + 1, -+ GFP_KERNEL); -+ if (!map) -+ panic("no memory for do_split\n"); -+ count = dx_make_map((ext3_dirent *)data1, dir->i_sb->s_blocksize, map); -+ split = count/2; // need to adjust to actual middle -+ dx_sort_map (map, count); -+ hash2 = map[split].hash; -+ dxtrace(printk("Split block %i at %x, %i/%i\n", -+ dx_get_block(frame->at), hash2, split, count-split)); -+ -+ /* Fancy dance to stay within two buffers */ -+ de2 = dx_copy_dirents (data1, data2, map + split, count - split); -+ data3 = (char *) de2 + de2->rec_len; -+ de = dx_copy_dirents (data1, data3, map, split); -+ memcpy(data1, data3, (char *) de + de->rec_len - data3); -+ de = (ext3_dirent *) ((char *) de - data3 + data1); // relocate de -+ de->rec_len = cpu_to_le16(data1 + dir->i_sb->s_blocksize - (char *)de); -+ de2->rec_len = cpu_to_le16(data2 + dir->i_sb->s_blocksize-(char *)de2); -+ dxtrace(dx_show_leaf((ext3_dirent *)data1, dir->i_sb->s_blocksize, 1)); -+ dxtrace(dx_show_leaf((ext3_dirent *)data2, dir->i_sb->s_blocksize, 1)); -+ -+ /* Which block gets the new entry? 
*/ -+ if (hash >= hash2) -+ { -+ swap(*bh, bh2); -+ de = de2; -+ } -+ dx_insert_block(frame, hash2 + (hash2 == map[split-1].hash), newblock); -+ ext3_journal_dirty_metadata (handle, bh2); -+ brelse (bh2); -+ ext3_journal_dirty_metadata (handle, frame->bh); -+ dxtrace(dx_show_index ("frame", frame->entries)); -+ kfree(map); -+ return de; -+} -+#endif -+ -+ - /* - * ext3_add_entry() - * -@@ -255,118 +849,278 @@ - struct inode *inode) - { - struct inode *dir = dentry->d_parent->d_inode; -- const char *name = dentry->d_name.name; -- int namelen = dentry->d_name.len; - unsigned long offset; -- unsigned short rec_len; - struct buffer_head * bh; -- struct ext3_dir_entry_2 * de, * de1; -- struct super_block * sb; -+ ext3_dirent *de; -+ struct super_block * sb = dir->i_sb; - int retval; -+ unsigned short reclen = EXT3_DIR_REC_LEN(dentry->d_name.len); - -- sb = dir->i_sb; -+ unsigned nlen, rlen; -+ u32 block, blocks; -+ char *top; - -- if (!namelen) -+ if (!dentry->d_name.len) - return -EINVAL; -- bh = ext3_bread (handle, dir, 0, 0, &retval); -- if (!bh) -- return retval; -- rec_len = EXT3_DIR_REC_LEN(namelen); -- offset = 0; -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- while (1) { -- if ((char *)de >= sb->s_blocksize + bh->b_data) { -- brelse (bh); -- bh = NULL; -- bh = ext3_bread (handle, dir, -- offset >> EXT3_BLOCK_SIZE_BITS(sb), 1, &retval); -- if (!bh) -- return retval; -- if (dir->i_size <= offset) { -- if (dir->i_size == 0) { -- brelse(bh); -- return -ENOENT; -+ if (ext3_dx && is_dx(dir)) { -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries, *at; -+ u32 hash; -+ char *data1; -+ -+ hash = dx_hash(dentry->d_name.name, dentry->d_name.len); -+ /* FIXME: do something if dx_probe() fails here */ -+ frame = dx_probe(dir, hash, frames); -+ entries = frame->entries; -+ at = frame->at; -+ -+ if (!(bh = ext3_bread(handle,dir, dx_get_block(at), 0,&retval))) -+ goto dxfail1; -+ -+ BUFFER_TRACE(bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, 
bh); -+ -+ data1 = bh->b_data; -+ de = (ext3_dirent *) data1; -+ top = data1 + (0? 200: sb->s_blocksize); -+ while ((char *) de < top) -+ { -+ /* FIXME: check EEXIST and dir */ -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode? rlen - nlen: rlen) >= reclen) -+ goto dx_add; -+ de = (ext3_dirent *) ((char *) de + rlen); -+ } -+ /* Block full, should compress but for now just split */ -+ dxtrace(printk("using %u of %u node entries\n", -+ dx_get_count(entries), dx_get_limit(entries))); -+ /* Need to split index? */ -+ if (dx_get_count(entries) == dx_get_limit(entries)) -+ { -+ u32 newblock; -+ unsigned icount = dx_get_count(entries); -+ int levels = frame - frames; -+ struct dx_entry *entries2; -+ struct dx_node *node2; -+ struct buffer_head *bh2; -+ if (levels && dx_get_count(frames->entries) == dx_get_limit(frames->entries)) -+ goto dxfull; -+ bh2 = ext3_append (handle, dir, &newblock, &retval); -+ if (!(bh2)) -+ goto dxfail2; -+ node2 = (struct dx_node *)(bh2->b_data); -+ entries2 = node2->entries; -+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); -+ node2->fake.inode = 0; -+ BUFFER_TRACE(frame->bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, frame->bh); -+ if (levels) -+ { -+ unsigned icount1 = icount/2, icount2 = icount - icount1; -+ unsigned hash2 = dx_get_hash(entries + icount1); -+ dxtrace(printk("Split index %i/%i\n", icount1, icount2)); -+ -+ BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ -+ ext3_journal_get_write_access(handle, frames[0].bh); -+ -+ memcpy ((char *) entries2, (char *) (entries + icount1), -+ icount2 * sizeof(struct dx_entry)); -+ dx_set_count (entries, icount1); -+ dx_set_count (entries2, icount2); -+ dx_set_limit (entries2, dx_node_limit(dir)); -+ -+ /* Which index block gets the new entry? 
*/ -+ if (at - entries >= icount1) { -+ frame->at = at = at - entries - icount1 + entries2; -+ frame->entries = entries = entries2; -+ swap(frame->bh, bh2); - } -- -- ext3_debug ("creating next block\n"); -- -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- de = (struct ext3_dir_entry_2 *) bh->b_data; -- de->inode = 0; -- de->rec_len = le16_to_cpu(sb->s_blocksize); -- dir->u.ext3_i.i_disksize = -- dir->i_size = offset + sb->s_blocksize; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- ext3_mark_inode_dirty(handle, dir); -+ dx_insert_block (frames + 0, hash2, newblock); -+ dxtrace(dx_show_index ("node", frames[1].entries)); -+ dxtrace(dx_show_index ("node", -+ ((struct dx_node *) bh2->b_data)->entries)); -+ ext3_journal_dirty_metadata(handle, bh2); -+ brelse (bh2); - } else { -- -- ext3_debug ("skipping to next block\n"); -- -- de = (struct ext3_dir_entry_2 *) bh->b_data; -+ dxtrace(printk("Creating second level index...\n")); -+ memcpy((char *) entries2, (char *) entries, -+ icount * sizeof(struct dx_entry)); -+ dx_set_limit(entries2, dx_node_limit(dir)); -+ -+ /* Set up root */ -+ dx_set_count(entries, 1); -+ dx_set_block(entries + 0, newblock); -+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; -+ -+ /* Add new access path frame */ -+ frame = frames + 1; -+ frame->at = at = at - entries + entries2; -+ frame->entries = entries = entries2; -+ frame->bh = bh2; -+ ext3_journal_get_write_access(handle, frame->bh); - } -+ ext3_journal_dirty_metadata(handle, frames[0].bh); - } -- if (!ext3_check_dir_entry ("ext3_add_entry", dir, de, bh, -- offset)) { -- brelse (bh); -- return -ENOENT; -- } -- if (ext3_match (namelen, name, de)) { -+ de = do_split(handle, dir, &bh, frame, hash, &retval); -+ dx_release (frames); -+ if (!(de)) -+ goto fail; -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ goto add; -+ -+dx_add: -+ dx_release (frames); -+ goto add; -+ -+dxfull: -+ ext3_warning(sb, 
__FUNCTION__, "Directory index full!\n"); -+ retval = -ENOSPC; -+dxfail2: -+ brelse(bh); -+dxfail1: -+ dx_release (frames); -+ goto fail1; -+ } -+ -+ blocks = dir->i_size >> sb->s_blocksize_bits; -+ for (block = 0, offset = 0; block < blocks; block++) { -+ bh = ext3_bread(handle, dir, block, 0, &retval); -+ if(!bh) -+ return retval; -+ de = (ext3_dirent *)bh->b_data; -+ top = bh->b_data + sb->s_blocksize - reclen; -+ while ((char *) de <= top) { -+ if (!ext3_check_dir_entry("ext3_add_entry", dir, de, -+ bh, offset)) { -+ brelse (bh); -+ return -EIO; -+ } -+ if (ext3_match(dentry->d_name.len,dentry->d_name.name,de)) { - brelse (bh); - return -EEXIST; -- } -- if ((le32_to_cpu(de->inode) == 0 && -- le16_to_cpu(de->rec_len) >= rec_len) || -- (le16_to_cpu(de->rec_len) >= -- EXT3_DIR_REC_LEN(de->name_len) + rec_len)) { -- BUFFER_TRACE(bh, "get_write_access"); -- ext3_journal_get_write_access(handle, bh); -- /* By now the buffer is marked for journaling */ -- offset += le16_to_cpu(de->rec_len); -- if (le32_to_cpu(de->inode)) { -- de1 = (struct ext3_dir_entry_2 *) ((char *) de + -- EXT3_DIR_REC_LEN(de->name_len)); -- de1->rec_len = -- cpu_to_le16(le16_to_cpu(de->rec_len) - -- EXT3_DIR_REC_LEN(de->name_len)); -- de->rec_len = cpu_to_le16( -- EXT3_DIR_REC_LEN(de->name_len)); -- de = de1; - } -- de->file_type = EXT3_FT_UNKNOWN; -- if (inode) { -- de->inode = cpu_to_le32(inode->i_ino); -- ext3_set_de_type(dir->i_sb, de, inode->i_mode); -- } else -- de->inode = 0; -- de->name_len = namelen; -- memcpy (de->name, name, namelen); -- /* -- * XXX shouldn't update any times until successful -- * completion of syscall, but too many callers depend -- * on this. -- * -- * XXX similarly, too many callers depend on -- * ext3_new_inode() setting the times, but error -- * recovery deletes the inode, so the worst that can -- * happen is that the times are slightly out of date -- * and/or different from the directory change time. 
-- */ -- dir->i_mtime = dir->i_ctime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -- dir->i_version = ++event; -- ext3_mark_inode_dirty(handle, dir); -- BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -- ext3_journal_dirty_metadata(handle, bh); -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ if ((de->inode ? rlen - nlen: rlen) >= reclen) -+ goto add; -+ de = (ext3_dirent *)((char *)de + rlen); -+ offset += rlen; -+ } -+ if (ext3_dx && blocks == 1 && test_opt(sb, INDEX)) -+ goto dx_make_index; -+ brelse(bh); -+ } -+ bh = ext3_append(handle, dir, &block, &retval); -+ if (!bh) -+ return retval; -+ de = (ext3_dirent *) bh->b_data; -+ de->inode = 0; -+ de->rec_len = cpu_to_le16(rlen = sb->s_blocksize); -+ nlen = 0; -+ goto add; -+ -+add: -+ BUFFER_TRACE(bh, "get_write_access"); -+ ext3_journal_get_write_access(handle, bh); -+ /* By now the buffer is marked for journaling */ -+ if (de->inode) { -+ ext3_dirent *de1 = (ext3_dirent *)((char *)de + nlen); -+ de1->rec_len = cpu_to_le16(rlen - nlen); -+ de->rec_len = cpu_to_le16(nlen); -+ de = de1; -+ } -+ de->file_type = EXT3_FT_UNKNOWN; -+ if (inode) { -+ de->inode = cpu_to_le32(inode->i_ino); -+ ext3_set_de_type(dir->i_sb, de, inode->i_mode); -+ } else -+ de->inode = 0; -+ de->name_len = dentry->d_name.len; -+ memcpy (de->name, dentry->d_name.name, dentry->d_name.len); -+ /* -+ * XXX shouldn't update any times until successful -+ * completion of syscall, but too many callers depend -+ * on this. -+ * -+ * XXX similarly, too many callers depend on -+ * ext3_new_inode() setting the times, but error -+ * recovery deletes the inode, so the worst that can -+ * happen is that the times are slightly out of date -+ * and/or different from the directory change time. 
-+ */ -+ dir->i_mtime = dir->i_ctime = CURRENT_TIME; -+ ext3_update_dx_flag(dir); -+ dir->i_version = ++event; -+ ext3_mark_inode_dirty(handle, dir); -+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); -+ ext3_journal_dirty_metadata(handle, bh); -+ brelse(bh); -+ return 0; -+ -+dx_make_index: -+ { -+ struct buffer_head *bh2; -+ struct dx_root *root; -+ struct dx_frame frames[2], *frame; -+ struct dx_entry *entries; -+ ext3_dirent *de2; -+ char *data1; -+ unsigned len; -+ u32 hash; -+ -+ dxtrace(printk("Creating index\n")); -+ ext3_journal_get_write_access(handle, bh); -+ root = (struct dx_root *) bh->b_data; -+ -+ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; -+ bh2 = ext3_append (handle, dir, &block, &retval); -+ if (!(bh2)) -+ { - brelse(bh); -- return 0; -+ return retval; - } -- offset += le16_to_cpu(de->rec_len); -- de = (struct ext3_dir_entry_2 *) -- ((char *) de + le16_to_cpu(de->rec_len)); -+ data1 = bh2->b_data; -+ -+ /* The 0th block becomes the root, move the dirents out */ -+ de = (ext3_dirent *) &root->info; -+ len = ((char *) root) + sb->s_blocksize - (char *) de; -+ memcpy (data1, de, len); -+ de = (ext3_dirent *) data1; -+ top = data1 + len; -+ while (((char *) de2=(char*)de+le16_to_cpu(de->rec_len)) < top) -+ de = de2; -+ de->rec_len = cpu_to_le16(data1 + sb->s_blocksize - (char *)de); -+ /* Initialize the root; the dot dirents already exist */ -+ de = (ext3_dirent *) (&root->dotdot); -+ de->rec_len = cpu_to_le16(sb->s_blocksize-EXT3_DIR_REC_LEN(2)); -+ memset (&root->info, 0, sizeof(root->info)); -+ root->info.info_length = sizeof(root->info); -+ entries = root->entries; -+ dx_set_block (entries, 1); -+ dx_set_count (entries, 1); -+ dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); -+ -+ /* Initialize as for dx_probe */ -+ hash = dx_hash (dentry->d_name.name, dentry->d_name.len); -+ frame = frames; -+ frame->entries = entries; -+ frame->at = entries; -+ frame->bh = bh; -+ bh = bh2; -+ de = do_split(handle,dir, &bh, frame, hash, 
&retval); -+ dx_release (frames); -+ if (!(de)) -+ return retval; -+ nlen = EXT3_DIR_REC_LEN(de->name_len); -+ rlen = le16_to_cpu(de->rec_len); -+ goto add; - } -- brelse (bh); -- return -ENOSPC; -+fail1: -+ return retval; -+fail: -+ return -ENOENT; - } - - /* -@@ -451,7 +1212,8 @@ - struct inode * inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -478,7 +1240,8 @@ - struct inode *inode; - int err; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -507,7 +1270,8 @@ - if (dir->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 3); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -550,7 +1320,7 @@ - if (err) - goto out_no_entry; - dir->i_nlink++; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - d_instantiate(dentry, inode); - out_stop: -@@ -832,7 +1596,7 @@ - ext3_mark_inode_dirty(handle, inode); - dir->i_nlink--; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - - end_rmdir: -@@ -878,7 +1642,7 @@ - if (retval) - goto end_unlink; - dir->i_ctime = dir->i_mtime = CURRENT_TIME; -- dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); - inode->i_nlink--; - if (!inode->i_nlink) -@@ -904,7 +1668,8 @@ - if (l > dir->i_sb->s_blocksize) - return -ENAMETOOLONG; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 5); 
-+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -959,7 +1724,8 @@ - if (inode->i_nlink >= EXT3_LINK_MAX) - return -EMLINK; - -- handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS); -+ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -995,7 +1761,8 @@ - - old_bh = new_bh = dir_bh = NULL; - -- handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2); -+ handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + -+ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); - -@@ -1077,7 +1844,7 @@ - new_inode->i_ctime = CURRENT_TIME; - } - old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; -- old_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(old_dir); - if (dir_bh) { - BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); -@@ -1089,7 +1856,7 @@ - new_inode->i_nlink--; - } else { - new_dir->i_nlink++; -- new_dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL; -+ ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); - } - } ---- ./include/linux/ext3_fs.h 2002/03/05 06:18:59 2.1 -+++ ./include/linux/ext3_fs.h 2002/03/05 06:26:56 -@@ -339,6 +339,7 @@ - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -575,6 +576,24 @@ - #define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) - #define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) -+/* -+ * Hash Tree Directory indexing -+ * (c) Daniel Phillips, 2001 -+ */ -+ 
-+#define CONFIG_EXT3_INDEX -+ -+#ifdef CONFIG_EXT3_INDEX -+ enum {ext3_dx = 1}; -+ #define is_dx(dir) (EXT3_I(dir)->i_flags & EXT3_INDEX_FL) -+#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) -+#else -+ enum {ext3_dx = 0}; -+ #define is_dx(dir) 0 -+#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -+#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) -+#endif - - #ifdef __KERNEL__ - /* ---- ./include/linux/ext3_jbd.h 2002/03/05 06:18:59 2.1 -+++ ./include/linux/ext3_jbd.h 2002/03/05 06:33:54 -@@ -63,6 +63,8 @@ - - #define EXT3_RESERVE_TRANS_BLOCKS 12 - -+#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, diff --git a/lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch b/lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch deleted file mode 100644 index 6e7d920..0000000 --- a/lustre/kernel_patches/patches/invalidate_show-2.4.20-rh.patch +++ /dev/null @@ -1,114 +0,0 @@ - fs/inode.c | 23 +++++++++++++++-------- - fs/smbfs/inode.c | 2 +- - fs/super.c | 4 ++-- - include/linux/fs.h | 2 +- - 4 files changed, 19 insertions(+), 12 deletions(-) - ---- kernel-2.4.20/fs/inode.c~invalidate_show-2.4.20-rh 2003-05-24 01:56:40.000000000 -0400 -+++ kernel-2.4.20-root/fs/inode.c 2003-06-02 00:35:37.000000000 -0400 -@@ -628,7 +628,8 @@ static void dispose_list(struct list_hea - /* - * Invalidate all inodes for a device. 
- */ --static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) -+static int invalidate_list(struct list_head *head, struct super_block * sb, -+ struct list_head * dispose, int show) - { - struct list_head *next; - int busy = 0, count = 0; -@@ -653,6 +654,11 @@ static int invalidate_list(struct list_h - count++; - continue; - } -+ if (show) -+ printk(KERN_ERR -+ "inode busy: dev %s:%lu (%p) mode %o count %u\n", -+ kdevname(sb->s_dev), inode->i_ino, inode, -+ inode->i_mode, atomic_read(&inode->i_count)); - busy = 1; - } - /* only unused inodes may be cached with i_count zero */ -@@ -671,23 +677,24 @@ static int invalidate_list(struct list_h - /** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock -+ * @show: whether we should display any busy inodes found - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. 
- */ - --int invalidate_inodes(struct super_block * sb) -+int invalidate_inodes(struct super_block * sb, int show) - { - int busy; - LIST_HEAD(throw_away); - - spin_lock(&inode_lock); -- busy = invalidate_list(&inode_in_use, sb, &throw_away); -- busy |= invalidate_list(&inode_unused, sb, &throw_away); -- busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away); -- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); -- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); -+ busy = invalidate_list(&inode_in_use, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused_pagecache, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show); - spin_unlock(&inode_lock); - - dispose_list(&throw_away); -@@ -713,7 +720,7 @@ int invalidate_device(kdev_t dev, int do - * hold). - */ - shrink_dcache_sb(sb); -- res = invalidate_inodes(sb); -+ res = invalidate_inodes(sb, 0); - drop_super(sb); - } - invalidate_buffers(dev); ---- kernel-2.4.20/fs/super.c~invalidate_show-2.4.20-rh 2003-05-24 01:56:24.000000000 -0400 -+++ kernel-2.4.20-root/fs/super.c 2003-06-02 00:35:00.000000000 -0400 -@@ -943,7 +943,7 @@ void kill_super(struct super_block *sb) - lock_super(sb); - lock_kernel(); - sb->s_flags &= ~MS_ACTIVE; -- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ -+ invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */ - if (sop) { - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); -@@ -952,7 +952,7 @@ void kill_super(struct super_block *sb) - } - - /* Forget any remaining inodes */ -- if (invalidate_inodes(sb)) { -+ if (invalidate_inodes(sb, 1)) { - printk(KERN_ERR "VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. 
Have a nice day...\n"); - } ---- kernel-2.4.20/include/linux/fs.h~invalidate_show-2.4.20-rh 2003-06-02 00:31:47.000000000 -0400 -+++ kernel-2.4.20-root/include/linux/fs.h 2003-06-02 00:35:00.000000000 -0400 -@@ -1284,7 +1284,7 @@ static inline void mark_buffer_dirty_ino - extern void set_buffer_flushtime(struct buffer_head *); - extern void balance_dirty(void); - extern int check_disk_change(kdev_t); --extern int invalidate_inodes(struct super_block *); -+extern int invalidate_inodes(struct super_block *, int); - extern int invalidate_device(kdev_t, int); - extern void invalidate_inode_pages(struct inode *); - extern void invalidate_inode_pages2(struct address_space *); ---- kernel-2.4.20/fs/smbfs/inode.c~invalidate_show-2.4.20-rh 2002-11-28 18:53:15.000000000 -0500 -+++ kernel-2.4.20-root/fs/smbfs/inode.c 2003-06-02 00:35:00.000000000 -0400 -@@ -167,7 +167,7 @@ smb_invalidate_inodes(struct smb_sb_info - { - VERBOSE("\n"); - shrink_dcache_sb(SB_of(server)); -- invalidate_inodes(SB_of(server)); -+ invalidate_inodes(SB_of(server), 0); - } - - /* - -_ diff --git a/lustre/kernel_patches/patches/invalidate_show.patch b/lustre/kernel_patches/patches/invalidate_show.patch deleted file mode 100644 index 9273c5c..0000000 --- a/lustre/kernel_patches/patches/invalidate_show.patch +++ /dev/null @@ -1,115 +0,0 @@ - - - - fs/inode.c | 21 ++++++++++++++------- - fs/smbfs/inode.c | 2 +- - fs/super.c | 4 ++-- - include/linux/fs.h | 2 +- - 4 files changed, 18 insertions(+), 11 deletions(-) - ---- linux-rh-2.4.20-8/fs/inode.c~invalidate_show 2003-04-11 14:04:56.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/inode.c 2003-04-16 20:59:35.000000000 +0800 -@@ -604,7 +604,8 @@ static void dispose_list(struct list_hea - /* - * Invalidate all inodes for a device. 
- */ --static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose) -+static int invalidate_list(struct list_head *head, struct super_block * sb, -+ struct list_head * dispose, int show) - { - struct list_head *next; - int busy = 0, count = 0; -@@ -629,6 +630,11 @@ static int invalidate_list(struct list_h - count++; - continue; - } -+ if (show) -+ printk(KERN_ERR -+ "inode busy: dev %s:%lu (%p) mode %o count %u\n", -+ kdevname(sb->s_dev), inode->i_ino, inode, -+ inode->i_mode, atomic_read(&inode->i_count)); - busy = 1; - } - /* only unused inodes may be cached with i_count zero */ -@@ -647,22 +653,23 @@ static int invalidate_list(struct list_h - /** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock -+ * @show: whether we should display any busy inodes found - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. - */ - --int invalidate_inodes(struct super_block * sb) -+int invalidate_inodes(struct super_block * sb, int show) - { - int busy; - LIST_HEAD(throw_away); - - spin_lock(&inode_lock); -- busy = invalidate_list(&inode_in_use, sb, &throw_away); -- busy |= invalidate_list(&inode_unused, sb, &throw_away); -- busy |= invalidate_list(&sb->s_dirty, sb, &throw_away); -- busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away); -+ busy = invalidate_list(&inode_in_use, sb, &throw_away, show); -+ busy |= invalidate_list(&inode_unused, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_dirty, sb, &throw_away, show); -+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away, show); - spin_unlock(&inode_lock); - - dispose_list(&throw_away); -@@ -688,7 +695,7 @@ int invalidate_device(kdev_t dev, int do - * hold). 
- */ - shrink_dcache_sb(sb); -- res = invalidate_inodes(sb); -+ res = invalidate_inodes(sb, 0); - drop_super(sb); - } - invalidate_buffers(dev); ---- linux-rh-2.4.20-8/fs/super.c~invalidate_show 2003-04-11 14:04:57.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/super.c 2003-04-16 20:59:35.000000000 +0800 -@@ -943,7 +943,7 @@ void kill_super(struct super_block *sb) - lock_super(sb); - lock_kernel(); - sb->s_flags &= ~MS_ACTIVE; -- invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ -+ invalidate_inodes(sb, 0); /* bad name - it should be evict_inodes() */ - if (sop) { - if (sop->write_super && sb->s_dirt) - sop->write_super(sb); -@@ -952,7 +952,7 @@ void kill_super(struct super_block *sb) - } - - /* Forget any remaining inodes */ -- if (invalidate_inodes(sb)) { -+ if (invalidate_inodes(sb, 1)) { - printk(KERN_ERR "VFS: Busy inodes after unmount. " - "Self-destruct in 5 seconds. Have a nice day...\n"); - } ---- linux-rh-2.4.20-8/include/linux/fs.h~invalidate_show 2003-04-16 20:55:35.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/fs.h 2003-04-16 20:59:35.000000000 +0800 -@@ -1283,7 +1283,7 @@ static inline void mark_buffer_dirty_ino - extern void set_buffer_flushtime(struct buffer_head *); - extern void balance_dirty(void); - extern int check_disk_change(kdev_t); --extern int invalidate_inodes(struct super_block *); -+extern int invalidate_inodes(struct super_block *, int); - extern int invalidate_device(kdev_t, int); - extern void invalidate_inode_pages(struct inode *); - extern void invalidate_inode_pages2(struct address_space *); ---- linux-rh-2.4.20-8/fs/smbfs/inode.c~invalidate_show 2003-04-16 20:59:48.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/smbfs/inode.c 2003-04-16 21:00:43.000000000 +0800 -@@ -167,7 +167,7 @@ smb_invalidate_inodes(struct smb_sb_info - { - VERBOSE("\n"); - shrink_dcache_sb(SB_of(server)); -- invalidate_inodes(SB_of(server)); -+ invalidate_inodes(SB_of(server), 0); - } - - /* - -_ diff --git 
a/lustre/kernel_patches/patches/iod-rmap-exports-2.4.20.patch b/lustre/kernel_patches/patches/iod-rmap-exports-2.4.20.patch deleted file mode 100644 index 3fdf3fd..0000000 --- a/lustre/kernel_patches/patches/iod-rmap-exports-2.4.20.patch +++ /dev/null @@ -1,86 +0,0 @@ - fs/Makefile | 4 +++- - fs/inode.c | 4 +++- - mm/Makefile | 2 +- - mm/page_alloc.c | 1 + - mm/vmscan.c | 3 +++ - 5 files changed, 11 insertions(+), 3 deletions(-) - ---- linux-rh-2.4.20-6/fs/inode.c~iod-rmap-exports Tue Apr 1 01:01:56 2003 -+++ linux-rh-2.4.20-6-braam/fs/inode.c Tue Apr 1 01:01:56 2003 -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -66,7 +67,8 @@ static LIST_HEAD(anon_hash_chain); /* fo - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ --static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+EXPORT_SYMBOL(inode_lock); - - /* - * Statistics gathering.. ---- linux-rh-2.4.20-6/fs/Makefile~iod-rmap-exports Tue Apr 1 01:01:56 2003 -+++ linux-rh-2.4.20-6-braam/fs/Makefile Tue Apr 1 01:02:34 2003 -@@ -1,3 +1,5 @@ -+ -+ - # - # Makefile for the Linux filesystems. 
- # -@@ -7,7 +9,7 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o dcookies.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o dcookies.o inode.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ---- linux-rh-2.4.20-6/mm/vmscan.c~iod-rmap-exports Tue Apr 1 01:01:56 2003 -+++ linux-rh-2.4.20-6-braam/mm/vmscan.c Tue Apr 1 01:01:56 2003 -@@ -15,6 +15,8 @@ - * O(1) rmap vm, Arjan van de ven - */ - -+#include -+#include - #include - #include - #include -@@ -1061,6 +1063,7 @@ void wakeup_kswapd(unsigned int gfp_mask - set_current_state(TASK_RUNNING); - remove_wait_queue(&kswapd_done, &wait); - } -+EXPORT_SYMBOL(wakeup_kswapd); - - static void wakeup_memwaiters(void) - { ---- linux-rh-2.4.20-6/mm/Makefile~iod-rmap-exports Tue Apr 1 01:01:56 2003 -+++ linux-rh-2.4.20-6-braam/mm/Makefile Tue Apr 1 01:01:56 2003 -@@ -9,7 +9,7 @@ - - O_TARGET := mm.o - --export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o -+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o vmscan.o - - obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ ---- linux-rh-2.4.20-6/mm/page_alloc.c~iod-rmap-exports Tue Apr 1 01:01:56 2003 -+++ linux-rh-2.4.20-6-braam/mm/page_alloc.c Tue Apr 1 01:01:56 2003 -@@ -27,6 +27,7 @@ - - int nr_swap_pages; - pg_data_t *pgdat_list; -+EXPORT_SYMBOL(pgdat_list); - - /* - * - -_ diff --git a/lustre/kernel_patches/patches/iod-rmap-exports.patch b/lustre/kernel_patches/patches/iod-rmap-exports.patch deleted file mode 100644 index 5ba68dd..0000000 --- a/lustre/kernel_patches/patches/iod-rmap-exports.patch +++ /dev/null @@ -1,86 +0,0 @@ - fs/Makefile | 4 +++- - fs/inode.c | 4 +++- - mm/Makefile | 2 +- - mm/page_alloc.c | 1 + - mm/vmscan.c | 3 +++ - 5 files changed, 11 insertions(+), 3 deletions(-) - ---- linux-2.4.18-18/fs/inode.c~iod-rmap-exports Thu Apr 3 00:40:01 2003 -+++ 
linux-2.4.18-18-braam/fs/inode.c Thu Apr 3 00:40:01 2003 -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -66,7 +67,8 @@ static LIST_HEAD(anon_hash_chain); /* fo - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ --static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+EXPORT_SYMBOL(inode_lock); - - /* - * Statistics gathering.. ---- linux-2.4.18-18/fs/Makefile~iod-rmap-exports Thu Apr 3 00:40:01 2003 -+++ linux-2.4.18-18-braam/fs/Makefile Thu Apr 3 00:40:29 2003 -@@ -1,3 +1,5 @@ -+ -+ - # - # Makefile for the Linux filesystems. - # -@@ -7,7 +9,7 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o -+export-objs := filesystems.o open.o dcache.o buffer.o inode.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ---- linux-2.4.18-18/mm/vmscan.c~iod-rmap-exports Thu Apr 3 00:40:01 2003 -+++ linux-2.4.18-18-braam/mm/vmscan.c Thu Apr 3 00:40:01 2003 -@@ -14,6 +14,8 @@ - * Multiqueue VM started 5.8.00, Rik van Riel. 
- */ - -+#include -+#include - #include - #include - #include -@@ -837,6 +839,7 @@ void wakeup_kswapd(unsigned int gfp_mask - set_current_state(TASK_RUNNING); - remove_wait_queue(&kswapd_done, &wait); - } -+EXPORT_SYMBOL(wakeup_kswapd); - - static void wakeup_memwaiters(void) - { ---- linux-2.4.18-18/mm/Makefile~iod-rmap-exports Thu Apr 3 00:40:01 2003 -+++ linux-2.4.18-18-braam/mm/Makefile Thu Apr 3 00:40:01 2003 -@@ -9,7 +9,7 @@ - - O_TARGET := mm.o - --export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o -+export-objs := shmem.o filemap.o memory.o page_alloc.o mempool.o vmscan.o - - obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ ---- linux-2.4.18-18/mm/page_alloc.c~iod-rmap-exports Thu Apr 3 00:40:01 2003 -+++ linux-2.4.18-18-braam/mm/page_alloc.c Thu Apr 3 00:40:01 2003 -@@ -31,6 +31,7 @@ int nr_active_pages; - int nr_inactive_dirty_pages; - int nr_inactive_clean_pages; - pg_data_t *pgdat_list; -+EXPORT_SYMBOL(pgdat_list); - - /* - * The zone_table array is used to look up the address of the - -_ diff --git a/lustre/kernel_patches/patches/iod-stock-24-exports.patch b/lustre/kernel_patches/patches/iod-stock-24-exports.patch deleted file mode 100644 index 2070377..0000000 --- a/lustre/kernel_patches/patches/iod-stock-24-exports.patch +++ /dev/null @@ -1,48 +0,0 @@ - fs/Makefile | 2 +- - fs/inode.c | 4 +++- - mm/page_alloc.c | 1 + - 3 files changed, 5 insertions(+), 2 deletions(-) - ---- linux-2.4.20/fs/inode.c~iod-stock-24-exports Wed Apr 2 23:21:20 2003 -+++ linux-2.4.20-braam/fs/inode.c Wed Apr 2 23:21:20 2003 -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -66,7 +67,8 @@ static LIST_HEAD(anon_hash_chain); /* fo - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. 
- */ --static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+EXPORT_SYMBOL(inode_lock); - - /* - * Statistics gathering.. ---- linux-2.4.20/fs/Makefile~iod-stock-24-exports Wed Apr 2 23:21:20 2003 -+++ linux-2.4.20-braam/fs/Makefile Wed Apr 2 23:21:53 2003 -@@ -7,7 +7,7 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o -+export-objs := filesystems.o open.o dcache.o buffer.o inode.o - mod-subdirs := nls - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ---- linux-2.4.20/mm/page_alloc.c~iod-stock-24-exports Wed Apr 2 23:21:20 2003 -+++ linux-2.4.20-braam/mm/page_alloc.c Wed Apr 2 23:21:20 2003 -@@ -28,6 +28,7 @@ int nr_inactive_pages; - LIST_HEAD(inactive_list); - LIST_HEAD(active_list); - pg_data_t *pgdat_list; -+EXPORT_SYMBOL(pgdat_list); - - /* - * - -_ diff --git a/lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch b/lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch deleted file mode 100644 index 3035f55..0000000 --- a/lustre/kernel_patches/patches/iod-stock-24-exports_hp.patch +++ /dev/null @@ -1,48 +0,0 @@ - fs/Makefile | 2 +- - fs/inode.c | 4 +++- - mm/page_alloc.c | 1 + - 3 files changed, 5 insertions(+), 2 deletions(-) - ---- linux/fs/inode.c~iod-stock-24-exports_hp Wed Apr 9 10:44:54 2003 -+++ linux-mmonroe/fs/inode.c Wed Apr 9 10:49:50 2003 -@@ -5,6 +5,7 @@ - */ - - #include -+#include - #include - #include - #include -@@ -66,7 +67,8 @@ static LIST_HEAD(anon_hash_chain); /* fo - * NOTE! You also have to own the lock if you change - * the i_state of an inode while it is in use.. - */ --static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+spinlock_t inode_lock = SPIN_LOCK_UNLOCKED; -+EXPORT_SYMBOL(inode_lock); - - /* - * Statistics gathering.. 
---- linux/fs/Makefile~iod-stock-24-exports_hp Wed Apr 9 10:26:08 2003 -+++ linux-mmonroe/fs/Makefile Wed Apr 9 10:49:50 2003 -@@ -7,7 +7,7 @@ - - O_TARGET := fs.o - --export-objs := filesystems.o open.o dcache.o buffer.o dquot.o -+export-objs := filesystems.o open.o dcache.o buffer.o dquot.o inode.o - mod-subdirs := nls xfs - - obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ---- linux/mm/page_alloc.c~iod-stock-24-exports_hp Wed Apr 9 10:26:14 2003 -+++ linux-mmonroe/mm/page_alloc.c Wed Apr 9 10:49:50 2003 -@@ -28,6 +28,7 @@ int nr_inactive_pages; - LIST_HEAD(inactive_list); - LIST_HEAD(active_list); - pg_data_t *pgdat_list; -+EXPORT_SYMBOL(pgdat_list); - - /* - * - -_ diff --git a/lustre/kernel_patches/patches/iopen-2.4.18.patch b/lustre/kernel_patches/patches/iopen-2.4.18.patch deleted file mode 100644 index 6eabe85..0000000 --- a/lustre/kernel_patches/patches/iopen-2.4.18.patch +++ /dev/null @@ -1,414 +0,0 @@ - 0 files changed - ---- linux-2.4.18-chaos52/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-04-13 15:21:33.000000000 +0800 -+++ linux-2.4.18-chaos52-root/Documentation/filesystems/ext2.txt 2003-06-03 17:10:55.000000000 +0800 -@@ -35,6 +35,22 @@ resgid=n The group ID which may use th - - sb=n Use alternate superblock at this location. - -+iopen Makes an invisible pseudo-directory called -+ __iopen__ available in the root directory -+ of the filesystem. Allows open-by-inode- -+ number. i.e., inode 3145 can be accessed -+ via /mntpt/__iopen__/3145 -+ -+iopen_nopriv This option makes the iopen directory be -+ world-readable. This may be safer since it -+ allows daemons to run as an unprivileged user, -+ however it significantly changes the security -+ model of a Unix filesystem, since previously -+ all files under a mode 700 directory were not -+ generally avilable even if the -+ permissions on the file itself is -+ world-readable. -+ - grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. 
- - ---- linux-2.4.18-chaos52/fs/ext3/Makefile~iopen-2.4.18 2003-06-01 03:24:07.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/Makefile 2003-06-03 17:10:55.000000000 +0800 -@@ -11,7 +11,7 @@ O_TARGET := ext3.o - - export-objs := super.o inode.o xattr.o - --obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o xattr.o - obj-m := $(O_TARGET) - ---- linux-2.4.18-chaos52/fs/ext3/inode.c~iopen-2.4.18 2003-06-03 17:10:21.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/inode.c 2003-06-03 17:10:55.000000000 +0800 -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2135,6 +2136,9 @@ void ext3_read_inode(struct inode * inod - struct buffer_head *bh; - int block; - -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if(ext3_get_inode_loc(inode, &iloc)) - goto bad_inode; - bh = iloc.bh; ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.c 2003-06-03 17:10:55.000000000 +0800 -@@ -0,0 +1,259 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). 
Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. -+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & 
DCACHE_NFSD_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ iput(inode); -+ spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; -+ spin_unlock(&dcache_lock); -+ -+ d_add(dentry, inode); -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. 
-+ */ -+struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ /* verify this dentry is really new */ -+ assert(!de->d_inode); -+ assert(list_empty(&de->d_subdirs)); -+ assert(list_empty(&de->d_alias)); -+ -+ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) { -+ spin_unlock(&dcache_lock); -+ return NULL; -+ } -+ -+ /* Move the goal to the de hash queue - like d_move() */ -+ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; -+ list_del(&goal->d_hash); -+ list_add(&goal->d_hash, &de->d_hash); -+ -+ list_del(&goal->d_child); -+ list_del(&de->d_child); -+ -+ /* Switch the parents and the names.. */ -+ switch_names(goal, de); -+ do_switch(goal->d_parent, de->d_parent); -+ do_switch(goal->d_name.len, de->d_name.len); -+ do_switch(goal->d_name.hash, de->d_name.hash); -+ -+ /* And add them back to the (new) parent lists */ -+ list_add(&goal->d_child, &goal->d_parent->d_subdirs); -+ list_add(&de->d_child, &de->d_parent->d_subdirs); -+ spin_unlock(&dcache_lock); -+ -+ return goal; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. 
-+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. 
-+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ inode->u.ext3_i.i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/iopen.h 2003-06-03 17:10:55.000000000 +0800 -@@ -0,0 +1,13 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); ---- linux-2.4.18-chaos52/fs/ext3/namei.c~iopen-2.4.18 2003-06-03 17:10:20.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/namei.c 2003-06-03 17:10:55.000000000 +0800 -@@ -34,6 +34,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * define how far ahead to read directories while searching them. 
-@@ -703,16 +704,21 @@ cleanup_and_exit: - brelse (bh_use[ra_ptr]); - return ret; - } -+struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode); - - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; - struct ext3_dir_entry_2 * de; - struct buffer_head * bh; -+ struct dentry *alternate = NULL; - - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -723,6 +729,12 @@ static struct dentry *ext3_lookup(struct - if (!inode) - return ERR_PTR(-EACCES); - } -+ -+ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) { -+ iput(inode); -+ return alternate; -+ } -+ - d_add(dentry, inode); - return NULL; - } ---- linux-2.4.18-chaos52/fs/ext3/super.c~iopen-2.4.18 2003-06-03 17:10:21.000000000 +0800 -+++ linux-2.4.18-chaos52-root/fs/ext3/super.c 2003-06-03 17:10:55.000000000 +0800 -@@ -820,6 +820,17 @@ static int parse_options (char * options - || !strcmp (this_char, "quota") - || !strcmp (this_char, "usrquota")) - /* Don't do anything ;-) */ ; -+ else if (!strcmp (this_char, "iopen")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } else if (!strcmp (this_char, "noiopen")) { -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "iopen_nopriv")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } - else if (!strcmp (this_char, "journal")) { - /* @@@ FIXME */ - /* Eventually we will want to be able to create ---- linux-2.4.18-chaos52/include/linux/ext3_fs.h~iopen-2.4.18 2003-06-03 17:10:22.000000000 +0800 -+++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h 2003-06-03 17:12:08.000000000 +0800 -@@ -321,6 +321,8 @@ struct ext3_inode { - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal 
format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */ -+#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - -_ diff --git a/lustre/kernel_patches/patches/iopen-2.4.20.patch b/lustre/kernel_patches/patches/iopen-2.4.20.patch deleted file mode 100644 index 3038cc87..0000000 --- a/lustre/kernel_patches/patches/iopen-2.4.20.patch +++ /dev/null @@ -1,423 +0,0 @@ - Documentation/filesystems/ext2.txt | 16 ++ - fs/ext3/Makefile | 2 - fs/ext3/inode.c | 4 - fs/ext3/iopen.c | 240 +++++++++++++++++++++++++++++++++++++ - fs/ext3/iopen.h | 15 ++ - fs/ext3/namei.c | 13 +- - fs/ext3/super.c | 11 + - include/linux/ext3_fs.h | 2 - 8 files changed, 301 insertions(+), 2 deletions(-) - ---- linux-2.4.20/Documentation/filesystems/ext2.txt~iopen 2001-07-11 16:44:45.000000000 -0600 -+++ linux-2.4.20-braam/Documentation/filesystems/ext2.txt 2003-05-17 14:06:00.000000000 -0600 -@@ -35,6 +35,22 @@ resgid=n The group ID which may use th - - sb=n Use alternate superblock at this location. - -+iopen Makes an invisible pseudo-directory called -+ __iopen__ available in the root directory -+ of the filesystem. Allows open-by-inode- -+ number. i.e., inode 3145 can be accessed -+ via /mntpt/__iopen__/3145 -+ -+iopen_nopriv This option makes the iopen directory be -+ world-readable. This may be safer since it -+ allows daemons to run as an unprivileged user, -+ however it significantly changes the security -+ model of a Unix filesystem, since previously -+ all files under a mode 700 directory were not -+ generally avilable even if the -+ permissions on the file itself is -+ world-readable. -+ - grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. 
- - ---- linux-2.4.20/fs/ext3/Makefile~iopen 2003-05-17 14:05:57.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/Makefile 2003-05-17 14:06:00.000000000 -0600 -@@ -11,7 +11,7 @@ O_TARGET := ext3.o - - export-objs := ext3-exports.o - --obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -+obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - ---- linux-2.4.20/fs/ext3/inode.c~iopen 2003-05-17 14:06:00.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/inode.c 2003-05-17 14:06:00.000000000 -0600 -@@ -31,6 +31,7 @@ - #include - #include - #include -+#include "iopen.h" - - /* - * SEARCH_FROM_ZERO forces each block allocation to search from the start -@@ -2137,6 +2138,9 @@ void ext3_read_inode(struct inode * inod - struct buffer_head *bh; - int block; - -+ if (ext3_iopen_get_inode(inode)) -+ return; -+ - if(ext3_get_inode_loc(inode, &iloc)) - goto bad_inode; - bh = iloc.bh; ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.20-braam/fs/ext3/iopen.c 2003-05-17 22:18:55.000000000 -0600 -@@ -0,0 +1,259 @@ -+/* -+ * linux/fs/ext3/iopen.c -+ * -+ * Special support for open by inode number -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ * -+ * -+ * Invariants: -+ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias -+ * for an inode at one time. -+ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry -+ * aliases on an inode at the same time. -+ * -+ * If we have any connected dentry aliases for an inode, use one of those -+ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED -+ * dentry for this inode, which thereafter will be found by the dcache -+ * when looking up this inode number in __iopen__, so we don't return here -+ * until it is gone. 
-+ * -+ * If we get an inode via a regular name lookup, then we "rename" the -+ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures -+ * existing users of the disconnected dentry will continue to use the same -+ * dentry as the connected users, and there will never be both kinds of -+ * dentry aliases at one time. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "iopen.h" -+ -+#ifndef assert -+#define assert(test) J_ASSERT(test) -+#endif -+ -+#define IOPEN_NAME_LEN 32 -+ -+/* -+ * This implements looking up an inode by number. -+ */ -+static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ unsigned long ino; -+ struct list_head *lp; -+ struct dentry *alternate; -+ char buf[IOPEN_NAME_LEN]; -+ -+ if (dentry->d_name.len >= IOPEN_NAME_LEN) -+ return ERR_PTR(-ENAMETOOLONG); -+ -+ memcpy(buf, dentry->d_name.name, dentry->d_name.len); -+ buf[dentry->d_name.len] = 0; -+ -+ if (strcmp(buf, ".") == 0) -+ ino = dir->i_ino; -+ else if (strcmp(buf, "..") == 0) -+ ino = EXT3_ROOT_INO; -+ else -+ ino = simple_strtoul(buf, 0, 0); -+ -+ if ((ino != EXT3_ROOT_INO && -+ //ino != EXT3_ACL_IDX_INO && -+ //ino != EXT3_ACL_DATA_INO && -+ ino < EXT3_FIRST_INO(dir->i_sb)) || -+ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count)) -+ return ERR_PTR(-ENOENT); -+ -+ inode = iget(dir->i_sb, ino); -+ if (!inode) -+ return ERR_PTR(-EACCES); -+ if (is_bad_inode(inode)) { -+ iput(inode); -+ return ERR_PTR(-ENOENT); -+ } -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ list_for_each(lp, &inode->i_dentry) { -+ alternate = list_entry(lp, struct dentry, d_alias); -+ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED)); -+ } -+ -+ if (!list_empty(&inode->i_dentry)) { -+ alternate = list_entry(inode->i_dentry.next, -+ struct dentry, d_alias); -+ dget_locked(alternate); -+ alternate->d_vfs_flags |= DCACHE_REFERENCED; -+ iput(inode); -+ 
spin_unlock(&dcache_lock); -+ return alternate; -+ } -+ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED; -+ spin_unlock(&dcache_lock); -+ -+ d_add(dentry, inode); -+ return NULL; -+} -+ -+#define do_switch(x,y) do { \ -+ __typeof__ (x) __tmp = x; \ -+ x = y; y = __tmp; } while (0) -+ -+static inline void switch_names(struct dentry *dentry, struct dentry *target) -+{ -+ const unsigned char *old_name, *new_name; -+ -+ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); -+ old_name = target->d_name.name; -+ new_name = dentry->d_name.name; -+ if (old_name == target->d_iname) -+ old_name = dentry->d_iname; -+ if (new_name == dentry->d_iname) -+ new_name = target->d_iname; -+ target->d_name.name = new_name; -+ dentry->d_name.name = old_name; -+} -+ -+/* This function is spliced into ext3_lookup and does the move of a -+ * disconnected dentry (if it exists) to a connected dentry. -+ */ -+struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode) -+{ -+ struct dentry *tmp, *goal = NULL; -+ struct list_head *lp; -+ -+ /* preferrably return a connected dentry */ -+ spin_lock(&dcache_lock); -+ /* verify this dentry is really new */ -+ assert(!de->d_inode); -+ assert(list_empty(&de->d_subdirs)); -+ assert(list_empty(&de->d_alias)); -+ -+ -+ list_for_each(lp, &inode->i_dentry) { -+ tmp = list_entry(lp, struct dentry, d_alias); -+ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) { -+ assert(tmp->d_alias.next == &inode->i_dentry); -+ assert(tmp->d_alias.prev == &inode->i_dentry); -+ goal = tmp; -+ dget_locked(goal); -+ break; -+ } -+ } -+ -+ if (!goal) { -+ spin_unlock(&dcache_lock); -+ return NULL; -+ } -+ -+ /* Move the goal to the de hash queue - like d_move() */ -+ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED; -+ list_del(&goal->d_hash); -+ list_add(&goal->d_hash, &de->d_hash); -+ -+ list_del(&goal->d_child); -+ list_del(&de->d_child); -+ -+ /* Switch the parents and the names.. 
*/ -+ switch_names(goal, de); -+ do_switch(goal->d_parent, de->d_parent); -+ do_switch(goal->d_name.len, de->d_name.len); -+ do_switch(goal->d_name.hash, de->d_name.hash); -+ -+ /* And add them back to the (new) parent lists */ -+ list_add(&goal->d_child, &goal->d_parent->d_subdirs); -+ list_add(&de->d_child, &de->d_parent->d_subdirs); -+ spin_unlock(&dcache_lock); -+ -+ return goal; -+} -+ -+/* -+ * These are the special structures for the iopen pseudo directory. -+ */ -+ -+static struct inode_operations iopen_inode_operations = { -+ lookup: iopen_lookup, /* BKL held */ -+}; -+ -+static struct file_operations iopen_file_operations = { -+ read: generic_read_dir, -+}; -+ -+static int match_dentry(struct dentry *dentry, const char *name) -+{ -+ int len; -+ -+ len = strlen(name); -+ if (dentry->d_name.len != len) -+ return 0; -+ if (strncmp(dentry->d_name.name, name, len)) -+ return 0; -+ return 1; -+} -+ -+/* -+ * This function is spliced into ext3_lookup and returns 1 the file -+ * name is __iopen__ and dentry has been filled in appropriately. -+ */ -+int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry) -+{ -+ struct inode *inode; -+ -+ if (dir->i_ino != EXT3_ROOT_INO || -+ !test_opt(dir->i_sb, IOPEN) || -+ !match_dentry(dentry, "__iopen__")) -+ return 0; -+ -+ inode = iget(dir->i_sb, EXT3_BAD_INO); -+ -+ if (!inode) -+ return 0; -+ d_add(dentry, inode); -+ return 1; -+} -+ -+/* -+ * This function is spliced into read_inode; it returns 1 if inode -+ * number is the one for /__iopen__, in which case the inode is filled -+ * in appropriately. Otherwise, this fuction returns 0. 
-+ */ -+int ext3_iopen_get_inode(struct inode *inode) -+{ -+ if (inode->i_ino != EXT3_BAD_INO) -+ return 0; -+ -+ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; -+ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) -+ inode->i_mode |= 0777; -+ inode->i_uid = 0; -+ inode->i_gid = 0; -+ inode->i_nlink = 1; -+ inode->i_size = 4096; -+ inode->i_atime = CURRENT_TIME; -+ inode->i_ctime = CURRENT_TIME; -+ inode->i_mtime = CURRENT_TIME; -+ inode->u.ext3_i.i_dtime = 0; -+ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size -+ * (for stat), not the fs block -+ * size */ -+ inode->i_blocks = 0; -+ inode->i_version = 1; -+ inode->i_generation = 0; -+ -+ inode->i_op = &iopen_inode_operations; -+ inode->i_fop = &iopen_file_operations; -+ inode->i_mapping->a_ops = 0; -+ -+ return 1; -+} ---- /dev/null 2003-01-30 03:24:37.000000000 -0700 -+++ linux-2.4.20-braam/fs/ext3/iopen.h 2003-05-17 14:06:00.000000000 -0600 -@@ -0,0 +1,13 @@ -+/* -+ * iopen.h -+ * -+ * Special support for opening files by inode number. -+ * -+ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). -+ * -+ * This file may be redistributed under the terms of the GNU General -+ * Public License. -+ */ -+ -+extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry); -+extern int ext3_iopen_get_inode(struct inode *inode); ---- linux-2.4.20/fs/ext3/namei.c~iopen 2003-05-17 14:05:59.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/namei.c 2003-05-17 22:23:08.000000000 -0600 -@@ -35,7 +35,7 @@ - #include - #include - #include -- -+#include "iopen.h" - - /* - * define how far ahead to read directories while searching them. 
-@@ -921,16 +921,21 @@ errout: - return NULL; - } - #endif -+struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode); - - static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry) - { - struct inode * inode; - struct ext3_dir_entry_2 * de; - struct buffer_head * bh; -+ struct dentry *alternate = NULL; - - if (dentry->d_name.len > EXT3_NAME_LEN) - return ERR_PTR(-ENAMETOOLONG); - -+ if (ext3_check_for_iopen(dir, dentry)) -+ return NULL; -+ - bh = ext3_find_entry(dentry, &de); - inode = NULL; - if (bh) { -@@ -942,6 +947,12 @@ static struct dentry *ext3_lookup(struct - return ERR_PTR(-EACCES); - } - } -+ -+ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) { -+ iput(inode); -+ return alternate; -+ } -+ - d_add(dentry, inode); - return NULL; - } ---- linux-2.4.20/fs/ext3/super.c~iopen 2003-05-17 14:05:59.000000000 -0600 -+++ linux-2.4.20-braam/fs/ext3/super.c 2003-05-17 14:06:00.000000000 -0600 -@@ -820,6 +820,17 @@ static int parse_options (char * options - || !strcmp (this_char, "quota") - || !strcmp (this_char, "usrquota")) - /* Don't do anything ;-) */ ; -+ else if (!strcmp (this_char, "iopen")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } else if (!strcmp (this_char, "noiopen")) { -+ clear_opt (sbi->s_mount_opt, IOPEN); -+ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } -+ else if (!strcmp (this_char, "iopen_nopriv")) { -+ set_opt (sbi->s_mount_opt, IOPEN); -+ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); -+ } - else if (!strcmp (this_char, "journal")) { - /* @@@ FIXME */ - /* Eventually we will want to be able to create ---- linux-2.4.20/include/linux/ext3_fs.h~iopen 2003-05-17 14:05:59.000000000 -0600 -+++ linux-2.4.20-braam/include/linux/ext3_fs.h 2003-05-17 14:06:29.000000000 -0600 -@@ -322,6 +322,8 @@ struct ext3_inode { - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - 
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -+#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */ -+#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */ - #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - -_ diff --git a/lustre/kernel_patches/patches/jbd-transno-cb.patch b/lustre/kernel_patches/patches/jbd-transno-cb.patch deleted file mode 100644 index ceb086d..0000000 --- a/lustre/kernel_patches/patches/jbd-transno-cb.patch +++ /dev/null @@ -1,240 +0,0 @@ - - - - fs/jbd/commit.c | 27 +++++++++++++++++++++--- - fs/jbd/journal.c | 1 - fs/jbd/transaction.c | 56 ++++++++++++++++++++++++++++++++++++++++----------- - include/linux/jbd.h | 20 ++++++++++++++++++ - 4 files changed, 90 insertions(+), 14 deletions(-) - ---- linux-2.4.19/fs/jbd/commit.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003 -+++ linux-2.4.19-root/fs/jbd/commit.c Sun Jan 19 19:46:42 2003 -@@ -475,7 +475,7 @@ start_journal_io: - transaction's t_log_list queue, and metadata buffers are on - the t_iobuf_list queue. - -- Wait for the transactions in reverse order. That way we are -+ Wait for the buffers in reverse order. That way we are - less likely to be woken up until all IOs have completed, and - so we incur less scheduling load. - */ -@@ -566,8 +566,10 @@ start_journal_io: - - jbd_debug(3, "JBD: commit phase 6\n"); - -- if (is_journal_aborted(journal)) -+ if (is_journal_aborted(journal)) { -+ unlock_journal(journal); - goto skip_commit; -+ } - - /* Done it all: now write the commit record. 
We should have - * cleaned up our previous buffers by now, so if we are in abort -@@ -577,6 +579,7 @@ start_journal_io: - descriptor = journal_get_descriptor_buffer(journal); - if (!descriptor) { - __journal_abort_hard(journal); -+ unlock_journal(journal); - goto skip_commit; - } - -@@ -600,7 +603,6 @@ start_journal_io: - put_bh(bh); /* One for getblk() */ - journal_unlock_journal_head(descriptor); - } -- lock_journal(journal); - - /* End of a transaction! Finally, we can do checkpoint - processing: any buffers committed as a result of this -@@ -609,6 +611,25 @@ start_journal_io: - - skip_commit: - -+ /* Call any callbacks that had been registered for handles in this -+ * transaction. It is up to the callback to free any allocated -+ * memory. -+ */ -+ if (!list_empty(&commit_transaction->t_jcb)) { -+ struct list_head *p, *n; -+ int error = is_journal_aborted(journal); -+ -+ list_for_each_safe(p, n, &commit_transaction->t_jcb) { -+ struct journal_callback *jcb; -+ -+ jcb = list_entry(p, struct journal_callback, jcb_list); -+ list_del(p); -+ jcb->jcb_func(jcb, error); -+ } -+ } -+ -+ lock_journal(journal); -+ - jbd_debug(3, "JBD: commit phase 7\n"); - - J_ASSERT(commit_transaction->t_sync_datalist == NULL); ---- linux-2.4.19/fs/jbd/journal.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003 -+++ linux-2.4.19-root/fs/jbd/journal.c Sun Jan 19 19:46:42 2003 -@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer); - #endif - EXPORT_SYMBOL(journal_flush); - EXPORT_SYMBOL(journal_revoke); -+EXPORT_SYMBOL(journal_callback_set); - - EXPORT_SYMBOL(journal_init_dev); - EXPORT_SYMBOL(journal_init_inode); ---- linux-2.4.19/fs/jbd/transaction.c~vanilla-2.4.19 Sun Jan 19 19:46:42 2003 -+++ linux-2.4.19-root/fs/jbd/transaction.c Sun Jan 19 19:46:42 2003 -@@ -57,6 +57,7 @@ static transaction_t * get_transaction ( - transaction->t_state = T_RUNNING; - transaction->t_tid = journal->j_transaction_sequence++; - transaction->t_expires = jiffies + journal->j_commit_interval; -+ 
INIT_LIST_HEAD(&transaction->t_jcb); - - /* Set up the commit timer for the new transaction. */ - J_ASSERT (!journal->j_commit_timer_active); -@@ -201,6 +202,20 @@ repeat_locked: - return 0; - } - -+/* Allocate a new handle. This should probably be in a slab... */ -+static handle_t *new_handle(int nblocks) -+{ -+ handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS); -+ if (!handle) -+ return NULL; -+ memset(handle, 0, sizeof (handle_t)); -+ handle->h_buffer_credits = nblocks; -+ handle->h_ref = 1; -+ INIT_LIST_HEAD(&handle->h_jcb); -+ -+ return handle; -+} -+ - /* - * Obtain a new handle. - * -@@ -227,14 +242,11 @@ handle_t *journal_start(journal_t *journ - handle->h_ref++; - return handle; - } -- -- handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS); -+ -+ handle = new_handle(nblocks); - if (!handle) - return ERR_PTR(-ENOMEM); -- memset (handle, 0, sizeof (handle_t)); - -- handle->h_buffer_credits = nblocks; -- handle->h_ref = 1; - current->journal_info = handle; - - err = start_this_handle(journal, handle); -@@ -333,14 +345,11 @@ handle_t *journal_try_start(journal_t *j - - if (is_journal_aborted(journal)) - return ERR_PTR(-EIO); -- -- handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS); -+ -+ handle = new_handle(nblocks); - if (!handle) - return ERR_PTR(-ENOMEM); -- memset (handle, 0, sizeof (handle_t)); - -- handle->h_buffer_credits = nblocks; -- handle->h_ref = 1; - current->journal_info = handle; - - err = try_start_this_handle(journal, handle); -@@ -1328,6 +1337,28 @@ out: - #endif - - /* -+ * Register a callback function for this handle. The function will be -+ * called when the transaction that this handle is part of has been -+ * committed to disk with the original callback data struct and the -+ * error status of the journal as parameters. There is no guarantee of -+ * ordering between handles within a single transaction, nor between -+ * callbacks registered on the same handle. 
-+ * -+ * The caller is responsible for allocating the journal_callback struct. -+ * This is to allow the caller to add as much extra data to the callback -+ * as needed, but reduce the overhead of multiple allocations. The caller -+ * allocated struct must start with a struct journal_callback at offset 0, -+ * and has the caller-specific data afterwards. -+ */ -+void journal_callback_set(handle_t *handle, -+ void (*func)(struct journal_callback *jcb, int error), -+ struct journal_callback *jcb) -+{ -+ list_add(&jcb->jcb_list, &handle->h_jcb); -+ jcb->jcb_func = func; -+} -+ -+/* - * All done for a particular handle. - * - * There is not much action needed here. We just return any remaining -@@ -1393,7 +1424,10 @@ int journal_stop(handle_t *handle) - wake_up(&journal->j_wait_transaction_locked); - } - -- /* -+ /* Move callbacks from the handle to the transaction. */ -+ list_splice(&handle->h_jcb, &transaction->t_jcb); -+ -+ /* - * If the handle is marked SYNC, we need to set another commit - * going! We also want to force a commit if the current - * transaction is occupying too much of the log, or if the ---- linux-2.4.19/include/linux/jbd.h~vanilla-2.4.19 Sun Jan 19 19:46:42 2003 -+++ linux-2.4.19-root/include/linux/jbd.h Sun Jan 19 19:46:42 2003 -@@ -249,6 +249,13 @@ static inline struct journal_head *bh2jh - return bh->b_private; - } - -+#define HAVE_JOURNAL_CALLBACK_STATUS -+struct journal_callback { -+ struct list_head jcb_list; -+ void (*jcb_func)(struct journal_callback *jcb, int error); -+ /* user data goes here */ -+}; -+ - struct jbd_revoke_table_s; - - /* The handle_t type represents a single atomic update being performed -@@ -279,6 +286,12 @@ struct handle_s - operations */ - int h_err; - -+ /* List of application registered callbacks for this handle. -+ * The function(s) will be called after the transaction that -+ * this handle is part of has been committed to disk. 
-+ */ -+ struct list_head h_jcb; -+ - /* Flags */ - unsigned int h_sync: 1; /* sync-on-close */ - unsigned int h_jdata: 1; /* force data journaling */ -@@ -398,6 +411,10 @@ struct transaction_s - - /* How many handles used this transaction? */ - int t_handle_count; -+ -+ /* List of registered callback functions for this transaction. -+ * Called when the transaction is committed. */ -+ struct list_head t_jcb; - }; - - -@@ -646,6 +663,9 @@ extern int journal_flushpage(journal_t - extern int journal_try_to_free_buffers(journal_t *, struct page *, int); - extern int journal_stop(handle_t *); - extern int journal_flush (journal_t *); -+extern void journal_callback_set(handle_t *handle, -+ void (*fn)(struct journal_callback *,int), -+ struct journal_callback *jcb); - - extern void journal_lock_updates (journal_t *); - extern void journal_unlock_updates (journal_t *); diff --git a/lustre/kernel_patches/patches/kmem_cache_validate.patch b/lustre/kernel_patches/patches/kmem_cache_validate.patch deleted file mode 100644 index 52880d8..0000000 --- a/lustre/kernel_patches/patches/kmem_cache_validate.patch +++ /dev/null @@ -1,119 +0,0 @@ - - - - 0 files changed - ---- linux-2.4.18-17.8.0/arch/i386/mm/init.c~kmem_cache_validate 2002-12-06 14:52:30.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/arch/i386/mm/init.c 2002-12-06 14:52:30.000000000 -0800 -@@ -43,6 +43,12 @@ unsigned long highstart_pfn, highend_pfn - static unsigned long totalram_pages; - static unsigned long totalhigh_pages; - -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? 
-+ return virt_to_page(kaddr); -+} -+ - int do_check_pgt_cache(int low, int high) - { - int freed = 0; ---- linux-2.4.18-17.8.0/arch/ia64/mm/init.c~kmem_cache_validate 2002-12-06 14:52:30.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/arch/ia64/mm/init.c 2002-12-06 14:52:30.000000000 -0800 -@@ -37,6 +37,12 @@ unsigned long MAX_DMA_ADDRESS = PAGE_OFF - - static unsigned long totalram_pages; - -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? -+ return virt_to_page(kaddr); -+} -+ - int - do_check_pgt_cache (int low, int high) - { ---- linux-2.4.18-17.8.0/include/linux/slab.h~kmem_cache_validate 2002-12-06 14:52:30.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/include/linux/slab.h 2002-12-06 14:52:30.000000000 -0800 -@@ -57,6 +57,7 @@ extern int kmem_cache_destroy(kmem_cache - extern int kmem_cache_shrink(kmem_cache_t *); - extern void *kmem_cache_alloc(kmem_cache_t *, int); - extern void kmem_cache_free(kmem_cache_t *, void *); -+extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp); - - extern void *kmalloc(size_t, int); - extern void kfree(const void *); ---- linux-2.4.18-17.8.0/kernel/ksyms.c~kmem_cache_validate 2002-12-06 14:52:30.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/kernel/ksyms.c 2002-12-06 14:52:30.000000000 -0800 -@@ -119,6 +119,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); - EXPORT_SYMBOL(kmem_cache_shrink); - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); -+EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); - EXPORT_SYMBOL(vfree); ---- linux-2.4.18-17.8.0/mm/slab.c~kmem_cache_validate 2002-12-06 14:52:30.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/mm/slab.c 2002-12-06 14:52:30.000000000 -0800 -@@ -1208,6 +1208,59 @@ failed: - * Called with the cache-lock held. 
- */ - -+extern struct page *check_get_page(unsigned long kaddr); -+struct page *page_mem_map(struct page *page); -+static int kmem_check_cache_obj (kmem_cache_t * cachep, -+ slab_t *slabp, void * objp) -+{ -+ int i; -+ unsigned int objnr; -+ -+#if DEBUG -+ if (cachep->flags & SLAB_RED_ZONE) { -+ objp -= BYTES_PER_WORD; -+ if ( *(unsigned long *)objp != RED_MAGIC2) -+ /* Either write before start, or a double free. */ -+ return 0; -+ if (*(unsigned long *)(objp+cachep->objsize - -+ BYTES_PER_WORD) != RED_MAGIC2) -+ /* Either write past end, or a double free. */ -+ return 0; -+ } -+#endif -+ -+ objnr = (objp-slabp->s_mem)/cachep->objsize; -+ if (objnr >= cachep->num) -+ return 0; -+ if (objp != slabp->s_mem + objnr*cachep->objsize) -+ return 0; -+ -+ /* Check slab's freelist to see if this obj is there. */ -+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { -+ if (i == objnr) -+ return 0; -+ } -+ return 1; -+} -+ -+ -+int kmem_cache_validate(kmem_cache_t *cachep, void *objp) -+{ -+ struct page *page = check_get_page((unsigned long)objp); -+ -+ if (!VALID_PAGE(page)) -+ return 0; -+ -+ if (!PageSlab(page)) -+ return 0; -+ -+ /* XXX check for freed slab objects ? 
*/ -+ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp)) -+ return 0; -+ -+ return (cachep == GET_PAGE_CACHE(page)); -+} -+ - #if DEBUG - static int kmem_extra_free_checks (kmem_cache_t * cachep, - slab_t *slabp, void * objp) - -_ diff --git a/lustre/kernel_patches/patches/kmem_cache_validate_2.4.20-rh.patch b/lustre/kernel_patches/patches/kmem_cache_validate_2.4.20-rh.patch deleted file mode 100644 index 8113828..0000000 --- a/lustre/kernel_patches/patches/kmem_cache_validate_2.4.20-rh.patch +++ /dev/null @@ -1,124 +0,0 @@ - - - - arch/i386/mm/init.c | 6 +++++ - arch/ia64/mm/init.c | 6 +++++ - include/linux/slab.h | 1 - kernel/ksyms.c | 1 - mm/slab.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ - 5 files changed, 67 insertions(+) - ---- rh-2.4.20/arch/i386/mm/init.c~kmem_cache_validate_2.4.20-rh 2003-04-11 14:05:09.000000000 +0800 -+++ rh-2.4.20-root/arch/i386/mm/init.c 2003-04-13 10:51:58.000000000 +0800 -@@ -43,6 +43,12 @@ unsigned long highstart_pfn, highend_pfn - static unsigned long totalram_pages; - static unsigned long totalhigh_pages; - -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? -+ return virt_to_page(kaddr); -+} -+ - int do_check_pgt_cache(int low, int high) - { - return 0; /* FIXME! */ ---- rh-2.4.20/arch/ia64/mm/init.c~kmem_cache_validate_2.4.20-rh 2003-04-11 14:04:43.000000000 +0800 -+++ rh-2.4.20-root/arch/ia64/mm/init.c 2003-04-13 10:51:58.000000000 +0800 -@@ -45,6 +45,12 @@ unsigned long vmalloc_end = VMALLOC_END_ - static struct page *vmem_map; - static unsigned long num_dma_physpages; - -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? 
-+ return virt_to_page(kaddr); -+} -+ - int - do_check_pgt_cache (int low, int high) - { ---- rh-2.4.20/include/linux/slab.h~kmem_cache_validate_2.4.20-rh 2003-04-12 15:46:39.000000000 +0800 -+++ rh-2.4.20-root/include/linux/slab.h 2003-04-13 10:53:00.000000000 +0800 -@@ -57,6 +57,7 @@ extern int kmem_cache_destroy(kmem_cache - extern int kmem_cache_shrink(kmem_cache_t *); - extern void *kmem_cache_alloc(kmem_cache_t *, int); - extern void kmem_cache_free(kmem_cache_t *, void *); -+extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp); - extern unsigned int kmem_cache_size(kmem_cache_t *); - - extern void *kmalloc(size_t, int); ---- rh-2.4.20/kernel/ksyms.c~kmem_cache_validate_2.4.20-rh 2003-04-12 16:15:26.000000000 +0800 -+++ rh-2.4.20-root/kernel/ksyms.c 2003-04-13 10:54:10.000000000 +0800 -@@ -123,6 +123,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); - EXPORT_SYMBOL(kmem_cache_shrink); - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); -+EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmem_cache_size); - EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); ---- rh-2.4.20/mm/slab.c~kmem_cache_validate_2.4.20-rh 2003-04-11 14:04:56.000000000 +0800 -+++ rh-2.4.20-root/mm/slab.c 2003-04-13 10:51:58.000000000 +0800 -@@ -1208,6 +1208,59 @@ failed: - * Called with the cache-lock held. - */ - -+extern struct page *check_get_page(unsigned long kaddr); -+struct page *page_mem_map(struct page *page); -+static int kmem_check_cache_obj (kmem_cache_t * cachep, -+ slab_t *slabp, void * objp) -+{ -+ int i; -+ unsigned int objnr; -+ -+#if DEBUG -+ if (cachep->flags & SLAB_RED_ZONE) { -+ objp -= BYTES_PER_WORD; -+ if ( *(unsigned long *)objp != RED_MAGIC2) -+ /* Either write before start, or a double free. */ -+ return 0; -+ if (*(unsigned long *)(objp+cachep->objsize - -+ BYTES_PER_WORD) != RED_MAGIC2) -+ /* Either write past end, or a double free. 
*/ -+ return 0; -+ } -+#endif -+ -+ objnr = (objp-slabp->s_mem)/cachep->objsize; -+ if (objnr >= cachep->num) -+ return 0; -+ if (objp != slabp->s_mem + objnr*cachep->objsize) -+ return 0; -+ -+ /* Check slab's freelist to see if this obj is there. */ -+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { -+ if (i == objnr) -+ return 0; -+ } -+ return 1; -+} -+ -+ -+int kmem_cache_validate(kmem_cache_t *cachep, void *objp) -+{ -+ struct page *page = check_get_page((unsigned long)objp); -+ -+ if (!VALID_PAGE(page)) -+ return 0; -+ -+ if (!PageSlab(page)) -+ return 0; -+ -+ /* XXX check for freed slab objects ? */ -+ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp)) -+ return 0; -+ -+ return (cachep == GET_PAGE_CACHE(page)); -+} -+ - #if DEBUG - static int kmem_extra_free_checks (kmem_cache_t * cachep, - slab_t *slabp, void * objp) - -_ diff --git a/lustre/kernel_patches/patches/kmem_cache_validate_2.4.20.patch b/lustre/kernel_patches/patches/kmem_cache_validate_2.4.20.patch deleted file mode 100644 index e802312..0000000 --- a/lustre/kernel_patches/patches/kmem_cache_validate_2.4.20.patch +++ /dev/null @@ -1,116 +0,0 @@ - 0 files changed - ---- linux-2.4.20-8/arch/ia64/mm/init.c~kmem_cache_validate_2.4.20 2002-11-29 07:53:09.000000000 +0800 -+++ linux-2.4.20-8-root/arch/ia64/mm/init.c 2003-06-01 01:44:13.000000000 +0800 -@@ -45,6 +45,12 @@ static struct page *vmem_map; - static unsigned long num_dma_physpages; - #endif - -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? 
-+ return virt_to_page(kaddr); -+} -+ - int - do_check_pgt_cache (int low, int high) - { ---- linux-2.4.20-8/include/linux/slab.h~kmem_cache_validate_2.4.20 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-8-root/include/linux/slab.h 2003-06-01 01:44:13.000000000 +0800 -@@ -56,6 +56,7 @@ extern kmem_cache_t *kmem_cache_create(c - extern int kmem_cache_destroy(kmem_cache_t *); - extern int kmem_cache_shrink(kmem_cache_t *); - extern void *kmem_cache_alloc(kmem_cache_t *, int); -+extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp); - extern void kmem_cache_free(kmem_cache_t *, void *); - extern unsigned int kmem_cache_size(kmem_cache_t *); - ---- linux-2.4.20-8/kernel/ksyms.c~kmem_cache_validate_2.4.20 2003-06-01 01:44:11.000000000 +0800 -+++ linux-2.4.20-8-root/kernel/ksyms.c 2003-06-01 01:44:13.000000000 +0800 -@@ -103,6 +103,7 @@ EXPORT_SYMBOL(kmem_find_general_cachep); - EXPORT_SYMBOL(kmem_cache_create); - EXPORT_SYMBOL(kmem_cache_destroy); - EXPORT_SYMBOL(kmem_cache_shrink); -+EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - EXPORT_SYMBOL(kmem_cache_size); ---- linux-2.4.20-8/mm/slab.c~kmem_cache_validate_2.4.20 2003-06-01 01:44:08.000000000 +0800 -+++ linux-2.4.20-8-root/mm/slab.c 2003-06-01 01:44:13.000000000 +0800 -@@ -1205,6 +1205,59 @@ failed: - * Called with the cache-lock held. - */ - -+extern struct page *check_get_page(unsigned long kaddr); -+struct page *page_mem_map(struct page *page); -+static int kmem_check_cache_obj (kmem_cache_t * cachep, -+ slab_t *slabp, void * objp) -+{ -+ int i; -+ unsigned int objnr; -+ -+#if DEBUG -+ if (cachep->flags & SLAB_RED_ZONE) { -+ objp -= BYTES_PER_WORD; -+ if ( *(unsigned long *)objp != RED_MAGIC2) -+ /* Either write before start, or a double free. */ -+ return 0; -+ if (*(unsigned long *)(objp+cachep->objsize - -+ BYTES_PER_WORD) != RED_MAGIC2) -+ /* Either write past end, or a double free. 
*/ -+ return 0; -+ } -+#endif -+ -+ objnr = (objp-slabp->s_mem)/cachep->objsize; -+ if (objnr >= cachep->num) -+ return 0; -+ if (objp != slabp->s_mem + objnr*cachep->objsize) -+ return 0; -+ -+ /* Check slab's freelist to see if this obj is there. */ -+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { -+ if (i == objnr) -+ return 0; -+ } -+ return 1; -+} -+ -+ -+int kmem_cache_validate(kmem_cache_t *cachep, void *objp) -+{ -+ struct page *page = check_get_page((unsigned long)objp); -+ -+ if (!VALID_PAGE(page)) -+ return 0; -+ -+ if (!PageSlab(page)) -+ return 0; -+ -+ /* XXX check for freed slab objects ? */ -+ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp)) -+ return 0; -+ -+ return (cachep == GET_PAGE_CACHE(page)); -+} -+ - #if DEBUG - static int kmem_extra_free_checks (kmem_cache_t * cachep, - slab_t *slabp, void * objp) ---- linux-2.4.20-8/arch/i386/mm/init.c~kmem_cache_validate_2.4.20 2002-11-29 07:53:09.000000000 +0800 -+++ linux-2.4.20-8-root/arch/i386/mm/init.c 2003-06-01 01:46:43.000000000 +0800 -@@ -43,6 +43,12 @@ unsigned long highstart_pfn, highend_pfn - static unsigned long totalram_pages; - static unsigned long totalhigh_pages; - -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? 
-+ return virt_to_page(kaddr); -+} -+ - int do_check_pgt_cache(int low, int high) - { - int freed = 0; - -_ diff --git a/lustre/kernel_patches/patches/kmem_cache_validate_hp.patch b/lustre/kernel_patches/patches/kmem_cache_validate_hp.patch deleted file mode 100644 index 04b49ea..0000000 --- a/lustre/kernel_patches/patches/kmem_cache_validate_hp.patch +++ /dev/null @@ -1,121 +0,0 @@ - arch/i386/mm/init.c | 6 +++++ - arch/ia64/mm/init.c | 6 +++++ - include/linux/slab.h | 1 - kernel/ksyms.c | 1 - mm/slab.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ - 5 files changed, 67 insertions(+) - ---- linux/arch/ia64/mm/init.c~kmem_cache_validate_hp 2003-04-11 14:24:25.000000000 +0800 -+++ linux-root/arch/ia64/mm/init.c 2003-05-16 20:03:56.000000000 +0800 -@@ -45,6 +45,12 @@ unsigned long vmalloc_end = VMALLOC_END_ - static struct page *vmem_map; - static unsigned long num_dma_physpages; - -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? 
-+ return virt_to_page(kaddr); -+} -+ - int - do_check_pgt_cache (int low, int high) - { ---- linux/include/linux/slab.h~kmem_cache_validate_hp 2002-11-29 07:53:15.000000000 +0800 -+++ linux-root/include/linux/slab.h 2003-05-16 20:03:56.000000000 +0800 -@@ -56,6 +56,7 @@ extern kmem_cache_t *kmem_cache_create(c - extern int kmem_cache_destroy(kmem_cache_t *); - extern int kmem_cache_shrink(kmem_cache_t *); - extern void *kmem_cache_alloc(kmem_cache_t *, int); -+extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp); - extern void kmem_cache_free(kmem_cache_t *, void *); - extern unsigned int kmem_cache_size(kmem_cache_t *); - ---- linux/kernel/ksyms.c~kmem_cache_validate_hp 2003-05-16 20:03:55.000000000 +0800 -+++ linux-root/kernel/ksyms.c 2003-05-16 20:03:56.000000000 +0800 -@@ -119,6 +119,7 @@ EXPORT_SYMBOL(kmem_find_general_cachep); - EXPORT_SYMBOL(kmem_cache_create); - EXPORT_SYMBOL(kmem_cache_destroy); - EXPORT_SYMBOL(kmem_cache_shrink); -+EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - EXPORT_SYMBOL(kmem_cache_size); ---- linux/mm/slab.c~kmem_cache_validate_hp 2002-11-29 07:53:15.000000000 +0800 -+++ linux-root/mm/slab.c 2003-05-16 20:03:56.000000000 +0800 -@@ -1205,6 +1205,59 @@ failed: - * Called with the cache-lock held. - */ - -+extern struct page *check_get_page(unsigned long kaddr); -+struct page *page_mem_map(struct page *page); -+static int kmem_check_cache_obj (kmem_cache_t * cachep, -+ slab_t *slabp, void * objp) -+{ -+ int i; -+ unsigned int objnr; -+ -+#if DEBUG -+ if (cachep->flags & SLAB_RED_ZONE) { -+ objp -= BYTES_PER_WORD; -+ if ( *(unsigned long *)objp != RED_MAGIC2) -+ /* Either write before start, or a double free. */ -+ return 0; -+ if (*(unsigned long *)(objp+cachep->objsize - -+ BYTES_PER_WORD) != RED_MAGIC2) -+ /* Either write past end, or a double free. 
*/ -+ return 0; -+ } -+#endif -+ -+ objnr = (objp-slabp->s_mem)/cachep->objsize; -+ if (objnr >= cachep->num) -+ return 0; -+ if (objp != slabp->s_mem + objnr*cachep->objsize) -+ return 0; -+ -+ /* Check slab's freelist to see if this obj is there. */ -+ for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) { -+ if (i == objnr) -+ return 0; -+ } -+ return 1; -+} -+ -+ -+int kmem_cache_validate(kmem_cache_t *cachep, void *objp) -+{ -+ struct page *page = check_get_page((unsigned long)objp); -+ -+ if (!VALID_PAGE(page)) -+ return 0; -+ -+ if (!PageSlab(page)) -+ return 0; -+ -+ /* XXX check for freed slab objects ? */ -+ if (!kmem_check_cache_obj(cachep, GET_PAGE_SLAB(page), objp)) -+ return 0; -+ -+ return (cachep == GET_PAGE_CACHE(page)); -+} -+ - #if DEBUG - static int kmem_extra_free_checks (kmem_cache_t * cachep, - slab_t *slabp, void * objp) ---- linux/arch/i386/mm/init.c~kmem_cache_validate_hp 2003-05-16 20:03:22.000000000 +0800 -+++ linux-root/arch/i386/mm/init.c 2003-05-16 20:06:16.000000000 +0800 -@@ -42,6 +42,12 @@ mmu_gather_t mmu_gathers[NR_CPUS]; - unsigned long highstart_pfn, highend_pfn; - static unsigned long totalram_pages; - static unsigned long totalhigh_pages; -+ -+struct page *check_get_page(unsigned long kaddr) -+{ -+#warning FIXME: Lustre team, is this solid? 
-+ return virt_to_page(kaddr); -+} - - int do_check_pgt_cache(int low, int high) - { - -_ diff --git a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch deleted file mode 100644 index 75ebcd0..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26.patch +++ /dev/null @@ -1,1759 +0,0 @@ - 0 files changed - ---- linux-2.4.18-18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/ialloc.c 2003-04-20 16:14:31.000000000 +0800 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_drop_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - ---- linux-2.4.18-18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/inode.c 2003-04-20 16:14:31.000000000 +0800 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = EXT3_I(inode)->i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. 
- */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1861,6 +1871,8 @@ void ext3_truncate(struct inode * inode) - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2008,8 +2020,6 @@ int ext3_get_inode_loc (struct inode *in - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2136,10 +2146,7 @@ void ext3_read_inode(struct inode * inod - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2147,7 +2154,7 @@ void ext3_read_inode(struct inode * inod - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { - inode->i_op = &page_symlink_inode_operations; ---- linux-2.4.18-18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/namei.c 2003-04-20 16:14:31.000000000 +0800 -@@ -27,6 +27,7 @@ 
- #include - #include - #include -+#include - #include - #include - #include -@@ -1183,6 +1184,7 @@ static int ext3_add_nondir(handle_t *han - d_instantiate(dentry, inode); - return 0; - } -+ ext3_xattr_drop_inode(handle, inode); - ext3_dec_count(handle, inode); - iput(inode); - return err; -@@ -1268,15 +1270,14 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; - - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; -- inode->i_size = inode->u.ext3_i.i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; -+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1303,9 +1304,6 @@ static int ext3_mkdir(struct inode * dir - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) -@@ -1671,7 +1669,7 @@ static int ext3_symlink (struct inode * - if (IS_ERR(inode)) - goto out_stop; - -- if (l > sizeof (inode->u.ext3_i.i_data)) { -+ if (l > sizeof(EXT3_I(inode)->i_data)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* ---- linux-2.4.18-18/fs/ext3/super.c~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/super.c 2003-04-20 16:14:31.000000000 +0800 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - 
int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -1748,14 +1750,25 @@ int ext3_statfs (struct super_block * sb - - static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super); - --static int __init init_ext3_fs(void) -+static void exit_ext3_fs(void) - { -- return register_filesystem(&ext3_fs_type); -+ unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --static void __exit exit_ext3_fs(void) -+static int __init init_ext3_fs(void) - { -- unregister_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); -+ if (!error) -+ error = init_ext3_xattr_user(); -+ if (!error) -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_fs(); -+ return error; - } - - EXPORT_SYMBOL(ext3_bread); ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/xattr.c 2003-04-20 16:14:31.000000000 +0800 -@@ -0,0 +1,1247 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * ¦ entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . 
| -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+#include -+#endif -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. 
*/ -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) do {} while(0) -+# define ext3_xattr_rehash(header, entry) do {} while(0) -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. 
-+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline void -+ext3_xattr_lock(void) -+{ -+ down(&ext3_xattr_sem); -+} -+ -+static inline void -+ext3_xattr_unlock(void) -+{ -+ up(&ext3_xattr_sem); -+} -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? */ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return 
getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. 
-+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * 
dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if 
(name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len) + 1; -+ } -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) { -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ *buf++ = '\0'; -+ } -+ } -+ error = size; -+ -+cleanup: -+ 
brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. 
-+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ ext3_xattr_lock(); -+ -+ if (EXT3_I(inode)->i_file_acl) { -+ /* The inode already has an extended attribute block. */ -+ int block = EXT3_I(inode)->i_file_acl; -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ ext3_xattr_unlock(); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ (void)ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ (void)ext3_xattr_cache_insert(new_bh); -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. 
*/ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_drop_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_drop_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ ext3_xattr_lock(); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_drop_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_drop_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ 
ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ ext3_xattr_unlock(); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ 
-+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- linux-2.4.18-18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/include/linux/ext3_fs.h 2003-04-20 16:14:31.000000000 +0800 -@@ -58,8 +58,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -89,7 +87,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -124,28 +121,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -513,7 +488,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP 
EXT3_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -606,6 +581,24 @@ struct ext3_iloc - unsigned long block_group; - }; - -+/* Defined for extended attributes */ -+#define CONFIG_EXT3_FS_XATTR y -+#ifndef ENOATTR -+#define ENOATTR ENODATA /* No such attribute */ -+#endif -+#ifndef ENOTSUP -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+#endif -+#ifndef XATTR_NAME_MAX -+#define XATTR_NAME_MAX 255 /* # chars in an extended attribute name */ -+#define XATTR_SIZE_MAX 65536 /* size of an extended attribute value (64k) */ -+#define XATTR_LIST_MAX 65536 /* size of extended attribute namelist (64k) */ -+#endif -+#ifndef XATTR_CREATE -+#define XATTR_CREATE 1 /* set value, fail if attr already exists */ -+#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */ -+#endif -+ - /* - * Function prototypes - */ -@@ -647,6 +640,7 @@ extern void ext3_check_inodes_bitmap (st - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - ---- linux-2.4.18-18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26 2003-04-20 16:14:31.000000000 +0800 -+++ linux-2.4.18-18-root/include/linux/ext3_jbd.h 2003-04-20 16:14:31.000000000 +0800 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. 
This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-18-root/include/linux/ext3_xattr.h 2003-04-20 16:14:31.000000000 +0800 -@@ -0,0 +1,155 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# 
ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, void *, size_t, int); -+ -+extern void ext3_xattr_drop_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_drop_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void 
-+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- /dev/null 2002-08-31 07:31:37.000000000 +0800 -+++ linux-2.4.18-18-root/include/linux/xattr.h 2003-04-20 16:14:31.000000000 +0800 -@@ -0,0 +1,15 @@ -+/* -+ File: linux/xattr.h -+ -+ Extended attributes handling. -+ -+ Copyright (C) 2001 by Andreas Gruenbacher -+ Copyright (C) 2001 SGI - Silicon Graphics, Inc -+*/ -+#ifndef _LINUX_XATTR_H -+#define _LINUX_XATTR_H -+ -+#define XATTR_CREATE 1 /* set value, fail if attr already exists */ -+#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */ -+ -+#endif /* _LINUX_XATTR_H */ ---- linux-2.4.18-18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26 2003-04-20 16:14:54.000000000 +0800 -+++ linux-2.4.18-18-root/fs/ext3/Makefile 2003-04-20 16:15:15.000000000 +0800 -@@ -9,10 +9,10 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := super.o inode.o xattr.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o -+ ioctl.o namei.o super.o symlink.o xattr.o - obj-m := $(O_TARGET) - - include $(TOPDIR)/Rules.make - -_ diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch deleted file mode 100644 index 5c6c6a9..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-chaos.patch +++ /dev/null @@ -1,5538 +0,0 @@ - Documentation/Configure.help | 
66 ++ - arch/alpha/defconfig | 7 - arch/alpha/kernel/entry.S | 12 - arch/arm/defconfig | 7 - arch/arm/kernel/calls.S | 24 - arch/i386/defconfig | 7 - arch/ia64/defconfig | 7 - arch/m68k/defconfig | 7 - arch/mips/defconfig | 7 - arch/mips64/defconfig | 7 - arch/ppc/defconfig | 14 - arch/ppc64/kernel/misc.S | 2 - arch/s390/defconfig | 7 - arch/s390/kernel/entry.S | 24 - arch/s390x/defconfig | 7 - arch/s390x/kernel/entry.S | 24 - arch/s390x/kernel/wrapper32.S | 92 +++ - arch/sparc/defconfig | 7 - arch/sparc/kernel/systbls.S | 10 - arch/sparc64/defconfig | 7 - arch/sparc64/kernel/systbls.S | 20 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 10 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/asm-arm/unistd.h | 2 - include/asm-ppc64/unistd.h | 2 - include/asm-s390/unistd.h | 15 - include/asm-s390x/unistd.h | 15 - include/asm-sparc/unistd.h | 24 - include/asm-sparc64/unistd.h | 24 - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 36 + - fs/ext3/ext3-exports.c | 14 + - 62 files changed, 4331 insertions(+), 197 deletions(-) - ---- linux-rh-2.4.20-8/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 
17:33:50.000000000 +0800 -+++ linux-rh-2.4.20-8-root/Documentation/Configure.help 2003-05-07 17:34:25.000000000 +0800 -@@ -15226,6 +15226,39 @@ CONFIG_EXT2_FS - be compiled as a module, and so this could be dangerous. Most - everyone wants to say Y here. - -+Ext2 extended attributes -+CONFIG_EXT2_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext2 extended attribute block sharing -+CONFIG_EXT2_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext2 extended user attributes -+CONFIG_EXT2_FS_XATTR_USER -+ This option enables extended user attributes on ext2. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext2 trusted extended attributes -+CONFIG_EXT2_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext2 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Ext3 journalling file system support (EXPERIMENTAL) - CONFIG_EXT3_FS - This is the journalling version of the Second extended file system -@@ -15258,6 +15291,39 @@ CONFIG_EXT3_FS - of your root partition (the one containing the directory /) cannot - be compiled as a module, and so this may be dangerous. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. 
-+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. 
It is ---- linux-rh-2.4.20-8/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2001-11-20 07:19:42.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/alpha/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ALPHA=y - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set ---- linux-rh-2.4.20-8/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/alpha/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800 -@@ -1162,6 +1162,18 @@ sys_call_table: - .quad sys_readahead - .quad sys_ni_syscall /* 380, sys_security */ - .quad sys_tkill -+ .quad sys_setxattr -+ .quad sys_lsetxattr -+ .quad sys_fsetxattr -+ .quad sys_getxattr /* 385 */ -+ .quad sys_lgetxattr -+ .quad sys_fgetxattr -+ .quad sys_listxattr -+ .quad sys_llistxattr -+ .quad sys_flistxattr /* 390 */ -+ .quad sys_removexattr -+ .quad sys_lremovexattr -+ .quad sys_fremovexattr - - /* Remember to update everything, kids. */ - .ifne (. 
- sys_call_table) - (NR_SYSCALLS * 8) ---- linux-rh-2.4.20-8/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2001-05-20 08:43:05.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/arm/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ARM=y - # CONFIG_EISA is not set - # CONFIG_SBUS is not set ---- linux-rh-2.4.20-8/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:42.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/arm/kernel/calls.S 2003-05-07 17:34:25.000000000 +0800 -@@ -240,18 +240,18 @@ __syscall_start: - .long SYMBOL_NAME(sys_ni_syscall) /* Security */ - .long SYMBOL_NAME(sys_gettid) - /* 225 */ .long SYMBOL_NAME(sys_readahead) -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_setxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_getxattr */ --/* 230 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_lgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_listxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_llistxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_flistxattr */ --/* 235 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_removexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lremovexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fremovexattr */ -+ .long SYMBOL_NAME(sys_setxattr) -+ .long SYMBOL_NAME(sys_lsetxattr) -+ .long SYMBOL_NAME(sys_fsetxattr) -+ .long SYMBOL_NAME(sys_getxattr) -+/* 230 */ .long SYMBOL_NAME(sys_lgetxattr) -+ .long SYMBOL_NAME(sys_fgetxattr) -+ .long SYMBOL_NAME(sys_listxattr) -+ 
.long SYMBOL_NAME(sys_llistxattr) -+ .long SYMBOL_NAME(sys_flistxattr) -+/* 235 */ .long SYMBOL_NAME(sys_removexattr) -+ .long SYMBOL_NAME(sys_lremovexattr) -+ .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - /* - * Please check 2.5 _before_ adding calls here, ---- linux-rh-2.4.20-8/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/i386/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_X86=y - CONFIG_ISA=y - # CONFIG_SBUS is not set ---- linux-rh-2.4.20-8/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/ia64/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options ---- linux-rh-2.4.20-8/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2000-06-20 03:56:08.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/m68k/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# 
CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - - # ---- linux-rh-2.4.20-8/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:10.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/mips/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - CONFIG_MIPS32=y - # CONFIG_MIPS64 is not set ---- linux-rh-2.4.20-8/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:10.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/mips64/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - # CONFIG_MIPS32 is not set - CONFIG_MIPS64=y ---- linux-rh-2.4.20-8/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/ppc/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,20 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# 
CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set - CONFIG_RWSEM_XCHGADD_ALGORITHM=y ---- linux-rh-2.4.20-8/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/ppc64/kernel/misc.S 2003-05-07 17:34:25.000000000 +0800 -@@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32) - .llong .sys_gettid /* 207 */ - #if 0 /* Reserved syscalls */ - .llong .sys_tkill /* 208 */ -+#endif - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr -@@ -743,6 +744,7 @@ _GLOBAL(sys_call_table32) - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ -+#if 0 /* Reserved syscalls */ - .llong .sys_futex - #endif - .llong .sys_perfmonctl /* Put this here for now ... */ ---- linux-rh-2.4.20-8/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set ---- linux-rh-2.4.20-8/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800 -@@ -558,18 +558,18 @@ sys_call_table: - .long sys_fcntl64 - .long sys_ni_syscall - .long sys_ni_syscall -- .long sys_ni_syscall /* 224 - reserved for setxattr */ -- .long sys_ni_syscall /* 225 - reserved for lsetxattr */ -- .long sys_ni_syscall /* 226 - 
reserved for fsetxattr */ -- .long sys_ni_syscall /* 227 - reserved for getxattr */ -- .long sys_ni_syscall /* 228 - reserved for lgetxattr */ -- .long sys_ni_syscall /* 229 - reserved for fgetxattr */ -- .long sys_ni_syscall /* 230 - reserved for listxattr */ -- .long sys_ni_syscall /* 231 - reserved for llistxattr */ -- .long sys_ni_syscall /* 232 - reserved for flistxattr */ -- .long sys_ni_syscall /* 233 - reserved for removexattr */ -- .long sys_ni_syscall /* 234 - reserved for lremovexattr */ -- .long sys_ni_syscall /* 235 - reserved for fremovexattr */ -+ .long sys_setxattr -+ .long sys_lsetxattr /* 225 */ -+ .long sys_fsetxattr -+ .long sys_getxattr -+ .long sys_lgetxattr -+ .long sys_fgetxattr -+ .long sys_listxattr /* 230 */ -+ .long sys_llistxattr -+ .long sys_flistxattr -+ .long sys_removexattr -+ .long sys_lremovexattr -+ .long sys_fremovexattr /* 235 */ - .long sys_gettid - .long sys_tkill - .rept 255-237 ---- linux-rh-2.4.20-8/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390x/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set ---- linux-rh-2.4.20-8/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:11.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/entry.S 2003-05-07 17:34:25.000000000 +0800 -@@ -591,18 +591,18 @@ sys_call_table: - .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) -- .long 
SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 224 - reserved for setxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 225 - reserved for lsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 226 - reserved for fsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 227 - reserved for getxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 228 - reserved for lgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 229 - reserved for fgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 230 - reserved for listxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 231 - reserved for llistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 232 - reserved for flistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 233 - reserved for removexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 234 - reserved for lremovexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 235 - reserved for fremovexattr */ -+ .long SYSCALL(sys_setxattr,sys32_setxattr_wrapper) -+ .long SYSCALL(sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -+ .long SYSCALL(sys_fsetxattr,sys32_fsetxattr_wrapper) -+ .long SYSCALL(sys_getxattr,sys32_getxattr_wrapper) -+ .long SYSCALL(sys_lgetxattr,sys32_lgetxattr_wrapper) -+ .long SYSCALL(sys_fgetxattr,sys32_fgetxattr_wrapper) -+ .long SYSCALL(sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -+ .long SYSCALL(sys_llistxattr,sys32_llistxattr_wrapper) -+ .long SYSCALL(sys_flistxattr,sys32_flistxattr_wrapper) -+ .long SYSCALL(sys_removexattr,sys32_removexattr_wrapper) -+ .long SYSCALL(sys_lremovexattr,sys32_lremovexattr_wrapper) -+ .long SYSCALL(sys_fremovexattr,sys32_fremovexattr_wrapper)/* 235 */ - .long SYSCALL(sys_gettid,sys_gettid) - .long SYSCALL(sys_tkill,sys_tkill) - .rept 255-237 ---- linux-rh-2.4.20-8/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-chaos 2002-02-26 03:37:56.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/s390x/kernel/wrapper32.S 2003-05-07 
17:34:25.000000000 +0800 -@@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper: - llgtr %r3,%r3 # struct stat64 * - llgfr %r4,%r4 # long - jg sys32_fstat64 # branch to system call -+ -+ .globl sys32_setxattr_wrapper -+sys32_setxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_setxattr -+ -+ .globl sys32_lsetxattr_wrapper -+sys32_lsetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_lsetxattr -+ -+ .globl sys32_fsetxattr_wrapper -+sys32_fsetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_fsetxattr -+ -+ .globl sys32_getxattr_wrapper -+sys32_getxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_getxattr -+ -+ .globl sys32_lgetxattr_wrapper -+sys32_lgetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_lgetxattr -+ -+ .globl sys32_fgetxattr_wrapper -+sys32_fgetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_fgetxattr -+ -+ .globl sys32_listxattr_wrapper -+sys32_listxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_listxattr -+ -+ .globl sys32_llistxattr_wrapper -+sys32_llistxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_llistxattr -+ -+ .globl sys32_flistxattr_wrapper -+sys32_flistxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_flistxattr -+ -+ .globl sys32_removexattr_wrapper -+sys32_removexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_removexattr -+ -+ .globl 
sys32_lremovexattr_wrapper -+sys32_lremovexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_lremovexattr -+ -+ .globl sys32_fremovexattr_wrapper -+sys32_fremovexattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ jg sys_fremovexattr -+ -+ ---- linux-rh-2.4.20-8/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - CONFIG_HIGHMEM=y - ---- linux-rh-2.4.20-8/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800 -@@ -51,11 +51,11 @@ sys_call_table: - /*150*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - /*155*/ .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount - /*160*/ .long sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_nis_syscall --/*165*/ .long sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall --/*170*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents --/*175*/ .long sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_sigpending, sys_query_module --/*185*/ .long sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sys_newuname -+/*165*/ .long sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_setxattr -+/*170*/ .long sys_lsetxattr, sys_fsetxattr, sys_getxattr, 
sys_lgetxattr, sys_getdents -+/*175*/ .long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_query_module -+/*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sys_newuname - /*190*/ .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - /*195*/ .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask - /*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir ---- linux-rh-2.4.20-8/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc64/defconfig 2003-05-07 17:34:25.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options ---- linux-rh-2.4.20-8/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:43.000000000 +0800 -+++ linux-rh-2.4.20-8-root/arch/sparc64/kernel/systbls.S 2003-05-07 17:34:25.000000000 +0800 -@@ -52,11 +52,11 @@ sys_call_table32: - /*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount - /*160*/ .word sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_nis_syscall -- .word sys32_quotactl, sys_nis_syscall, sys32_mount, sys_ustat, sys_nis_syscall --/*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_getdents -- .word sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .word sys_nis_syscall, 
sys_nis_syscall, sys_nis_syscall, sys32_sigpending, sys32_query_module -- .word sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sparc64_newuname -+ .word sys32_quotactl, sys_nis_syscall, sys32_mount, sys_ustat, sys_setxattr -+/*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys32_getdents -+ .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys32_sigpending, sys32_query_module -+ .word sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sparc64_newuname - /*190*/ .word sys32_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys32_sigaction, sys_sgetmask - /*200*/ .word sys_ssetmask, sys_sigsuspend, sys32_newlstat, sys_uselib, old32_readdir -@@ -111,11 +111,11 @@ sys_call_table: - /*150*/ .word sys_getsockname, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - .word sys_nis_syscall, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount - /*160*/ .word sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_utrap_install -- .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall --/*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents -- .word sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_query_module -- .word sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sparc64_newuname -+ .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_setxattr -+/*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents -+ .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_nis_syscall, sys_query_module -+ .word sys_setpgid, 
sys_fremovexattr, sys_tkill, sys_nis_syscall, sparc64_newuname - /*190*/ .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask - /*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall ---- linux-rh-2.4.20-8/fs/Config.in~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:05:03.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/Config.in 2003-05-07 17:34:25.000000000 +0800 -@@ -34,6 +34,11 @@ dep_mbool ' Debug Befs' CONFIG_BEFS_DEB - dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL - - tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS -+dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS -+dep_bool ' Ext3 extended attribute block sharing' \ -+ CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR -+dep_bool ' Ext3 extended user attributes' \ -+ CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR - # CONFIG_JBD could be its own option (even modular), but until there are - # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS - # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS -@@ -93,6 +98,11 @@ dep_mbool ' QNX4FS write support (DANGE - tristate 'ROM file system support' CONFIG_ROMFS_FS - - tristate 'Second extended fs support' CONFIG_EXT2_FS -+dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS -+dep_bool ' Ext2 extended attribute block sharing' \ -+ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR -+dep_bool ' Ext2 extended user attributes' \ -+ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR - - tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS - -@@ -164,6 +174,10 @@ else - define_tristate CONFIG_ZISOFS_FS n - fi - -+# Meta block cache for Extended Attributes (ext2/ext3) -+#tristate 'Meta block cache' CONFIG_FS_MBCACHE -+define_tristate 
CONFIG_FS_MBCACHE y -+ - mainmenu_option next_comment - comment 'Partition Types' - source fs/partitions/Config.in ---- linux-rh-2.4.20-8/fs/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/Makefile 2003-05-07 17:34:25.000000000 +0800 -@@ -84,6 +84,9 @@ obj-y += binfmt_script.o - - obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o - -+export-objs += mbcache.o -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o -+ - # persistent filesystems - obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) - ---- linux-rh-2.4.20-8/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/Makefile 2003-05-07 17:34:25.000000000 +0800 -@@ -13,4 +13,8 @@ obj-y := balloc.o bitmap.o dir.o file - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make ---- linux-rh-2.4.20-8/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-chaos 2001-10-11 23:05:18.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/file.c 2003-05-07 17:34:25.000000000 +0800 -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - - /* -@@ -51,4 +52,8 @@ struct file_operations ext2_file_operati - - struct inode_operations ext2_file_inode_operations = { - truncate: ext2_truncate, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- linux-rh-2.4.20-8/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/ialloc.c 2003-05-07 17:34:25.000000000 +0800 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -167,6 +168,7 @@ void ext2_free_inode (struct inode * ino - */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ -+ ext2_xattr_delete_inode(inode); - 
DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } ---- linux-rh-2.4.20-8/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/inode.c 2003-05-07 17:34:25.000000000 +0800 -@@ -39,6 +39,18 @@ MODULE_LICENSE("GPL"); - static int ext2_update_inode(struct inode * inode, int do_sync); - - /* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext2_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext2_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ -+/* - * Called at each iput() - */ - void ext2_put_inode (struct inode * inode) -@@ -53,9 +65,7 @@ void ext2_delete_inode (struct inode * i - { - lock_kernel(); - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - inode->u.ext2_i.i_dtime = CURRENT_TIME; - mark_inode_dirty(inode); -@@ -801,6 +811,8 @@ void ext2_truncate (struct inode * inode - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext2_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -888,8 +900,7 @@ void ext2_read_inode (struct inode * ino - unsigned long offset; - struct ext2_group_desc * gdp; - -- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && -- inode->i_ino != EXT2_ACL_DATA_INO && -+ if ((inode->i_ino != EXT2_ROOT_INO && - inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { - ext2_error (inode->i_sb, "ext2_read_inode", -@@ -974,10 +985,7 @@ void ext2_read_inode (struct inode * ino - for (block = 0; block < EXT2_N_BLOCKS; block++) - inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; - -- if (inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == 
EXT2_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; -@@ -986,15 +994,17 @@ void ext2_read_inode (struct inode * ino - inode->i_fop = &ext2_dir_operations; - inode->i_mapping->a_ops = &ext2_aops; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext2_inode_is_fast_symlink(inode)) - inode->i_op = &ext2_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - } -- } else -+ } else { -+ inode->i_op = &ext2_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(raw_inode->i_block[0])); -+ } - brelse (bh); - inode->i_attr_flags = 0; - if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) { ---- linux-rh-2.4.20-8/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2001-10-04 13:57:36.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/namei.c 2003-05-07 17:34:25.000000000 +0800 -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - - /* -@@ -136,7 +137,7 @@ static int ext2_symlink (struct inode * - - if (l > sizeof (inode->u.ext2_i.i_data)) { - /* slow symlink */ -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - err = block_symlink(inode, symname, l); - if (err) -@@ -345,4 +346,15 @@ struct inode_operations ext2_dir_inode_o - rmdir: ext2_rmdir, - mknod: ext2_mknod, - rename: ext2_rename, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ -+struct inode_operations ext2_special_inode_operations = { -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- 
linux-rh-2.4.20-8/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/super.c 2003-05-07 17:34:25.000000000 +0800 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -125,6 +126,7 @@ void ext2_put_super (struct super_block - int db_count; - int i; - -+ ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - struct ext2_super_block *es = EXT2_SB(sb)->s_es; - -@@ -175,6 +177,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -424,6 +433,9 @@ struct super_block * ext2_read_super (st - blocksize = BLOCK_SIZE; - - sb->u.ext2_sb.s_mount_opt = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ -+#endif - if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, - &sb->u.ext2_sb.s_mount_opt)) { - return NULL; -@@ -813,12 +825,27 @@ static DECLARE_FSTYPE_DEV(ext2_fs_type, - - static int __init init_ext2_fs(void) - { -- return register_filesystem(&ext2_fs_type); -+ int error = init_ext2_xattr(); -+ if (error) -+ return error; -+ error = init_ext2_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext2_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext2_xattr_user(); -+fail: -+ exit_ext2_xattr(); -+ return error; - } - - static void __exit exit_ext2_fs(void) - { - unregister_filesystem(&ext2_fs_type); -+ exit_ext2_xattr_user(); -+ exit_ext2_xattr(); - } - - EXPORT_NO_SYMBOLS; ---- 
linux-rh-2.4.20-8/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2000-09-28 04:41:33.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/symlink.c 2003-05-07 17:34:25.000000000 +0800 -@@ -19,6 +19,7 @@ - - #include - #include -+#include - - static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -32,7 +33,20 @@ static int ext2_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext2_symlink_inode_operations = { -+ readlink: page_readlink, -+ follow_link: page_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ - struct inode_operations ext2_fast_symlink_inode_operations = { - readlink: ext2_readlink, - follow_link: ext2_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/xattr.c 2003-05-07 17:34:25.000000000 +0800 -@@ -0,0 +1,1212 @@ -+/* -+ * linux/fs/ext2/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . 
| -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT2_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT2_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+EXPORT_SYMBOL(ext2_xattr_register); -+EXPORT_SYMBOL(ext2_xattr_unregister); -+EXPORT_SYMBOL(ext2_xattr_get); -+EXPORT_SYMBOL(ext2_xattr_list); -+EXPORT_SYMBOL(ext2_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT2_XATTR_DEBUG -+# define ea_idebug(inode, f...) 
do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) -+#endif -+ -+static int ext2_xattr_set2(struct inode *, struct buffer_head *, -+ struct ext2_xattr_header *); -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+static int ext2_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext2_xattr_cache_find(struct inode *, -+ struct ext2_xattr_header *); -+static void ext2_xattr_cache_remove(struct buffer_head *); -+static void ext2_xattr_rehash(struct ext2_xattr_header *, -+ struct ext2_xattr_entry *); -+ -+static struct mb_cache *ext2_xattr_cache; -+ -+#else -+# define ext2_xattr_cache_insert(bh) 0 -+# define ext2_xattr_cache_find(inode, header) NULL -+# define ext2_xattr_cache_remove(bh) while(0) {} -+# define ext2_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext2_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext2_xattr_sem); -+ -+static inline int -+ext2_xattr_new_block(struct inode *inode, int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + -+ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext2_new_block(inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext2_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? 
*/ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext2_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext2_xattr_free_block(struct inode * inode, unsigned long block) -+{ -+ ext2_free_blocks(inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext2_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext2_xattr_free_block(inode, block) \ -+ ext2_free_blocks(inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; -+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ if (!ext2_xattr_handlers[name_index-1]) { -+ ext2_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext2_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ ext2_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext2_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const 
char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static struct ext2_xattr_handler * -+ext2_xattr_resolve_name(const char **name) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext2_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext2_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext2_handler_lock); -+ return handler; -+} -+ -+static inline struct ext2_xattr_handler * -+ext2_xattr_handler(int name_index) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ read_lock(&ext2_handler_lock); -+ handler = ext2_xattr_handlers[name_index-1]; -+ read_unlock(&ext2_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext2_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) 
-+ value = ""; /* empty EA, do not remove */ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext2_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT2_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ 
error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT2_I(inode)->i_file_acl) -+ return 0; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* 
-+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext2_xattr_update_super_block(struct super_block *sb) -+{ -+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT2_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext2_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_header *header = NULL; -+ struct ext2_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT2_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. 
-+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext2_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(sb, "ext2_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext2_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT2_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT2_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT2_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext2_xattr_cache_remove(bh); -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT2_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT2_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext2_xattr_set2(inode, bh, NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT2_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT2_XATTR_PAD, 0, -+ EXT2_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext2_xattr_rehash(header, here); -+ -+ error = ext2_xattr_set2(inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext2_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext2_xattr_set(): Update the file system. -+ */ -+static int -+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, -+ struct ext2_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext2_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext2_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ ext2_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT2_I(inode)->i_file_acl != 0; -+ int block = ext2_xattr_new_block(inode, &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+ ext2_xattr_free_block(inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ ext2_xattr_cache_insert(new_bh); -+ -+ ext2_xattr_update_super_block(sb); -+ } -+ mark_buffer_dirty(new_bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &new_bh); -+ wait_on_buffer(new_bh); -+ error = -EIO; -+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ if (IS_SYNC(inode)) { -+ error = ext2_sync_inode (inode); -+ if (error) -+ goto cleanup; -+ } else -+ mark_inode_dirty(inode); -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext2_xattr_free_block(inode, old_bh->b_blocknr); -+ mark_buffer_clean(old_bh); -+ } else { -+ /* Decrement the refcount only. 
*/ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext2_xattr_quota_free(inode); -+ mark_buffer_dirty(old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT2_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext2_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext2_xattr_cache_remove(bh); -+ ext2_xattr_free_block(inode, block); -+ bforget(bh); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ mark_buffer_dirty(bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &bh); -+ wait_on_buffer(bh); -+ } -+ ext2_xattr_quota_free(inode); -+ } -+ EXT2_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext2_xattr_sem); -+} -+ -+/* -+ * ext2_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. 
-+ */ -+void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+/* -+ * ext2_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext2_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext2_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext2_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext2_xattr_cmp(struct ext2_xattr_header *header1, -+ struct ext2_xattr_header *header2) -+{ -+ struct ext2_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT2_XATTR_NEXT(entry1); -+ entry2 = EXT2_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext2_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT2_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT2_XATTR_REFCOUNT_MAX); -+ } else if (!ext2_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext2_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext2_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext2_xattr_rehash(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ struct ext2_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext2_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT2_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext2_xattr(void) -+{ -+ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext2_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+ mb_cache_destroy(ext2_xattr_cache); -+} -+ -+#else /* CONFIG_EXT2_FS_XATTR_SHARING */ -+ -+int __init -+init_ext2_xattr(void) -+{ -+ return 
0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext2/xattr_user.c 2003-05-07 17:34:25.000000000 +0800 -@@ -0,0 +1,103 @@ -+/* -+ * linux/fs/ext2/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext2_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext2_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext2_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ 
return error; -+ -+ return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct ext2_xattr_handler ext2_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext2_xattr_user_list, -+ get: ext2_xattr_user_get, -+ set: ext2_xattr_user_set, -+}; -+ -+int __init -+init_ext2_xattr_user(void) -+{ -+ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -+ -+void -+exit_ext2_xattr_user(void) -+{ -+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} ---- linux-rh-2.4.20-8/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/Makefile 2003-05-07 17:45:13.000000000 +0800 -@@ -1,5 +1,5 @@ - # --# Makefile for the linux ext2-filesystem routines. -+# Makefile for the linux ext3-filesystem routines. - # - # Note! Dependencies are done automagically by 'make dep', which also - # removes any old dependencies. DON'T put your own dependencies here -@@ -9,10 +9,14 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make ---- linux-rh-2.4.20-8/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/file.c 2003-05-07 17:34:25.000000000 +0800 -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -126,5 +127,9 @@ struct file_operations ext3_file_operati - struct inode_operations ext3_file_inode_operations = { - truncate: ext3_truncate, /* BKL held */ - setattr: ext3_setattr, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ 
getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; - ---- linux-rh-2.4.20-8/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:48.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/ialloc.c 2003-05-07 17:34:25.000000000 +0800 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - ---- linux-rh-2.4.20-8/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/inode.c 2003-05-07 17:34:25.000000000 +0800 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext3_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. 
- */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -179,9 +191,7 @@ void ext3_delete_inode (struct inode * i - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1874,6 +1884,8 @@ void ext3_truncate(struct inode * inode) - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2021,8 +2033,6 @@ int ext3_get_inode_loc (struct inode *in - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2149,10 +2159,7 @@ void ext3_read_inode(struct inode * inod - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2160,15 +2167,17 @@ void ext3_read_inode(struct inode * inod - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - } -- } else -+ } else { -+ inode->i_op = &ext3_special_inode_operations; - 
init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); -+ } - /* inode->i_attr_flags = 0; unused */ - if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ ---- linux-rh-2.4.20-8/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/namei.c 2003-05-07 17:34:25.000000000 +0800 -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1613,7 +1614,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1621,7 +1622,6 @@ static int ext3_mkdir(struct inode * dir - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1648,9 +1648,6 @@ static int ext3_mkdir(struct inode * dir - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { -@@ -2019,7 +2016,7 @@ static int ext3_symlink (struct inode * - goto out_stop; - - if (l > sizeof (EXT3_I(inode)->i_data)) { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* - * block_symlink() calls back into ext3_prepare/commit_write. 
-@@ -2245,4 +2242,16 @@ struct inode_operations ext3_dir_inode_o - rmdir: ext3_rmdir, /* BKL held */ - mknod: ext3_mknod, /* BKL held */ - rename: ext3_rename, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -+ -+struct inode_operations ext3_special_inode_operations = { -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ ---- linux-rh-2.4.20-8/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/super.c 2003-05-07 17:40:45.000000000 +0800 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -502,6 +504,7 @@ static int parse_options (char * options - int is_remount) - { - unsigned long *mount_options = &sbi->s_mount_opt; -+ - uid_t *resuid = &sbi->s_resuid; - gid_t *resgid = &sbi->s_resgid; - char * this_char; -@@ -514,6 +517,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -931,6 +941,12 @@ struct super_block * ext3_read_super (st - sbi->s_mount_opt = 0; - 
sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; -+ -+ /* Default extended attribute flags */ -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ /* set_opt(sbi->s_mount_opt, XATTR_USER); */ -+#endif -+ - if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { - sb->s_dev = 0; - goto out_fail; -@@ -1768,17 +1784,29 @@ static DECLARE_FSTYPE_DEV(ext3_fs_type, - - static int __init init_ext3_fs(void) - { -- return register_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); -+ if (error) -+ return error; -+ error = init_ext3_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_xattr_user(); -+fail: -+ exit_ext3_xattr(); -+ return error; - } - - static void __exit exit_ext3_fs(void) - { - unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); ---- linux-rh-2.4.20-8/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-chaos 2001-11-10 06:25:04.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/symlink.c 2003-05-07 17:34:25.000000000 +0800 -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -33,7 +34,20 @@ static int ext3_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext3_symlink_inode_operations = { -+ readlink: page_readlink, /* BKL not held. Don't need */ -+ follow_link: page_follow_link, /* BKL not held. 
Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ - struct inode_operations ext3_fast_symlink_inode_operations = { - readlink: ext3_readlink, /* BKL not held. Don't need */ - follow_link: ext3_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/xattr.c 2003-05-07 17:42:06.000000000 +0800 -@@ -0,0 +1,1225 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT3_XATTR_PAD -+ * byte boundaries. 
The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define EXT3_EA_USER "user." -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) 
-+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) while(0) {} -+# define ext3_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? 
*/ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ 
-+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ 
struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* 
find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* 
-+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT3_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. 
-+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext3_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext3_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ ext3_xattr_cache_insert(new_bh); -+ -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. 
*/ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext3_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ 
handle->h_sync = 1; -+ ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext3_xattr_sem); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ 
-+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/ext3/xattr_user.c 2003-05-07 17:34:25.000000000 +0800 -@@ -0,0 +1,111 @@ -+/* -+ * linux/fs/ext3/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext3_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, 
MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext3_xattr_user_list, -+ get: ext3_xattr_user_get, -+ set: ext3_xattr_user_set, -+}; -+ -+int __init -+init_ext3_xattr_user(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -+ -+void -+exit_ext3_xattr_user(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} ---- linux-rh-2.4.20-8/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/jfs/jfs_xattr.h 2003-05-07 17:34:25.000000000 +0800 -@@ -52,8 +52,10 @@ struct jfs_ea_list { - #define END_EALIST(ealist) \ - ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) - --extern int __jfs_setxattr(struct inode *, const char *, void *, size_t, int); --extern int jfs_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, -+ int); -+extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, -+ int); - extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); - extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); - extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); ---- linux-rh-2.4.20-8/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-chaos 2002-11-29 07:53:15.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/jfs/xattr.c 2003-05-07 17:34:25.000000000 +0800 -@@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s - } - - static int can_set_xattr(struct 
inode *inode, const char *name, -- void *value, size_t value_len) -+ const void *value, size_t value_len) - { - if (IS_RDONLY(inode)) - return -EROFS; -@@ -660,7 +660,7 @@ static int can_set_xattr(struct inode *i - return permission(inode, MAY_WRITE); - } - --int __jfs_setxattr(struct inode *inode, const char *name, void *value, -+int __jfs_setxattr(struct inode *inode, const char *name, const void *value, - size_t value_len, int flags) - { - struct jfs_ea_list *ealist; -@@ -799,7 +799,7 @@ int __jfs_setxattr(struct inode *inode, - return rc; - } - --int jfs_setxattr(struct dentry *dentry, const char *name, void *value, -+int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t value_len, int flags) - { - if (value == NULL) { /* empty EA, do not remove */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/fs/mbcache.c 2003-05-07 17:34:25.000000000 +0800 -@@ -0,0 +1,648 @@ -+/* -+ * linux/fs/mbcache.c -+ * (C) 2001-2002 Andreas Gruenbacher, -+ */ -+ -+/* -+ * Filesystem Meta Information Block Cache (mbcache) -+ * -+ * The mbcache caches blocks of block devices that need to be located -+ * by their device/block number, as well as by other criteria (such -+ * as the block's contents). -+ * -+ * There can only be one cache entry in a cache per device and block number. -+ * Additional indexes need not be unique in this sense. The number of -+ * additional indexes (=other criteria) can be hardwired at compile time -+ * or specified at cache create time. -+ * -+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' -+ * in the cache. A valid entry is in the main hash tables of the cache, -+ * and may also be in the lru list. An invalid entry is not in any hashes -+ * or lists. -+ * -+ * A valid cache entry is only in the lru list if no handles refer to it. -+ * Invalid cache entries will be freed when the last handle to the cache -+ * entry is released. 
Entries that cannot be freed immediately are put -+ * back on the lru list. -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+#ifdef MB_CACHE_DEBUG -+# define mb_debug(f...) do { \ -+ printk(KERN_DEBUG f); \ -+ printk("\n"); \ -+ } while (0) -+#define mb_assert(c) do { if (!(c)) \ -+ printk(KERN_ERR "assertion " #c " failed\n"); \ -+ } while(0) -+#else -+# define mb_debug(f...) do { } while(0) -+# define mb_assert(c) do { } while(0) -+#endif -+#define mb_error(f...) do { \ -+ printk(KERN_ERR f); \ -+ printk("\n"); \ -+ } while(0) -+ -+MODULE_AUTHOR("Andreas Gruenbacher "); -+MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+MODULE_LICENSE("GPL"); -+#endif -+ -+EXPORT_SYMBOL(mb_cache_create); -+EXPORT_SYMBOL(mb_cache_shrink); -+EXPORT_SYMBOL(mb_cache_destroy); -+EXPORT_SYMBOL(mb_cache_entry_alloc); -+EXPORT_SYMBOL(mb_cache_entry_insert); -+EXPORT_SYMBOL(mb_cache_entry_release); -+EXPORT_SYMBOL(mb_cache_entry_takeout); -+EXPORT_SYMBOL(mb_cache_entry_free); -+EXPORT_SYMBOL(mb_cache_entry_dup); -+EXPORT_SYMBOL(mb_cache_entry_get); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+EXPORT_SYMBOL(mb_cache_entry_find_first); -+EXPORT_SYMBOL(mb_cache_entry_find_next); -+#endif -+ -+ -+/* -+ * Global data: list of all mbcache's, lru list, and a spinlock for -+ * accessing cache data structures on SMP machines. The lru list is -+ * global across all mbcaches. -+ */ -+ -+static LIST_HEAD(mb_cache_list); -+static LIST_HEAD(mb_cache_lru_list); -+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; -+ -+static inline int -+mb_cache_indexes(struct mb_cache *cache) -+{ -+#ifdef MB_CACHE_INDEXES_COUNT -+ return MB_CACHE_INDEXES_COUNT; -+#else -+ return cache->c_indexes_count; -+#endif -+} -+ -+/* -+ * What the mbcache registers as to get shrunk dynamically. 
-+ */ -+ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask); -+ -+static struct cache_definition mb_cache_definition = { -+ "mb_cache", -+ mb_cache_memory_pressure -+}; -+ -+ -+static inline int -+__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) -+{ -+ return !list_empty(&ce->e_block_list); -+} -+ -+ -+static inline void -+__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+{ -+ int n; -+ -+ if (__mb_cache_entry_is_hashed(ce)) { -+ list_del_init(&ce->e_block_list); -+ for (n=0; ne_cache); n++) -+ list_del(&ce->e_indexes[n].o_list); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ -+ mb_assert(atomic_read(&ce->e_used) == 0); -+ if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { -+ /* free failed -- put back on the lru list -+ for freeing later. */ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&ce->e_lru_list, &mb_cache_lru_list); -+ spin_unlock(&mb_cache_spinlock); -+ } else { -+ kmem_cache_free(cache->c_entry_cache, ce); -+ atomic_dec(&cache->c_entry_count); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -+{ -+ if (atomic_dec_and_test(&ce->e_used)) { -+ if (__mb_cache_entry_is_hashed(ce)) -+ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ else { -+ spin_unlock(&mb_cache_spinlock); -+ __mb_cache_entry_forget(ce, GFP_KERNEL); -+ return; -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_memory_pressure() memory pressure callback -+ * -+ * This function is called by the kernel memory management when memory -+ * gets low. 
-+ * -+ * @priority: Amount by which to shrink the cache (0 = highes priority) -+ * @gfp_mask: (ignored) -+ */ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int count = 0; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &mb_cache_list) { -+ struct mb_cache *cache = -+ list_entry(l, struct mb_cache, c_cache_list); -+ mb_debug("cache %s (%d)", cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ count += atomic_read(&cache->c_entry_count); -+ } -+ mb_debug("trying to free %d of %d entries", -+ count / (priority ? priority : 1), count); -+ if (priority) -+ count /= priority; -+ while (count-- && !list_empty(&mb_cache_lru_list)) { -+ struct mb_cache_entry *ce = -+ list_entry(mb_cache_lru_list.next, -+ struct mb_cache_entry, e_lru_list); -+ list_del(&ce->e_lru_list); -+ __mb_cache_entry_unhash(ce); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), gfp_mask); -+ } -+} -+ -+ -+/* -+ * mb_cache_create() create a new cache -+ * -+ * All entries in one cache are equal size. Cache entries may be from -+ * multiple devices. If this is the first mbcache created, registers -+ * the cache with kernel memory management. Returns NULL if no more -+ * memory was available. -+ * -+ * @name: name of the cache (informal) -+ * @cache_op: contains the callback called when freeing a cache entry -+ * @entry_size: The size of a cache entry, including -+ * struct mb_cache_entry -+ * @indexes_count: number of additional indexes in the cache. Must equal -+ * MB_CACHE_INDEXES_COUNT if the number of indexes is -+ * hardwired. 
-+ * @bucket_count: number of hash buckets -+ */ -+struct mb_cache * -+mb_cache_create(const char *name, struct mb_cache_op *cache_op, -+ size_t entry_size, int indexes_count, int bucket_count) -+{ -+ int m=0, n; -+ struct mb_cache *cache = NULL; -+ -+ if(entry_size < sizeof(struct mb_cache_entry) + -+ indexes_count * sizeof(struct mb_cache_entry_index)) -+ return NULL; -+ -+ MOD_INC_USE_COUNT; -+ cache = kmalloc(sizeof(struct mb_cache) + -+ indexes_count * sizeof(struct list_head), GFP_KERNEL); -+ if (!cache) -+ goto fail; -+ cache->c_name = name; -+ cache->c_op.free = NULL; -+ if (cache_op) -+ cache->c_op.free = cache_op->free; -+ atomic_set(&cache->c_entry_count, 0); -+ cache->c_bucket_count = bucket_count; -+#ifdef MB_CACHE_INDEXES_COUNT -+ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -+#else -+ cache->c_indexes_count = indexes_count; -+#endif -+ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_block_hash) -+ goto fail; -+ for (n=0; nc_block_hash[n]); -+ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * -+ sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_indexes_hash[m]) -+ goto fail; -+ for (n=0; nc_indexes_hash[m][n]); -+ } -+ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, -+ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); -+ if (!cache->c_entry_cache) -+ goto fail; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&cache->c_cache_list, &mb_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ return cache; -+ -+fail: -+ if (cache) { -+ while (--m >= 0) -+ kfree(cache->c_indexes_hash[m]); -+ if (cache->c_block_hash) -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ } -+ MOD_DEC_USE_COUNT; -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_shrink() -+ * -+ * Removes all cache entires of a device from the cache. All cache entries -+ * currently in use cannot be freed, and thus remain in the cache. 
-+ * -+ * @cache: which cache to shrink -+ * @dev: which device's cache entries to shrink -+ */ -+void -+mb_cache_shrink(struct mb_cache *cache, kdev_t dev) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_dev == dev) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+} -+ -+ -+/* -+ * mb_cache_destroy() -+ * -+ * Shrinks the cache to its minimum possible size (hopefully 0 entries), -+ * and then destroys it. If this was the last mbcache, un-registers the -+ * mbcache from kernel memory management. -+ */ -+void -+mb_cache_destroy(struct mb_cache *cache) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_cache == cache) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ list_del(&cache->c_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+ -+ if (atomic_read(&cache->c_entry_count) > 0) { -+ mb_error("cache %s: %d orphaned entries", -+ cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ } -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) -+ /* We don't have kmem_cache_destroy() in 2.2.x */ -+ kmem_cache_shrink(cache->c_entry_cache); -+#else -+ kmem_cache_destroy(cache->c_entry_cache); -+#endif -+ for 
(n=0; n < mb_cache_indexes(cache); n++) -+ kfree(cache->c_indexes_hash[n]); -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+ -+/* -+ * mb_cache_entry_alloc() -+ * -+ * Allocates a new cache entry. The new entry will not be valid initially, -+ * and thus cannot be looked up yet. It should be filled with data, and -+ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL -+ * if no more memory was available. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_alloc(struct mb_cache *cache) -+{ -+ struct mb_cache_entry *ce; -+ -+ atomic_inc(&cache->c_entry_count); -+ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); -+ if (ce) { -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_LIST_HEAD(&ce->e_block_list); -+ ce->e_cache = cache; -+ atomic_set(&ce->e_used, 1); -+ } -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_insert() -+ * -+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into -+ * the cache. After this, the cache entry can be looked up, but is not yet -+ * in the lru list as the caller still holds a handle to it. Returns 0 on -+ * success, or -EBUSY if a cache entry for that device + inode exists -+ * already (this may happen after a failed lookup, if another process has -+ * inserted the same cache entry in the meantime). -+ * -+ * @dev: device the cache entry belongs to -+ * @block: block number -+ * @keys: array of additional keys. There must be indexes_count entries -+ * in the array (as specified when creating the cache). 
-+ */ -+int -+mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev, -+ unsigned long block, unsigned int keys[]) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ int error = -EBUSY, n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) -+ goto out; -+ } -+ __mb_cache_entry_unhash(ce); -+ ce->e_dev = dev; -+ ce->e_block = block; -+ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ for (n=0; ne_indexes[n].o_key = keys[n]; -+ bucket = keys[n] % cache->c_bucket_count; -+ list_add(&ce->e_indexes[n].o_list, -+ &cache->c_indexes_hash[n][bucket]); -+ } -+out: -+ spin_unlock(&mb_cache_spinlock); -+ return error; -+} -+ -+ -+/* -+ * mb_cache_entry_release() -+ * -+ * Release a handle to a cache entry. When the last handle to a cache entry -+ * is released it is either freed (if it is invalid) or otherwise inserted -+ * in to the lru list. -+ */ -+void -+mb_cache_entry_release(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_takeout() -+ * -+ * Take a cache entry out of the cache, making it invalid. The entry can later -+ * be re-inserted using mb_cache_entry_insert(), or released using -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_takeout(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_entry_free() -+ * -+ * This is equivalent to the sequence mb_cache_entry_takeout() -- -+ * mb_cache_entry_release(). 
-+ */ -+void -+mb_cache_entry_free(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_dup() -+ * -+ * Duplicate a handle to a cache entry (does not duplicate the cache entry -+ * itself). After the call, both the old and the new handle must be released. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_dup(struct mb_cache_entry *ce) -+{ -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_get() -+ * -+ * Get a cache entry by device / block number. (There can only be one entry -+ * in the cache per device and block.) Returns NULL if no such cache entry -+ * exists. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block) -+{ -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ goto cleanup; -+ } -+ } -+ ce = NULL; -+ -+cleanup: -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+ -+static struct mb_cache_entry * -+__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+ int index, kdev_t dev, unsigned int key) -+{ -+ while (l != head) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, -+ e_indexes[index].o_list); -+ if (ce->e_dev == dev && ce->e_indexes[index].o_key == key) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ return ce; -+ } -+ l = l->next; -+ } -+ return NULL; -+} -+ -+ -+/* -+ * 
mb_cache_entry_find_first() -+ * -+ * Find the first cache entry on a given device with a certain key in -+ * an additional index. Additonal matches can be found with -+ * mb_cache_entry_find_next(). Returns NULL if no match was found. -+ * -+ * @cache: the cache to search -+ * @index: the number of the additonal index to search (0<=indexc_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = cache->c_indexes_hash[index][bucket].next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_find_next() -+ * -+ * Find the next cache entry on a given device with a certain key in an -+ * additional index. Returns NULL if no match could be found. The previous -+ * entry is atomatically released, so that mb_cache_entry_find_next() can -+ * be called like this: -+ * -+ * entry = mb_cache_entry_find_first(); -+ * while (entry) { -+ * ... 
-+ * entry = mb_cache_entry_find_next(entry, ...); -+ * } -+ * -+ * @prev: The previous match -+ * @index: the number of the additonal index to search (0<=indexe_cache; -+ unsigned int bucket = key % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = prev->e_indexes[index].o_list.next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ __mb_cache_entry_release_unlock(prev); -+ return ce; -+} -+ -+#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ -+ -+static int __init init_mbcache(void) -+{ -+ register_cache(&mb_cache_definition); -+ return 0; -+} -+ -+static void __exit exit_mbcache(void) -+{ -+ unregister_cache(&mb_cache_definition); -+} -+ -+module_init(init_mbcache) -+module_exit(exit_mbcache) -+ ---- linux-rh-2.4.20-8/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-arm/unistd.h 2003-05-07 17:34:25.000000000 +0800 -@@ -244,7 +244,6 @@ - #define __NR_security (__NR_SYSCALL_BASE+223) - #define __NR_gettid (__NR_SYSCALL_BASE+224) - #define __NR_readahead (__NR_SYSCALL_BASE+225) --#if 0 /* allocated in 2.5 */ - #define __NR_setxattr (__NR_SYSCALL_BASE+226) - #define __NR_lsetxattr (__NR_SYSCALL_BASE+227) - #define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -@@ -257,7 +256,6 @@ - #define __NR_removexattr (__NR_SYSCALL_BASE+235) - #define __NR_lremovexattr (__NR_SYSCALL_BASE+236) - #define __NR_fremovexattr (__NR_SYSCALL_BASE+237) --#endif - #define __NR_tkill (__NR_SYSCALL_BASE+238) - /* - * Please check 2.5 _before_ adding calls here, ---- linux-rh-2.4.20-8/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-ppc64/unistd.h 2003-05-07 17:34:25.000000000 +0800 -@@ -218,6 +218,7 @@ - #define __NR_gettid 207 - #if 0 /* 
Reserved syscalls */ - #define __NR_tkill 208 -+#endif - #define __NR_setxattr 209 - #define __NR_lsetxattr 210 - #define __NR_fsetxattr 211 -@@ -230,6 +231,7 @@ - #define __NR_removexattr 218 - #define __NR_lremovexattr 219 - #define __NR_fremovexattr 220 -+#if 0 /* Reserved syscalls */ - #define __NR_futex 221 - #endif - ---- linux-rh-2.4.20-8/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-s390/unistd.h 2003-05-07 17:34:25.000000000 +0800 -@@ -212,9 +212,18 @@ - #define __NR_madvise 219 - #define __NR_getdents64 220 - #define __NR_fcntl64 221 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - ---- linux-rh-2.4.20-8/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-s390x/unistd.h 2003-05-07 17:34:25.000000000 +0800 -@@ -180,9 +180,18 @@ - #define __NR_pivot_root 217 - #define __NR_mincore 218 - #define __NR_madvise 219 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - ---- linux-rh-2.4.20-8/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 
08:39:45.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/asm-sparc/unistd.h 2003-05-07 17:34:25.000000000 +0800 -@@ -184,24 +184,24 @@ - /* #define __NR_exportfs 166 SunOS Specific */ - #define __NR_mount 167 /* Common */ - #define __NR_ustat 168 /* Common */ --/* #define __NR_semsys 169 SunOS Specific */ --/* #define __NR_msgsys 170 SunOS Specific */ --/* #define __NR_shmsys 171 SunOS Specific */ --/* #define __NR_auditsys 172 SunOS Specific */ --/* #define __NR_rfssys 173 SunOS Specific */ -+#define __NR_setxattr 169 /* SunOS: semsys */ -+#define __NR_lsetxattr 170 /* SunOS: msgsys */ -+#define __NR_fsetxattr 171 /* SunOS: shmsys */ -+#define __NR_getxattr 172 /* SunOS: auditsys */ -+#define __NR_lgetxattr 173 /* SunOS: rfssys */ - #define __NR_getdents 174 /* Common */ - #define __NR_setsid 175 /* Common */ - #define __NR_fchdir 176 /* Common */ --/* #define __NR_fchroot 177 SunOS Specific */ --/* #define __NR_vpixsys 178 SunOS Specific */ --/* #define __NR_aioread 179 SunOS Specific */ --/* #define __NR_aiowrite 180 SunOS Specific */ --/* #define __NR_aiowait 181 SunOS Specific */ --/* #define __NR_aiocancel 182 SunOS Specific */ -+#define __NR_fgetxattr 177 /* SunOS: fchroot */ -+#define __NR_listxattr 178 /* SunOS: vpixsys */ -+#define __NR_llistxattr 179 /* SunOS: aioread */ -+#define __NR_flistxattr 180 /* SunOS: aiowrite */ -+#define __NR_removexattr 181 /* SunOS: aiowait */ -+#define __NR_lremovexattr 182 /* SunOS: aiocancel */ - #define __NR_sigpending 183 /* Common */ - #define __NR_query_module 184 /* Linux Specific */ - #define __NR_setpgid 185 /* Common */ --/* #define __NR_pathconf 186 SunOS Specific */ -+#define __NR_fremovexattr 186 /* SunOS: pathconf */ - #define __NR_tkill 187 /* SunOS: fpathconf */ - /* #define __NR_sysconf 188 SunOS Specific */ - #define __NR_uname 189 /* Linux Specific */ ---- linux-rh-2.4.20-8/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-chaos 2002-08-03 08:39:45.000000000 +0800 -+++ 
linux-rh-2.4.20-8-root/include/asm-sparc64/unistd.h 2003-05-07 17:34:25.000000000 +0800 -@@ -184,24 +184,24 @@ - /* #define __NR_exportfs 166 SunOS Specific */ - #define __NR_mount 167 /* Common */ - #define __NR_ustat 168 /* Common */ --/* #define __NR_semsys 169 SunOS Specific */ --/* #define __NR_msgsys 170 SunOS Specific */ --/* #define __NR_shmsys 171 SunOS Specific */ --/* #define __NR_auditsys 172 SunOS Specific */ --/* #define __NR_rfssys 173 SunOS Specific */ -+#define __NR_setxattr 169 /* SunOS: semsys */ -+#define __NR_lsetxattr 170 /* SunOS: msgsys */ -+#define __NR_fsetxattr 171 /* SunOS: shmsys */ -+#define __NR_getxattr 172 /* SunOS: auditsys */ -+#define __NR_lgetxattr 173 /* SunOS: rfssys */ - #define __NR_getdents 174 /* Common */ - #define __NR_setsid 175 /* Common */ - #define __NR_fchdir 176 /* Common */ --/* #define __NR_fchroot 177 SunOS Specific */ --/* #define __NR_vpixsys 178 SunOS Specific */ --/* #define __NR_aioread 179 SunOS Specific */ --/* #define __NR_aiowrite 180 SunOS Specific */ --/* #define __NR_aiowait 181 SunOS Specific */ --/* #define __NR_aiocancel 182 SunOS Specific */ -+#define __NR_fgetxattr 177 /* SunOS: fchroot */ -+#define __NR_listxattr 178 /* SunOS: vpixsys */ -+#define __NR_llistxattr 179 /* SunOS: aioread */ -+#define __NR_flistxattr 180 /* SunOS: aiowrite */ -+#define __NR_removexattr 181 /* SunOS: aiowait */ -+#define __NR_lremovexattr 182 /* SunOS: aiocancel */ - #define __NR_sigpending 183 /* Common */ - #define __NR_query_module 184 /* Linux Specific */ - #define __NR_setpgid 185 /* Common */ --/* #define __NR_pathconf 186 SunOS Specific */ -+#define __NR_fremovexattr 186 /* SunOS: pathconf */ - #define __NR_tkill 187 /* SunOS: fpathconf */ - /* #define __NR_sysconf 188 SunOS Specific */ - #define __NR_uname 189 /* Linux Specific */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/cache_def.h 2003-05-07 17:34:25.000000000 +0800 -@@ -0,0 +1,15 @@ -+/* -+ * 
linux/cache_def.h -+ * Handling of caches defined in drivers, filesystems, ... -+ * -+ * Copyright (C) 2002 by Andreas Gruenbacher, -+ */ -+ -+struct cache_definition { -+ const char *name; -+ void (*shrink)(int, unsigned int); -+ struct list_head link; -+}; -+ -+extern void register_cache(struct cache_definition *); -+extern void unregister_cache(struct cache_definition *); ---- linux-rh-2.4.20-8/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-11 14:04:53.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/errno.h 2003-05-07 17:34:25.000000000 +0800 -@@ -26,4 +26,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif ---- linux-rh-2.4.20-8/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-04-12 15:46:42.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext2_fs.h 2003-05-07 17:34:25.000000000 +0800 -@@ -57,8 +57,6 @@ - */ - #define EXT2_BAD_INO 1 /* Bad blocks inode */ - #define EXT2_ROOT_INO 2 /* Root inode */ --#define EXT2_ACL_IDX_INO 3 /* ACL inode */ --#define EXT2_ACL_DATA_INO 4 /* ACL inode */ - #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ - -@@ -86,7 +84,6 @@ - #else - # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) - #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -121,28 +118,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext2_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext2_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access 
permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext2_group_desc -@@ -314,6 +289,7 @@ struct ext2_inode { - #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ - #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ - #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ -+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt - #define set_opt(o, opt) o |= EXT2_MOUNT_##opt -@@ -397,6 +373,7 @@ struct ext2_super_block { - - #ifdef __KERNEL__ - #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) -+#define EXT2_I(inode) (&((inode)->u.ext2_i)) - #else - /* Assume that user mode programs are passing in an ext2fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test -@@ -466,7 +443,7 @@ struct ext2_super_block { - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 - #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff - --#define EXT2_FEATURE_COMPAT_SUPP 0 -+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE - #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ -@@ -623,8 +600,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext2_dir_inode_operations; -+extern struct inode_operations ext2_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext2_symlink_inode_operations; - extern struct inode_operations ext2_fast_symlink_inode_operations; - - #endif /* __KERNEL__ */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext2_xattr.h 2003-05-07 17:34:25.000000000 +0800 -@@ 
-0,0 +1,157 @@ -+/* -+ File: linux/ext2_xattr.h -+ -+ On-disk format of extended attributes for the ext2 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT2_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT2_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT2_XATTR_INDEX_MAX 10 -+#define EXT2_XATTR_INDEX_USER 1 -+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext2_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext2_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT2_XATTR_PAD_BITS 2 -+#define EXT2_XATTR_PAD (1<e_name_len)) ) -+#define EXT2_XATTR_SIZE(size) \ -+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT2_FS_XATTR -+ -+struct ext2_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext2_xattr_register(int, struct ext2_xattr_handler *); -+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); -+ -+extern int ext2_setxattr(struct dentry *, const char *, 
const void *, size_t, int); -+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext2_listxattr(struct dentry *, char *, size_t); -+extern int ext2_removexattr(struct dentry *, const char *); -+ -+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext2_xattr_list(struct inode *, char *, size_t); -+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext2_xattr_delete_inode(struct inode *); -+extern void ext2_xattr_put_super(struct super_block *); -+ -+extern int init_ext2_xattr(void) __init; -+extern void exit_ext2_xattr(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR */ -+# define ext2_setxattr NULL -+# define ext2_getxattr NULL -+# define ext2_listxattr NULL -+# define ext2_removexattr NULL -+ -+static inline int -+ext2_xattr_get(struct inode *inode, int name_index, -+ const char *name, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+} -+ -+static inline void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR */ -+ -+# ifdef CONFIG_EXT2_FS_XATTR_USER -+ -+extern int init_ext2_xattr_user(void) __init; -+extern void exit_ext2_xattr_user(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+static inline int -+init_ext2_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr_user(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- 
linux-rh-2.4.20-8/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext3_fs.h 2003-05-07 17:34:25.000000000 +0800 -@@ -63,8 +63,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -94,7 +92,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -129,28 +126,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -344,6 +319,7 @@ struct ext3_inode { - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - 
#ifndef _LINUX_EXT2_FS_H -@@ -520,7 +496,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -@@ -771,8 +748,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext3_dir_inode_operations; -+extern struct inode_operations ext3_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - - ---- linux-rh-2.4.20-8/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:59.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext3_jbd.h 2003-05-07 17:34:25.000000000 +0800 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8U - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). 
The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/ext3_xattr.h 2003-05-07 17:34:25.000000000 +0800 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct 
ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct 
super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- linux-rh-2.4.20-8/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/fs.h 2003-05-07 17:34:25.000000000 +0800 -@@ -915,7 +915,7 @@ struct inode_operations { - int (*setattr) (struct dentry *, struct iattr *); - int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); -- int (*setxattr) (struct dentry *, const char *, void *, size_t, int); -+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-rh-2.4.20-8-root/include/linux/mbcache.h 2003-05-07 17:34:25.000000000 +0800 -@@ -0,0 +1,69 @@ -+/* -+ File: linux/mbcache.h -+ -+ (C) 2001 by Andreas Gruenbacher, -+*/ -+ -+/* Hardwire the number of additional indexes */ -+#define MB_CACHE_INDEXES_COUNT 1 -+ -+struct mb_cache_entry; -+ -+struct mb_cache_op { -+ int (*free)(struct mb_cache_entry *, int); -+}; -+ -+struct mb_cache { -+ struct list_head c_cache_list; -+ const char *c_name; -+ struct mb_cache_op c_op; -+ atomic_t c_entry_count; -+ int c_bucket_count; -+#ifndef MB_CACHE_INDEXES_COUNT -+ int c_indexes_count; -+#endif -+ kmem_cache_t *c_entry_cache; -+ struct 
list_head *c_block_hash; -+ struct list_head *c_indexes_hash[0]; -+}; -+ -+struct mb_cache_entry_index { -+ struct list_head o_list; -+ unsigned int o_key; -+}; -+ -+struct mb_cache_entry { -+ struct list_head e_lru_list; -+ struct mb_cache *e_cache; -+ atomic_t e_used; -+ kdev_t e_dev; -+ unsigned long e_block; -+ struct list_head e_block_list; -+ struct mb_cache_entry_index e_indexes[0]; -+}; -+ -+/* Functions on caches */ -+ -+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, -+ int, int); -+void mb_cache_shrink(struct mb_cache *, kdev_t); -+void mb_cache_destroy(struct mb_cache *); -+ -+/* Functions on cache entries */ -+ -+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); -+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long, -+ unsigned int[]); -+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); -+void mb_cache_entry_release(struct mb_cache_entry *); -+void mb_cache_entry_takeout(struct mb_cache_entry *); -+void mb_cache_entry_free(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t, -+ unsigned long); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, -+ kdev_t, unsigned int); -+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, -+ kdev_t, unsigned int); -+#endif ---- linux-rh-2.4.20-8/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/kernel/ksyms.c 2003-05-07 17:34:25.000000000 +0800 -@@ -12,6 +12,7 @@ - #define __KERNEL_SYSCALLS__ - #include - #include -+#include - #include - #include - #include -@@ -107,6 +108,7 @@ EXPORT_SYMBOL(exit_mm); - EXPORT_SYMBOL(exit_files); - EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); -+EXPORT_SYMBOL(copy_fs_struct); - - /* internal kernel 
memory management */ - EXPORT_SYMBOL(_alloc_pages); -@@ -125,6 +127,8 @@ EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmem_cache_size); -+EXPORT_SYMBOL(register_cache); -+EXPORT_SYMBOL(unregister_cache); - EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); - EXPORT_SYMBOL(vfree); ---- linux-rh-2.4.20-8/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-chaos 2003-05-07 17:33:58.000000000 +0800 -+++ linux-rh-2.4.20-8-root/mm/vmscan.c 2003-05-07 17:34:25.000000000 +0800 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -444,6 +445,39 @@ static inline void kachunk_cache(struct - - #define BATCH_WORK_AMOUNT 64 - -+static DECLARE_MUTEX(other_caches_sem); -+static LIST_HEAD(cache_definitions); -+ -+void register_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_add(&cache->link, &cache_definitions); -+ up(&other_caches_sem); -+} -+ -+void unregister_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_del(&cache->link); -+ up(&other_caches_sem); -+} -+ -+static void shrink_other_caches(unsigned int priority, int gfp_mask) -+{ -+ struct list_head *p; -+ -+ if (down_trylock(&other_caches_sem)) -+ return; -+ -+ list_for_each_prev(p, &cache_definitions) { -+ struct cache_definition *cache = -+ list_entry(p, struct cache_definition, link); -+ -+ cache->shrink(priority, gfp_mask); -+ } -+ up(&other_caches_sem); -+} -+ - /* - * returns the active cache ratio relative to the total active list - * times 10 (eg. 
30% cache returns 3) -@@ -887,7 +921,7 @@ static int do_try_to_free_pages_kswapd(u - - ret += shrink_dcache_memory(DEF_PRIORITY, gfp_mask); - ret += shrink_icache_memory(DEF_PRIORITY, gfp_mask); -- // ret += shrink_other_caches(DEF_PRIORITY, gfp_mask); -+ shrink_other_caches(DEF_PRIORITY, gfp_mask); - #ifdef CONFIG_QUOTA - ret += shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); - #endif ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-root/fs/ext3/ext3-exports.c 2003-05-05 18:19:11.000000000 +0800 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); - -_ diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch deleted file mode 100644 index f0f92e5..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54-hp.patch +++ /dev/null @@ -1,5536 +0,0 @@ - Documentation/Configure.help | 66 ++ - arch/alpha/defconfig | 7 - arch/alpha/kernel/entry.S | 12 - arch/arm/defconfig | 7 - arch/arm/kernel/calls.S | 24 - arch/i386/defconfig | 7 - arch/ia64/defconfig | 7 - arch/m68k/defconfig | 7 - arch/mips/defconfig | 7 - arch/mips64/defconfig | 7 - arch/ppc/defconfig | 14 - arch/ppc64/kernel/misc.S | 2 - arch/s390/defconfig | 7 - arch/s390/kernel/entry.S | 24 - arch/s390x/defconfig | 7 - arch/s390x/kernel/entry.S | 24 - arch/s390x/kernel/wrapper32.S | 92 +++ - arch/sparc/defconfig | 7 - arch/sparc/kernel/systbls.S | 10 - arch/sparc64/defconfig | 7 - arch/sparc64/kernel/systbls.S | 20 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 
+++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 9 - fs/ext3/ext3-exports.c | 13 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/asm-arm/unistd.h | 2 - include/asm-ppc64/unistd.h | 2 - include/asm-s390/unistd.h | 15 - include/asm-s390x/unistd.h | 15 - include/asm-sparc/unistd.h | 24 - include/asm-sparc64/unistd.h | 24 - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 35 + - 62 files changed, 4343 insertions(+), 182 deletions(-) - ---- linux/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:23 2003 -+++ linux-mmonroe/Documentation/Configure.help Fri May 16 08:43:00 2003 -@@ -15309,6 +15309,39 @@ CONFIG_EXT2_FS - be compiled as a module, and so this could be dangerous. Most - everyone wants to say Y here. - -+Ext2 extended attributes -+CONFIG_EXT2_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext2 extended attribute block sharing -+CONFIG_EXT2_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext2 extended user attributes -+CONFIG_EXT2_FS_XATTR_USER -+ This option enables extended user attributes on ext2. 
Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext2 trusted extended attributes -+CONFIG_EXT2_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext2 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Ext3 journalling file system support (EXPERIMENTAL) - CONFIG_EXT3_FS - This is the journalling version of the Second extended file system -@@ -15341,6 +15374,39 @@ CONFIG_EXT3_FS - of your root partition (the one containing the directory /) cannot - be compiled as a module, and so this may be dangerous. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. 
-+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. It is ---- linux/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/alpha/defconfig Fri May 16 08:43:00 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ALPHA=y - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set ---- linux/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:42 2002 -+++ linux-mmonroe/arch/alpha/kernel/entry.S Fri May 16 08:43:00 2003 -@@ -1154,6 +1154,18 @@ sys_call_table: - .quad sys_readahead - .quad sys_ni_syscall /* 380, sys_security */ - .quad sys_tkill -+ .quad sys_setxattr -+ .quad sys_lsetxattr -+ .quad sys_fsetxattr -+ .quad sys_getxattr /* 385 */ -+ .quad sys_lgetxattr -+ .quad sys_fgetxattr -+ .quad sys_listxattr -+ .quad sys_llistxattr -+ .quad sys_flistxattr /* 390 */ -+ .quad sys_removexattr -+ .quad sys_lremovexattr -+ .quad sys_fremovexattr - - /* Remember to update everything, kids. */ - .ifne (. 
- sys_call_table) - (NR_SYSCALLS * 8) ---- linux/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/arm/defconfig Fri May 16 08:43:00 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ARM=y - # CONFIG_EISA is not set - # CONFIG_SBUS is not set ---- linux/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:42 2002 -+++ linux-mmonroe/arch/arm/kernel/calls.S Fri May 16 08:43:00 2003 -@@ -240,18 +240,18 @@ __syscall_start: - .long SYMBOL_NAME(sys_ni_syscall) /* Security */ - .long SYMBOL_NAME(sys_gettid) - /* 225 */ .long SYMBOL_NAME(sys_readahead) -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_setxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_getxattr */ --/* 230 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_lgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_listxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_llistxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_flistxattr */ --/* 235 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_removexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lremovexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fremovexattr */ -+ .long SYMBOL_NAME(sys_setxattr) -+ .long SYMBOL_NAME(sys_lsetxattr) -+ .long SYMBOL_NAME(sys_fsetxattr) -+ .long SYMBOL_NAME(sys_getxattr) -+/* 230 */ .long SYMBOL_NAME(sys_lgetxattr) -+ .long SYMBOL_NAME(sys_fgetxattr) -+ .long SYMBOL_NAME(sys_listxattr) -+ .long SYMBOL_NAME(sys_llistxattr) -+ .long SYMBOL_NAME(sys_flistxattr) -+/* 235 */ .long 
SYMBOL_NAME(sys_removexattr) -+ .long SYMBOL_NAME(sys_lremovexattr) -+ .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - /* - * Please check 2.5 _before_ adding calls here, ---- linux/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/i386/defconfig Fri May 16 08:43:00 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_X86=y - CONFIG_ISA=y - # CONFIG_SBUS is not set ---- linux/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/ia64/defconfig Fri May 16 08:43:00 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options ---- linux/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/m68k/defconfig Fri May 16 08:43:00 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - - # ---- linux/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/mips/defconfig Fri May 16 08:43:01 2003 -@@ -1,6 +1,13 @@ - # - # 
Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - CONFIG_MIPS32=y - # CONFIG_MIPS64 is not set ---- linux/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/mips64/defconfig Fri May 16 08:43:01 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - # CONFIG_MIPS32 is not set - CONFIG_MIPS64=y ---- linux/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/ppc/defconfig Fri May 16 08:43:01 2003 -@@ -1,6 +1,20 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set - CONFIG_RWSEM_XCHGADD_ALGORITHM=y ---- linux/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:11 2002 -+++ linux-mmonroe/arch/ppc64/kernel/misc.S Fri May 16 08:43:01 
2003 -@@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32) - .llong .sys_gettid /* 207 */ - #if 0 /* Reserved syscalls */ - .llong .sys_tkill /* 208 */ -+#endif - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr -@@ -743,6 +744,7 @@ _GLOBAL(sys_call_table32) - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ -+#if 0 /* Reserved syscalls */ - .llong .sys_futex - #endif - .llong .sys_perfmonctl /* Put this here for now ... */ ---- linux/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/s390/defconfig Fri May 16 08:43:01 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set ---- linux/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:11 2002 -+++ linux-mmonroe/arch/s390/kernel/entry.S Fri May 16 08:43:01 2003 -@@ -558,18 +558,18 @@ sys_call_table: - .long sys_fcntl64 - .long sys_ni_syscall - .long sys_ni_syscall -- .long sys_ni_syscall /* 224 - reserved for setxattr */ -- .long sys_ni_syscall /* 225 - reserved for lsetxattr */ -- .long sys_ni_syscall /* 226 - reserved for fsetxattr */ -- .long sys_ni_syscall /* 227 - reserved for getxattr */ -- .long sys_ni_syscall /* 228 - reserved for lgetxattr */ -- .long sys_ni_syscall /* 229 - reserved for fgetxattr */ -- .long sys_ni_syscall /* 230 - reserved for listxattr */ -- .long sys_ni_syscall /* 231 - reserved for llistxattr */ -- .long sys_ni_syscall /* 232 - reserved for flistxattr */ -- .long sys_ni_syscall /* 233 - reserved for removexattr */ -- .long sys_ni_syscall /* 234 - reserved for lremovexattr */ -- .long 
sys_ni_syscall /* 235 - reserved for fremovexattr */ -+ .long sys_setxattr -+ .long sys_lsetxattr /* 225 */ -+ .long sys_fsetxattr -+ .long sys_getxattr -+ .long sys_lgetxattr -+ .long sys_fgetxattr -+ .long sys_listxattr /* 230 */ -+ .long sys_llistxattr -+ .long sys_flistxattr -+ .long sys_removexattr -+ .long sys_lremovexattr -+ .long sys_fremovexattr /* 235 */ - .long sys_gettid - .long sys_tkill - .rept 255-237 ---- linux/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/s390x/defconfig Fri May 16 08:43:01 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set ---- linux/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:11 2002 -+++ linux-mmonroe/arch/s390x/kernel/entry.S Fri May 16 08:43:01 2003 -@@ -591,18 +591,18 @@ sys_call_table: - .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 224 - reserved for setxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 225 - reserved for lsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 226 - reserved for fsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 227 - reserved for getxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 228 - reserved for lgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 229 - reserved for fgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 230 - reserved for listxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 231 - reserved for 
llistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 232 - reserved for flistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 233 - reserved for removexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 234 - reserved for lremovexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 235 - reserved for fremovexattr */ -+ .long SYSCALL(sys_setxattr,sys32_setxattr_wrapper) -+ .long SYSCALL(sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -+ .long SYSCALL(sys_fsetxattr,sys32_fsetxattr_wrapper) -+ .long SYSCALL(sys_getxattr,sys32_getxattr_wrapper) -+ .long SYSCALL(sys_lgetxattr,sys32_lgetxattr_wrapper) -+ .long SYSCALL(sys_fgetxattr,sys32_fgetxattr_wrapper) -+ .long SYSCALL(sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -+ .long SYSCALL(sys_llistxattr,sys32_llistxattr_wrapper) -+ .long SYSCALL(sys_flistxattr,sys32_flistxattr_wrapper) -+ .long SYSCALL(sys_removexattr,sys32_removexattr_wrapper) -+ .long SYSCALL(sys_lremovexattr,sys32_lremovexattr_wrapper) -+ .long SYSCALL(sys_fremovexattr,sys32_fremovexattr_wrapper)/* 235 */ - .long SYSCALL(sys_gettid,sys_gettid) - .long SYSCALL(sys_tkill,sys_tkill) - .rept 255-237 ---- linux/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54-hp Mon Feb 25 11:37:56 2002 -+++ linux-mmonroe/arch/s390x/kernel/wrapper32.S Fri May 16 08:43:01 2003 -@@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper: - llgtr %r3,%r3 # struct stat64 * - llgfr %r4,%r4 # long - jg sys32_fstat64 # branch to system call -+ -+ .globl sys32_setxattr_wrapper -+sys32_setxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_setxattr -+ -+ .globl sys32_lsetxattr_wrapper -+sys32_lsetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_lsetxattr -+ -+ .globl sys32_fsetxattr_wrapper -+sys32_fsetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 
# char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_fsetxattr -+ -+ .globl sys32_getxattr_wrapper -+sys32_getxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_getxattr -+ -+ .globl sys32_lgetxattr_wrapper -+sys32_lgetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_lgetxattr -+ -+ .globl sys32_fgetxattr_wrapper -+sys32_fgetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_fgetxattr -+ -+ .globl sys32_listxattr_wrapper -+sys32_listxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_listxattr -+ -+ .globl sys32_llistxattr_wrapper -+sys32_llistxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_llistxattr -+ -+ .globl sys32_flistxattr_wrapper -+sys32_flistxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_flistxattr -+ -+ .globl sys32_removexattr_wrapper -+sys32_removexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_removexattr -+ -+ .globl sys32_lremovexattr_wrapper -+sys32_lremovexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_lremovexattr -+ -+ .globl sys32_fremovexattr_wrapper -+sys32_fremovexattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ jg sys_fremovexattr -+ -+ ---- linux/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/sparc/defconfig Fri May 16 08:43:01 2003 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# 
CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - CONFIG_HIGHMEM=y - ---- linux/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:43 2002 -+++ linux-mmonroe/arch/sparc/kernel/systbls.S Fri May 16 08:43:01 2003 -@@ -51,11 +51,11 @@ sys_call_table: - /*150*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - /*155*/ .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount - /*160*/ .long sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_nis_syscall --/*165*/ .long sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall --/*170*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents --/*175*/ .long sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_sigpending, sys_query_module --/*185*/ .long sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sys_newuname -+/*165*/ .long sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_setxattr -+/*170*/ .long sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents -+/*175*/ .long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_query_module -+/*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sys_newuname - /*190*/ .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - /*195*/ .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask - /*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir ---- linux/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/sparc64/defconfig Fri May 16 08:43:01 2003 -@@ -1,6 +1,13 @@ - # - # 
Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options ---- linux/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/arch/sparc64/kernel/systbls.S Fri May 16 08:43:01 2003 -@@ -52,11 +52,11 @@ sys_call_table32: - /*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount - /*160*/ .word sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_nis_syscall -- .word sys32_quotactl, sys_nis_syscall, sys32_mount, sys_ustat, sys_nis_syscall --/*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_getdents -- .word sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_sigpending, sys32_query_module -- .word sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sparc64_newuname -+ .word sys32_quotactl, sys_nis_syscall, sys32_mount, sys_ustat, sys_setxattr -+/*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys32_getdents -+ .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys32_sigpending, sys32_query_module -+ .word sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sparc64_newuname - /*190*/ .word sys32_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys32_sigaction, sys_sgetmask - /*200*/ .word sys_ssetmask, sys_sigsuspend, sys32_newlstat, 
sys_uselib, old32_readdir -@@ -111,11 +111,11 @@ sys_call_table: - /*150*/ .word sys_getsockname, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - .word sys_nis_syscall, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount - /*160*/ .word sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_utrap_install -- .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall --/*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents -- .word sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_query_module -- .word sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sparc64_newuname -+ .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_setxattr -+/*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents -+ .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_nis_syscall, sys_query_module -+ .word sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sparc64_newuname - /*190*/ .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask - /*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall ---- linux/fs/Config.in~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:14 2003 -+++ linux-mmonroe/fs/Config.in Fri May 16 08:43:01 2003 -@@ -35,6 +35,11 @@ dep_mbool ' Debug Befs' CONFIG_BEFS_DEB - dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL - - tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS -+dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS -+dep_bool ' Ext3 extended attribute block sharing' \ -+ 
CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR -+dep_bool ' Ext3 extended user attributes' \ -+ CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR - # CONFIG_JBD could be its own option (even modular), but until there are - # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS - # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS -@@ -98,6 +103,11 @@ dep_mbool ' QNX4FS write support (DANGE - tristate 'ROM file system support' CONFIG_ROMFS_FS - - tristate 'Second extended fs support' CONFIG_EXT2_FS -+dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS -+dep_bool ' Ext2 extended attribute block sharing' \ -+ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR -+dep_bool ' Ext2 extended user attributes' \ -+ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR - - tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS - -@@ -176,6 +186,10 @@ else - define_tristate CONFIG_ZISOFS_FS n - fi - -+# Meta block cache for Extended Attributes (ext2/ext3) -+#tristate 'Meta block cache' CONFIG_FS_MBCACHE -+define_tristate CONFIG_FS_MBCACHE y -+ - mainmenu_option next_comment - comment 'Partition Types' - source fs/partitions/Config.in ---- linux/fs/Makefile~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/Makefile Fri May 16 08:43:01 2003 -@@ -80,6 +80,9 @@ obj-y += binfmt_script.o - - obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o - -+export-objs += mbcache.o -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o -+ - # persistent filesystems - obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) - ---- linux/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54-hp Thu Oct 11 08:05:18 2001 -+++ linux-mmonroe/fs/ext2/Makefile Fri May 16 08:43:01 2003 -@@ -13,4 +13,8 @@ obj-y := balloc.o bitmap.o dir.o file - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o -+ - include 
$(TOPDIR)/Rules.make ---- linux/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54-hp Thu Oct 11 08:05:18 2001 -+++ linux-mmonroe/fs/ext2/file.c Fri May 16 08:43:01 2003 -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - - /* -@@ -51,4 +52,8 @@ struct file_operations ext2_file_operati - - struct inode_operations ext2_file_inode_operations = { - truncate: ext2_truncate, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- linux/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext2/ialloc.c Fri May 16 08:43:01 2003 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -167,6 +168,7 @@ void ext2_free_inode (struct inode * ino - */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ -+ ext2_xattr_delete_inode(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } ---- linux/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext2/inode.c Fri May 16 08:43:01 2003 -@@ -39,6 +39,18 @@ MODULE_LICENSE("GPL"); - static int ext2_update_inode(struct inode * inode, int do_sync); - - /* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext2_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext2_i.i_file_acl ? 
-+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ -+/* - * Called at each iput() - */ - void ext2_put_inode (struct inode * inode) -@@ -53,9 +65,7 @@ void ext2_delete_inode (struct inode * i - { - lock_kernel(); - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - inode->u.ext2_i.i_dtime = CURRENT_TIME; - mark_inode_dirty(inode); -@@ -801,6 +811,8 @@ void ext2_truncate (struct inode * inode - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext2_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -888,8 +900,7 @@ void ext2_read_inode (struct inode * ino - unsigned long offset; - struct ext2_group_desc * gdp; - -- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && -- inode->i_ino != EXT2_ACL_DATA_INO && -+ if ((inode->i_ino != EXT2_ROOT_INO && - inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { - ext2_error (inode->i_sb, "ext2_read_inode", -@@ -974,10 +985,7 @@ void ext2_read_inode (struct inode * ino - for (block = 0; block < EXT2_N_BLOCKS; block++) - inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; - -- if (inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; -@@ -986,15 +994,17 @@ void ext2_read_inode (struct inode * ino - inode->i_fop = &ext2_dir_operations; - inode->i_mapping->a_ops = &ext2_aops; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext2_inode_is_fast_symlink(inode)) - inode->i_op = 
&ext2_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - } -- } else -+ } else { -+ inode->i_op = &ext2_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(raw_inode->i_block[0])); -+ } - brelse (bh); - inode->i_attr_flags = 0; - if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) { ---- linux/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54-hp Wed Oct 3 22:57:36 2001 -+++ linux-mmonroe/fs/ext2/namei.c Fri May 16 08:43:01 2003 -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - - /* -@@ -136,7 +137,7 @@ static int ext2_symlink (struct inode * - - if (l > sizeof (inode->u.ext2_i.i_data)) { - /* slow symlink */ -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - err = block_symlink(inode, symname, l); - if (err) -@@ -345,4 +346,15 @@ struct inode_operations ext2_dir_inode_o - rmdir: ext2_rmdir, - mknod: ext2_mknod, - rename: ext2_rename, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ -+struct inode_operations ext2_special_inode_operations = { -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- linux/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext2/super.c Fri May 16 08:43:01 2003 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -125,6 +126,7 @@ void ext2_put_super (struct super_block - int db_count; - int i; - -+ ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - struct ext2_super_block *es = EXT2_SB(sb)->s_es; - -@@ -175,6 +177,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) 
- *value++ = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -424,6 +433,9 @@ struct super_block * ext2_read_super (st - blocksize = BLOCK_SIZE; - - sb->u.ext2_sb.s_mount_opt = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ -+#endif - if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, - &sb->u.ext2_sb.s_mount_opt)) { - return NULL; -@@ -813,12 +825,27 @@ static DECLARE_FSTYPE_DEV(ext2_fs_type, - - static int __init init_ext2_fs(void) - { -- return register_filesystem(&ext2_fs_type); -+ int error = init_ext2_xattr(); -+ if (error) -+ return error; -+ error = init_ext2_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext2_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext2_xattr_user(); -+fail: -+ exit_ext2_xattr(); -+ return error; - } - - static void __exit exit_ext2_fs(void) - { - unregister_filesystem(&ext2_fs_type); -+ exit_ext2_xattr_user(); -+ exit_ext2_xattr(); - } - - EXPORT_NO_SYMBOLS; ---- linux/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54-hp Wed Sep 27 13:41:33 2000 -+++ linux-mmonroe/fs/ext2/symlink.c Fri May 16 08:43:01 2003 -@@ -19,6 +19,7 @@ - - #include - #include -+#include - - static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -32,7 +33,20 @@ static int ext2_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext2_symlink_inode_operations = { -+ readlink: page_readlink, -+ follow_link: page_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ - struct inode_operations ext2_fast_symlink_inode_operations = { 
- readlink: ext2_readlink, - follow_link: ext2_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext2/xattr.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,1212 @@ -+/* -+ * linux/fs/ext2/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT2_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT2_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. 
-+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+EXPORT_SYMBOL(ext2_xattr_register); -+EXPORT_SYMBOL(ext2_xattr_unregister); -+EXPORT_SYMBOL(ext2_xattr_get); -+EXPORT_SYMBOL(ext2_xattr_list); -+EXPORT_SYMBOL(ext2_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT2_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) 
-+#endif -+ -+static int ext2_xattr_set2(struct inode *, struct buffer_head *, -+ struct ext2_xattr_header *); -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+static int ext2_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext2_xattr_cache_find(struct inode *, -+ struct ext2_xattr_header *); -+static void ext2_xattr_cache_remove(struct buffer_head *); -+static void ext2_xattr_rehash(struct ext2_xattr_header *, -+ struct ext2_xattr_entry *); -+ -+static struct mb_cache *ext2_xattr_cache; -+ -+#else -+# define ext2_xattr_cache_insert(bh) 0 -+# define ext2_xattr_cache_find(inode, header) NULL -+# define ext2_xattr_cache_remove(bh) while(0) {} -+# define ext2_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext2_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext2_xattr_sem); -+ -+static inline int -+ext2_xattr_new_block(struct inode *inode, int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + -+ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext2_new_block(inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext2_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? 
*/ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext2_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext2_xattr_free_block(struct inode * inode, unsigned long block) -+{ -+ ext2_free_blocks(inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext2_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext2_xattr_free_block(inode, block) \ -+ ext2_free_blocks(inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; -+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ if (!ext2_xattr_handlers[name_index-1]) { -+ ext2_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext2_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ ext2_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext2_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const 
char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static struct ext2_xattr_handler * -+ext2_xattr_resolve_name(const char **name) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext2_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext2_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext2_handler_lock); -+ return handler; -+} -+ -+static inline struct ext2_xattr_handler * -+ext2_xattr_handler(int name_index) -+{ -+ struct ext2_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ read_lock(&ext2_handler_lock); -+ handler = ext2_xattr_handlers[name_index-1]; -+ read_unlock(&ext2_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext2_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) 
-+ value = ""; /* empty EA, do not remove */ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext2_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT2_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ 
error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT2_I(inode)->i_file_acl) -+ return 0; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT2_XATTR_NEXT(entry)) { -+ struct ext2_xattr_handler *handler; -+ -+ handler = ext2_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* 
-+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext2_xattr_update_super_block(struct super_block *sb) -+{ -+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT2_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext2_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_header *header = NULL; -+ struct ext2_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT2_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. 
-+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext2_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(sb, "ext2_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext2_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT2_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT2_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT2_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext2_xattr_cache_remove(bh); -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT2_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT2_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext2_xattr_set2(inode, bh, NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT2_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT2_XATTR_PAD, 0, -+ EXT2_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext2_xattr_rehash(header, here); -+ -+ error = ext2_xattr_set2(inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext2_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext2_xattr_set(): Update the file system. -+ */ -+static int -+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, -+ struct ext2_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext2_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext2_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ ext2_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT2_I(inode)->i_file_acl != 0; -+ int block = ext2_xattr_new_block(inode, &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+ ext2_xattr_free_block(inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ ext2_xattr_cache_insert(new_bh); -+ -+ ext2_xattr_update_super_block(sb); -+ } -+ mark_buffer_dirty(new_bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &new_bh); -+ wait_on_buffer(new_bh); -+ error = -EIO; -+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ if (IS_SYNC(inode)) { -+ error = ext2_sync_inode (inode); -+ if (error) -+ goto cleanup; -+ } else -+ mark_inode_dirty(inode); -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext2_xattr_free_block(inode, old_bh->b_blocknr); -+ mark_buffer_clean(old_bh); -+ } else { -+ /* Decrement the refcount only. 
*/ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext2_xattr_quota_free(inode); -+ mark_buffer_dirty(old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT2_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext2_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext2_xattr_cache_remove(bh); -+ ext2_xattr_free_block(inode, block); -+ bforget(bh); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ mark_buffer_dirty(bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &bh); -+ wait_on_buffer(bh); -+ } -+ ext2_xattr_quota_free(inode); -+ } -+ EXT2_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext2_xattr_sem); -+} -+ -+/* -+ * ext2_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. 
-+ */ -+void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+/* -+ * ext2_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext2_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext2_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext2_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext2_xattr_cmp(struct ext2_xattr_header *header1, -+ struct ext2_xattr_header *header2) -+{ -+ struct ext2_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT2_XATTR_NEXT(entry1); -+ entry2 = EXT2_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext2_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT2_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT2_XATTR_REFCOUNT_MAX); -+ } else if (!ext2_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext2_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext2_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext2_xattr_rehash(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ struct ext2_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext2_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT2_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext2_xattr(void) -+{ -+ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext2_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+ mb_cache_destroy(ext2_xattr_cache); -+} -+ -+#else /* CONFIG_EXT2_FS_XATTR_SHARING */ -+ -+int __init -+init_ext2_xattr(void) -+{ -+ return 
0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext2/xattr_user.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,103 @@ -+/* -+ * linux/fs/ext2/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext2_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext2_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext2_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ return 
ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct ext2_xattr_handler ext2_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext2_xattr_user_list, -+ get: ext2_xattr_user_get, -+ set: ext2_xattr_user_set, -+}; -+ -+int __init -+init_ext2_xattr_user(void) -+{ -+ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -+ -+void -+exit_ext2_xattr_user(void) -+{ -+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} ---- linux/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/ext3/Makefile Fri May 16 08:43:01 2003 -@@ -1,5 +1,5 @@ - # --# Makefile for the linux ext2-filesystem routines. -+# Makefile for the linux ext3-filesystem routines. - # - # Note! Dependencies are done automagically by 'make dep', which also - # removes any old dependencies. DON'T put your own dependencies here -@@ -9,10 +9,13 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -+obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make ---- linux/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/ext3/file.c Fri May 16 08:43:01 2003 -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -126,5 +127,9 @@ struct file_operations ext3_file_operati - struct inode_operations ext3_file_inode_operations = { - truncate: ext3_truncate, /* BKL held */ - setattr: ext3_setattr, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; - ---- 
linux/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext3/ialloc.c Fri May 16 08:43:01 2003 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - ---- linux/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/ext3/inode.c Fri May 16 08:43:01 2003 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext3_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. 
- */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1855,6 +1865,8 @@ void ext3_truncate(struct inode * inode) - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2002,8 +2014,6 @@ int ext3_get_inode_loc (struct inode *in - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2130,10 +2140,7 @@ void ext3_read_inode(struct inode * inod - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2141,15 +2148,17 @@ void ext3_read_inode(struct inode * inod - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - } -- } else -+ } else { -+ inode->i_op = &ext3_special_inode_operations; - 
init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); -+ } - /* inode->i_attr_flags = 0; unused */ - if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ ---- linux/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:47 2003 -+++ linux-mmonroe/fs/ext3/namei.c Fri May 16 08:43:01 2003 -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1611,7 +1612,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1619,7 +1620,6 @@ static int ext3_mkdir(struct inode * dir - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1646,9 +1646,6 @@ static int ext3_mkdir(struct inode * dir - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { -@@ -2017,7 +2014,7 @@ static int ext3_symlink (struct inode * - goto out_stop; - - if (l > sizeof (EXT3_I(inode)->i_data)) { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* - * block_symlink() calls back into ext3_prepare/commit_write. 
-@@ -2244,4 +2241,16 @@ struct inode_operations ext3_dir_inode_o - rmdir: ext3_rmdir, /* BKL held */ - mknod: ext3_mknod, /* BKL held */ - rename: ext3_rename, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -+ -+struct inode_operations ext3_special_inode_operations = { -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ ---- linux/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/fs/ext3/super.c Fri May 16 08:43:01 2003 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -502,6 +504,7 @@ static int parse_options (char * options - int is_remount) - { - unsigned long *mount_options = &sbi->s_mount_opt; -+ - uid_t *resuid = &sbi->s_resuid; - gid_t *resgid = &sbi->s_resgid; - char * this_char; -@@ -514,6 +517,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -931,6 +941,12 @@ struct super_block * ext3_read_super (st - sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - 
sbi->s_resgid = EXT3_DEF_RESGID; -+ -+ /* Default extended attribute flags */ -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ /* set_opt(sbi->s_mount_opt, XATTR_USER); */ -+#endif -+ - if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { - sb->s_dev = 0; - goto out_fail; -@@ -1768,17 +1784,29 @@ static DECLARE_FSTYPE_DEV(ext3_fs_type, - - static int __init init_ext3_fs(void) - { -- return register_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); -+ if (error) -+ return error; -+ error = init_ext3_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_xattr_user(); -+fail: -+ exit_ext3_xattr(); -+ return error; - } - - static void __exit exit_ext3_fs(void) - { - unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); ---- linux/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54-hp Fri Nov 9 14:25:04 2001 -+++ linux-mmonroe/fs/ext3/symlink.c Fri May 16 08:43:01 2003 -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -33,7 +34,20 @@ static int ext3_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext3_symlink_inode_operations = { -+ readlink: page_readlink, /* BKL not held. Don't need */ -+ follow_link: page_follow_link, /* BKL not held. 
Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ - struct inode_operations ext3_fast_symlink_inode_operations = { - readlink: ext3_readlink, /* BKL not held. Don't need */ - follow_link: ext3_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext3/xattr.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,1225 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and aligned to EXT3_XATTR_PAD -+ * byte boundaries.
The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * process accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define EXT3_EA_USER "user." -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...)
-+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) while(0) {} -+# define ext3_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? 
*/ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ 
-+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) { -+ if (ext3_xattr_handlers[i]) { -+ const char *n = strcmp_prefix(*name, -+ ext3_xattr_handlers[i]->prefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+
struct inode *inode = dentry->d_inode; -+ -+ if (size == 0) -+ value = ""; /* empty EA, do not remove */ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext3_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT3_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* 
find named attribute */ -+ name_len = strlen(name); -+ -+ error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* 
-+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT3_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. 
-+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext3_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT3_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext3_xattr_set2(handle, inode, bh,NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT3_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT3_XATTR_PAD, 0, -+ EXT3_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext3_xattr_rehash(header, here); -+ -+ error = ext3_xattr_set2(handle, inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext3_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext3_xattr_set(): Update the file system. -+ */ -+static int -+ext3_xattr_set2(handle_t *handle, struct inode *inode, -+ struct buffer_head *old_bh, struct ext3_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext3_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext3_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ error = ext3_journal_get_write_access(handle, new_bh); -+ if (error) -+ goto cleanup; -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ ext3_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT3_I(inode)->i_file_acl != 0; -+ int block = ext3_xattr_new_block(handle, inode, -+ &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+getblk_failed: ext3_xattr_free_block(handle, inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ error = ext3_journal_get_create_access(handle, new_bh); -+ if (error) { -+ unlock_buffer(new_bh); -+ goto getblk_failed; -+ } -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ ext3_xattr_cache_insert(new_bh); -+ -+ ext3_xattr_update_super_block(handle, sb); -+ } -+ error = ext3_journal_dirty_metadata(handle, new_bh); -+ if (error) -+ goto cleanup; -+ } -+ -+ /* Update the inode. */ -+ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ ext3_mark_inode_dirty(handle, inode); -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ error = ext3_journal_get_write_access(handle, old_bh); -+ if (error) -+ goto cleanup; -+ if (refcount == 1) { -+ /* Free the old block. 
*/ -+ ea_bdebug(old_bh, "freeing"); -+ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr); -+ -+ /* ext3_forget() calls bforget() for us, but we -+ let our caller release old_bh, so we need to -+ duplicate the handle before. */ -+ get_bh(old_bh); -+ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr); -+ } else { -+ /* Decrement the refcount only. */ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext3_xattr_quota_free(inode); -+ ext3_journal_dirty_metadata(handle, old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext3_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT3_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext3_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext3_error(inode->i_sb, "ext3_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ext3_journal_get_write_access(handle, bh); -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext3_xattr_cache_remove(bh); -+ ext3_xattr_free_block(handle, inode, block); -+ ext3_forget(handle, 1, inode, bh, block); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ ext3_journal_dirty_metadata(handle, bh); -+ if (IS_SYNC(inode)) -+ 
handle->h_sync = 1; -+ ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext3_xattr_sem); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ 
-+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext3/xattr_user.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,111 @@ -+/* -+ * linux/fs/ext3/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext3_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_WRITE); -+#else -+ error = 
permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext3_xattr_user_list, -+ get: ext3_xattr_user_get, -+ set: ext3_xattr_user_set, -+}; -+ -+int __init -+init_ext3_xattr_user(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -+ -+void -+exit_ext3_xattr_user(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/ext3/ext3-exports.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); ---- linux/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/jfs/jfs_xattr.h Fri May 16 08:43:01 2003 -@@ -52,8 +52,10 @@ struct jfs_ea_list { - #define END_EALIST(ealist) \ - ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) - --extern int __jfs_setxattr(struct inode *, const char *, void *, size_t, int); --extern int jfs_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, -+ int); -+extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, -+ int); - extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); - extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); - 
extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); ---- linux/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54-hp Thu Nov 28 15:53:15 2002 -+++ linux-mmonroe/fs/jfs/xattr.c Fri May 16 08:43:01 2003 -@@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s - } - - static int can_set_xattr(struct inode *inode, const char *name, -- void *value, size_t value_len) -+ const void *value, size_t value_len) - { - if (IS_RDONLY(inode)) - return -EROFS; -@@ -660,7 +660,7 @@ static int can_set_xattr(struct inode *i - return permission(inode, MAY_WRITE); - } - --int __jfs_setxattr(struct inode *inode, const char *name, void *value, -+int __jfs_setxattr(struct inode *inode, const char *name, const void *value, - size_t value_len, int flags) - { - struct jfs_ea_list *ealist; -@@ -799,7 +799,7 @@ int __jfs_setxattr(struct inode *inode, - return rc; - } - --int jfs_setxattr(struct dentry *dentry, const char *name, void *value, -+int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t value_len, int flags) - { - if (value == NULL) { /* empty EA, do not remove */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/fs/mbcache.c Fri May 16 08:43:01 2003 -@@ -0,0 +1,648 @@ -+/* -+ * linux/fs/mbcache.c -+ * (C) 2001-2002 Andreas Gruenbacher, -+ */ -+ -+/* -+ * Filesystem Meta Information Block Cache (mbcache) -+ * -+ * The mbcache caches blocks of block devices that need to be located -+ * by their device/block number, as well as by other criteria (such -+ * as the block's contents). -+ * -+ * There can only be one cache entry in a cache per device and block number. -+ * Additional indexes need not be unique in this sense. The number of -+ * additional indexes (=other criteria) can be hardwired at compile time -+ * or specified at cache create time. -+ * -+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' -+ * in the cache. A valid entry is in the main hash tables of the cache, -+ * and may also be in the lru list. 
An invalid entry is not in any hashes -+ * or lists. -+ * -+ * A valid cache entry is only in the lru list if no handles refer to it. -+ * Invalid cache entries will be freed when the last handle to the cache -+ * entry is released. Entries that cannot be freed immediately are put -+ * back on the lru list. -+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+#ifdef MB_CACHE_DEBUG -+# define mb_debug(f...) do { \ -+ printk(KERN_DEBUG f); \ -+ printk("\n"); \ -+ } while (0) -+#define mb_assert(c) do { if (!(c)) \ -+ printk(KERN_ERR "assertion " #c " failed\n"); \ -+ } while(0) -+#else -+# define mb_debug(f...) do { } while(0) -+# define mb_assert(c) do { } while(0) -+#endif -+#define mb_error(f...) do { \ -+ printk(KERN_ERR f); \ -+ printk("\n"); \ -+ } while(0) -+ -+MODULE_AUTHOR("Andreas Gruenbacher "); -+MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+MODULE_LICENSE("GPL"); -+#endif -+ -+EXPORT_SYMBOL(mb_cache_create); -+EXPORT_SYMBOL(mb_cache_shrink); -+EXPORT_SYMBOL(mb_cache_destroy); -+EXPORT_SYMBOL(mb_cache_entry_alloc); -+EXPORT_SYMBOL(mb_cache_entry_insert); -+EXPORT_SYMBOL(mb_cache_entry_release); -+EXPORT_SYMBOL(mb_cache_entry_takeout); -+EXPORT_SYMBOL(mb_cache_entry_free); -+EXPORT_SYMBOL(mb_cache_entry_dup); -+EXPORT_SYMBOL(mb_cache_entry_get); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+EXPORT_SYMBOL(mb_cache_entry_find_first); -+EXPORT_SYMBOL(mb_cache_entry_find_next); -+#endif -+ -+ -+/* -+ * Global data: list of all mbcache's, lru list, and a spinlock for -+ * accessing cache data structures on SMP machines. The lru list is -+ * global across all mbcaches. 
-+ */ -+ -+static LIST_HEAD(mb_cache_list); -+static LIST_HEAD(mb_cache_lru_list); -+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; -+ -+static inline int -+mb_cache_indexes(struct mb_cache *cache) -+{ -+#ifdef MB_CACHE_INDEXES_COUNT -+ return MB_CACHE_INDEXES_COUNT; -+#else -+ return cache->c_indexes_count; -+#endif -+} -+ -+/* -+ * What the mbcache registers as to get shrunk dynamically. -+ */ -+ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask); -+ -+static struct cache_definition mb_cache_definition = { -+ "mb_cache", -+ mb_cache_memory_pressure -+}; -+ -+ -+static inline int -+__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) -+{ -+ return !list_empty(&ce->e_block_list); -+} -+ -+ -+static inline void -+__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+{ -+ int n; -+ -+ if (__mb_cache_entry_is_hashed(ce)) { -+ list_del_init(&ce->e_block_list); -+ for (n=0; ne_cache); n++) -+ list_del(&ce->e_indexes[n].o_list); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ -+ mb_assert(atomic_read(&ce->e_used) == 0); -+ if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { -+ /* free failed -- put back on the lru list -+ for freeing later. 
*/ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&ce->e_lru_list, &mb_cache_lru_list); -+ spin_unlock(&mb_cache_spinlock); -+ } else { -+ kmem_cache_free(cache->c_entry_cache, ce); -+ atomic_dec(&cache->c_entry_count); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -+{ -+ if (atomic_dec_and_test(&ce->e_used)) { -+ if (__mb_cache_entry_is_hashed(ce)) -+ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ else { -+ spin_unlock(&mb_cache_spinlock); -+ __mb_cache_entry_forget(ce, GFP_KERNEL); -+ return; -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_memory_pressure() memory pressure callback -+ * -+ * This function is called by the kernel memory management when memory -+ * gets low. -+ * -+ * @priority: Amount by which to shrink the cache (0 = highes priority) -+ * @gfp_mask: (ignored) -+ */ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int count = 0; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &mb_cache_list) { -+ struct mb_cache *cache = -+ list_entry(l, struct mb_cache, c_cache_list); -+ mb_debug("cache %s (%d)", cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ count += atomic_read(&cache->c_entry_count); -+ } -+ mb_debug("trying to free %d of %d entries", -+ count / (priority ? 
priority : 1), count); -+ if (priority) -+ count /= priority; -+ while (count-- && !list_empty(&mb_cache_lru_list)) { -+ struct mb_cache_entry *ce = -+ list_entry(mb_cache_lru_list.next, -+ struct mb_cache_entry, e_lru_list); -+ list_del(&ce->e_lru_list); -+ __mb_cache_entry_unhash(ce); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), gfp_mask); -+ } -+} -+ -+ -+/* -+ * mb_cache_create() create a new cache -+ * -+ * All entries in one cache are equal size. Cache entries may be from -+ * multiple devices. If this is the first mbcache created, registers -+ * the cache with kernel memory management. Returns NULL if no more -+ * memory was available. -+ * -+ * @name: name of the cache (informal) -+ * @cache_op: contains the callback called when freeing a cache entry -+ * @entry_size: The size of a cache entry, including -+ * struct mb_cache_entry -+ * @indexes_count: number of additional indexes in the cache. Must equal -+ * MB_CACHE_INDEXES_COUNT if the number of indexes is -+ * hardwired. 
-+ * @bucket_count: number of hash buckets -+ */ -+struct mb_cache * -+mb_cache_create(const char *name, struct mb_cache_op *cache_op, -+ size_t entry_size, int indexes_count, int bucket_count) -+{ -+ int m=0, n; -+ struct mb_cache *cache = NULL; -+ -+ if(entry_size < sizeof(struct mb_cache_entry) + -+ indexes_count * sizeof(struct mb_cache_entry_index)) -+ return NULL; -+ -+ MOD_INC_USE_COUNT; -+ cache = kmalloc(sizeof(struct mb_cache) + -+ indexes_count * sizeof(struct list_head), GFP_KERNEL); -+ if (!cache) -+ goto fail; -+ cache->c_name = name; -+ cache->c_op.free = NULL; -+ if (cache_op) -+ cache->c_op.free = cache_op->free; -+ atomic_set(&cache->c_entry_count, 0); -+ cache->c_bucket_count = bucket_count; -+#ifdef MB_CACHE_INDEXES_COUNT -+ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -+#else -+ cache->c_indexes_count = indexes_count; -+#endif -+ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_block_hash) -+ goto fail; -+ for (n=0; nc_block_hash[n]); -+ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * -+ sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_indexes_hash[m]) -+ goto fail; -+ for (n=0; nc_indexes_hash[m][n]); -+ } -+ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, -+ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); -+ if (!cache->c_entry_cache) -+ goto fail; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&cache->c_cache_list, &mb_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ return cache; -+ -+fail: -+ if (cache) { -+ while (--m >= 0) -+ kfree(cache->c_indexes_hash[m]); -+ if (cache->c_block_hash) -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ } -+ MOD_DEC_USE_COUNT; -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_shrink() -+ * -+ * Removes all cache entires of a device from the cache. All cache entries -+ * currently in use cannot be freed, and thus remain in the cache. 
-+ * -+ * @cache: which cache to shrink -+ * @dev: which device's cache entries to shrink -+ */ -+void -+mb_cache_shrink(struct mb_cache *cache, kdev_t dev) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_dev == dev) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+} -+ -+ -+/* -+ * mb_cache_destroy() -+ * -+ * Shrinks the cache to its minimum possible size (hopefully 0 entries), -+ * and then destroys it. If this was the last mbcache, un-registers the -+ * mbcache from kernel memory management. -+ */ -+void -+mb_cache_destroy(struct mb_cache *cache) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_cache == cache) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ list_del(&cache->c_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+ -+ if (atomic_read(&cache->c_entry_count) > 0) { -+ mb_error("cache %s: %d orphaned entries", -+ cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ } -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) -+ /* We don't have kmem_cache_destroy() in 2.2.x */ -+ kmem_cache_shrink(cache->c_entry_cache); -+#else -+ kmem_cache_destroy(cache->c_entry_cache); -+#endif -+ for 
(n=0; n < mb_cache_indexes(cache); n++) -+ kfree(cache->c_indexes_hash[n]); -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+ -+/* -+ * mb_cache_entry_alloc() -+ * -+ * Allocates a new cache entry. The new entry will not be valid initially, -+ * and thus cannot be looked up yet. It should be filled with data, and -+ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL -+ * if no more memory was available. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_alloc(struct mb_cache *cache) -+{ -+ struct mb_cache_entry *ce; -+ -+ atomic_inc(&cache->c_entry_count); -+ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); -+ if (ce) { -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_LIST_HEAD(&ce->e_block_list); -+ ce->e_cache = cache; -+ atomic_set(&ce->e_used, 1); -+ } -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_insert() -+ * -+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into -+ * the cache. After this, the cache entry can be looked up, but is not yet -+ * in the lru list as the caller still holds a handle to it. Returns 0 on -+ * success, or -EBUSY if a cache entry for that device + inode exists -+ * already (this may happen after a failed lookup, if another process has -+ * inserted the same cache entry in the meantime). -+ * -+ * @dev: device the cache entry belongs to -+ * @block: block number -+ * @keys: array of additional keys. There must be indexes_count entries -+ * in the array (as specified when creating the cache). 
-+ */ -+int -+mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev, -+ unsigned long block, unsigned int keys[]) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ int error = -EBUSY, n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) -+ goto out; -+ } -+ __mb_cache_entry_unhash(ce); -+ ce->e_dev = dev; -+ ce->e_block = block; -+ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ for (n=0; ne_indexes[n].o_key = keys[n]; -+ bucket = keys[n] % cache->c_bucket_count; -+ list_add(&ce->e_indexes[n].o_list, -+ &cache->c_indexes_hash[n][bucket]); -+ } -+out: -+ spin_unlock(&mb_cache_spinlock); -+ return error; -+} -+ -+ -+/* -+ * mb_cache_entry_release() -+ * -+ * Release a handle to a cache entry. When the last handle to a cache entry -+ * is released it is either freed (if it is invalid) or otherwise inserted -+ * in to the lru list. -+ */ -+void -+mb_cache_entry_release(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_takeout() -+ * -+ * Take a cache entry out of the cache, making it invalid. The entry can later -+ * be re-inserted using mb_cache_entry_insert(), or released using -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_takeout(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_entry_free() -+ * -+ * This is equivalent to the sequence mb_cache_entry_takeout() -- -+ * mb_cache_entry_release(). 
-+ */ -+void -+mb_cache_entry_free(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_dup() -+ * -+ * Duplicate a handle to a cache entry (does not duplicate the cache entry -+ * itself). After the call, both the old and the new handle must be released. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_dup(struct mb_cache_entry *ce) -+{ -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_get() -+ * -+ * Get a cache entry by device / block number. (There can only be one entry -+ * in the cache per device and block.) Returns NULL if no such cache entry -+ * exists. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block) -+{ -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ goto cleanup; -+ } -+ } -+ ce = NULL; -+ -+cleanup: -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+ -+static struct mb_cache_entry * -+__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+ int index, kdev_t dev, unsigned int key) -+{ -+ while (l != head) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, -+ e_indexes[index].o_list); -+ if (ce->e_dev == dev && ce->e_indexes[index].o_key == key) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ return ce; -+ } -+ l = l->next; -+ } -+ return NULL; -+} -+ -+ -+/* -+ * 
mb_cache_entry_find_first() -+ * -+ * Find the first cache entry on a given device with a certain key in -+ * an additional index. Additonal matches can be found with -+ * mb_cache_entry_find_next(). Returns NULL if no match was found. -+ * -+ * @cache: the cache to search -+ * @index: the number of the additonal index to search (0<=indexc_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = cache->c_indexes_hash[index][bucket].next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_find_next() -+ * -+ * Find the next cache entry on a given device with a certain key in an -+ * additional index. Returns NULL if no match could be found. The previous -+ * entry is atomatically released, so that mb_cache_entry_find_next() can -+ * be called like this: -+ * -+ * entry = mb_cache_entry_find_first(); -+ * while (entry) { -+ * ... 
-+ * entry = mb_cache_entry_find_next(entry, ...); -+ * } -+ * -+ * @prev: The previous match -+ * @index: the number of the additonal index to search (0<=indexe_cache; -+ unsigned int bucket = key % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = prev->e_indexes[index].o_list.next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ __mb_cache_entry_release_unlock(prev); -+ return ce; -+} -+ -+#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ -+ -+static int __init init_mbcache(void) -+{ -+ register_cache(&mb_cache_definition); -+ return 0; -+} -+ -+static void __exit exit_mbcache(void) -+{ -+ unregister_cache(&mb_cache_definition); -+} -+ -+module_init(init_mbcache) -+module_exit(exit_mbcache) -+ ---- linux/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:45 2002 -+++ linux-mmonroe/include/asm-arm/unistd.h Fri May 16 08:43:01 2003 -@@ -244,7 +244,6 @@ - #define __NR_security (__NR_SYSCALL_BASE+223) - #define __NR_gettid (__NR_SYSCALL_BASE+224) - #define __NR_readahead (__NR_SYSCALL_BASE+225) --#if 0 /* allocated in 2.5 */ - #define __NR_setxattr (__NR_SYSCALL_BASE+226) - #define __NR_lsetxattr (__NR_SYSCALL_BASE+227) - #define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -@@ -257,7 +256,6 @@ - #define __NR_removexattr (__NR_SYSCALL_BASE+235) - #define __NR_lremovexattr (__NR_SYSCALL_BASE+236) - #define __NR_fremovexattr (__NR_SYSCALL_BASE+237) --#endif - #define __NR_tkill (__NR_SYSCALL_BASE+238) - /* - * Please check 2.5 _before_ adding calls here, ---- linux/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:45 2002 -+++ linux-mmonroe/include/asm-ppc64/unistd.h Fri May 16 08:43:01 2003 -@@ -218,6 +218,7 @@ - #define __NR_gettid 207 - #if 0 /* Reserved syscalls */ - #define __NR_tkill 208 -+#endif - #define __NR_setxattr 209 - #define 
__NR_lsetxattr 210 - #define __NR_fsetxattr 211 -@@ -230,6 +231,7 @@ - #define __NR_removexattr 218 - #define __NR_lremovexattr 219 - #define __NR_fremovexattr 220 -+#if 0 /* Reserved syscalls */ - #define __NR_futex 221 - #endif - ---- linux/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:45 2002 -+++ linux-mmonroe/include/asm-s390/unistd.h Fri May 16 08:43:01 2003 -@@ -212,9 +212,18 @@ - #define __NR_madvise 219 - #define __NR_getdents64 220 - #define __NR_fcntl64 221 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - ---- linux/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:45 2002 -+++ linux-mmonroe/include/asm-s390x/unistd.h Fri May 16 08:43:01 2003 -@@ -180,9 +180,18 @@ - #define __NR_pivot_root 217 - #define __NR_mincore 218 - #define __NR_madvise 219 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - ---- linux/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:45 2002 -+++ linux-mmonroe/include/asm-sparc/unistd.h Fri May 16 08:43:01 2003 -@@ -184,24 +184,24 @@ - /* #define __NR_exportfs 166 SunOS Specific */ - #define __NR_mount 167 /* Common */ - #define __NR_ustat 168 
/* Common */ --/* #define __NR_semsys 169 SunOS Specific */ --/* #define __NR_msgsys 170 SunOS Specific */ --/* #define __NR_shmsys 171 SunOS Specific */ --/* #define __NR_auditsys 172 SunOS Specific */ --/* #define __NR_rfssys 173 SunOS Specific */ -+#define __NR_setxattr 169 /* SunOS: semsys */ -+#define __NR_lsetxattr 170 /* SunOS: msgsys */ -+#define __NR_fsetxattr 171 /* SunOS: shmsys */ -+#define __NR_getxattr 172 /* SunOS: auditsys */ -+#define __NR_lgetxattr 173 /* SunOS: rfssys */ - #define __NR_getdents 174 /* Common */ - #define __NR_setsid 175 /* Common */ - #define __NR_fchdir 176 /* Common */ --/* #define __NR_fchroot 177 SunOS Specific */ --/* #define __NR_vpixsys 178 SunOS Specific */ --/* #define __NR_aioread 179 SunOS Specific */ --/* #define __NR_aiowrite 180 SunOS Specific */ --/* #define __NR_aiowait 181 SunOS Specific */ --/* #define __NR_aiocancel 182 SunOS Specific */ -+#define __NR_fgetxattr 177 /* SunOS: fchroot */ -+#define __NR_listxattr 178 /* SunOS: vpixsys */ -+#define __NR_llistxattr 179 /* SunOS: aioread */ -+#define __NR_flistxattr 180 /* SunOS: aiowrite */ -+#define __NR_removexattr 181 /* SunOS: aiowait */ -+#define __NR_lremovexattr 182 /* SunOS: aiocancel */ - #define __NR_sigpending 183 /* Common */ - #define __NR_query_module 184 /* Linux Specific */ - #define __NR_setpgid 185 /* Common */ --/* #define __NR_pathconf 186 SunOS Specific */ -+#define __NR_fremovexattr 186 /* SunOS: pathconf */ - #define __NR_tkill 187 /* SunOS: fpathconf */ - /* #define __NR_sysconf 188 SunOS Specific */ - #define __NR_uname 189 /* Linux Specific */ ---- linux/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54-hp Fri Aug 2 17:39:45 2002 -+++ linux-mmonroe/include/asm-sparc64/unistd.h Fri May 16 08:43:01 2003 -@@ -184,24 +184,24 @@ - /* #define __NR_exportfs 166 SunOS Specific */ - #define __NR_mount 167 /* Common */ - #define __NR_ustat 168 /* Common */ --/* #define __NR_semsys 169 SunOS Specific */ --/* #define __NR_msgsys 170 SunOS 
Specific */ --/* #define __NR_shmsys 171 SunOS Specific */ --/* #define __NR_auditsys 172 SunOS Specific */ --/* #define __NR_rfssys 173 SunOS Specific */ -+#define __NR_setxattr 169 /* SunOS: semsys */ -+#define __NR_lsetxattr 170 /* SunOS: msgsys */ -+#define __NR_fsetxattr 171 /* SunOS: shmsys */ -+#define __NR_getxattr 172 /* SunOS: auditsys */ -+#define __NR_lgetxattr 173 /* SunOS: rfssys */ - #define __NR_getdents 174 /* Common */ - #define __NR_setsid 175 /* Common */ - #define __NR_fchdir 176 /* Common */ --/* #define __NR_fchroot 177 SunOS Specific */ --/* #define __NR_vpixsys 178 SunOS Specific */ --/* #define __NR_aioread 179 SunOS Specific */ --/* #define __NR_aiowrite 180 SunOS Specific */ --/* #define __NR_aiowait 181 SunOS Specific */ --/* #define __NR_aiocancel 182 SunOS Specific */ -+#define __NR_fgetxattr 177 /* SunOS: fchroot */ -+#define __NR_listxattr 178 /* SunOS: vpixsys */ -+#define __NR_llistxattr 179 /* SunOS: aioread */ -+#define __NR_flistxattr 180 /* SunOS: aiowrite */ -+#define __NR_removexattr 181 /* SunOS: aiowait */ -+#define __NR_lremovexattr 182 /* SunOS: aiocancel */ - #define __NR_sigpending 183 /* Common */ - #define __NR_query_module 184 /* Linux Specific */ - #define __NR_setpgid 185 /* Common */ --/* #define __NR_pathconf 186 SunOS Specific */ -+#define __NR_fremovexattr 186 /* SunOS: pathconf */ - #define __NR_tkill 187 /* SunOS: fpathconf */ - /* #define __NR_sysconf 188 SunOS Specific */ - #define __NR_uname 189 /* Linux Specific */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/cache_def.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,15 @@ -+/* -+ * linux/cache_def.h -+ * Handling of caches defined in drivers, filesystems, ... 
-+ * -+ * Copyright (C) 2002 by Andreas Gruenbacher, -+ */ -+ -+struct cache_definition { -+ const char *name; -+ void (*shrink)(int, unsigned int); -+ struct list_head link; -+}; -+ -+extern void register_cache(struct cache_definition *); -+extern void unregister_cache(struct cache_definition *); ---- linux/include/linux/errno.h~linux-2.4.20-xattr-0.8.54-hp Fri Feb 9 14:46:13 2001 -+++ linux-mmonroe/include/linux/errno.h Fri May 16 08:43:01 2003 -@@ -23,4 +23,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif ---- linux/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54-hp Thu Nov 22 11:46:52 2001 -+++ linux-mmonroe/include/linux/ext2_fs.h Fri May 16 08:43:01 2003 -@@ -57,8 +57,6 @@ - */ - #define EXT2_BAD_INO 1 /* Bad blocks inode */ - #define EXT2_ROOT_INO 2 /* Root inode */ --#define EXT2_ACL_IDX_INO 3 /* ACL inode */ --#define EXT2_ACL_DATA_INO 4 /* ACL inode */ - #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ - -@@ -86,7 +84,6 @@ - #else - # define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) - #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -121,28 +118,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext2_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext2_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for 
the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext2_group_desc -@@ -314,6 +289,7 @@ struct ext2_inode { - #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ - #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ - #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ -+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt - #define set_opt(o, opt) o |= EXT2_MOUNT_##opt -@@ -397,6 +373,7 @@ struct ext2_super_block { - - #ifdef __KERNEL__ - #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) -+#define EXT2_I(inode) (&((inode)->u.ext2_i)) - #else - /* Assume that user mode programs are passing in an ext2fs superblock, not - * a kernel struct super_block. This will allow us to call the feature-test -@@ -466,7 +443,7 @@ struct ext2_super_block { - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 - #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff - --#define EXT2_FEATURE_COMPAT_SUPP 0 -+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE - #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ -@@ -623,8 +600,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext2_dir_inode_operations; -+extern struct inode_operations ext2_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext2_symlink_inode_operations; - extern struct inode_operations ext2_fast_symlink_inode_operations; - - #endif /* __KERNEL__ */ ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/ext2_xattr.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext2_xattr.h -+ -+ On-disk format of extended attributes for the ext2 filesystem. 
-+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT2_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT2_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT2_XATTR_INDEX_MAX 10 -+#define EXT2_XATTR_INDEX_USER 1 -+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext2_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext2_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT2_XATTR_PAD_BITS 2 -+#define EXT2_XATTR_PAD (1<e_name_len)) ) -+#define EXT2_XATTR_SIZE(size) \ -+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT2_FS_XATTR -+ -+struct ext2_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext2_xattr_register(int, struct ext2_xattr_handler *); -+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); -+ -+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); -+extern 
ssize_t ext2_listxattr(struct dentry *, char *, size_t); -+extern int ext2_removexattr(struct dentry *, const char *); -+ -+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext2_xattr_list(struct inode *, char *, size_t); -+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext2_xattr_delete_inode(struct inode *); -+extern void ext2_xattr_put_super(struct super_block *); -+ -+extern int init_ext2_xattr(void) __init; -+extern void exit_ext2_xattr(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR */ -+# define ext2_setxattr NULL -+# define ext2_getxattr NULL -+# define ext2_listxattr NULL -+# define ext2_removexattr NULL -+ -+static inline int -+ext2_xattr_get(struct inode *inode, int name_index, -+ const char *name, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+} -+ -+static inline void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR */ -+ -+# ifdef CONFIG_EXT2_FS_XATTR_USER -+ -+extern int init_ext2_xattr_user(void) __init; -+extern void exit_ext2_xattr_user(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+static inline int -+init_ext2_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr_user(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- linux/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:47 2003 -+++ linux-mmonroe/include/linux/ext3_fs.h 
Fri May 16 08:43:01 2003 -@@ -63,8 +63,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -94,7 +92,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -129,28 +126,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -344,6 +319,7 @@ struct ext3_inode { - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -520,7 +496,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define 
EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -@@ -771,8 +748,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext3_dir_inode_operations; -+extern struct inode_operations ext3_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - - ---- linux/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/include/linux/ext3_jbd.h Fri May 16 08:43:01 2003 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. 
*/ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/ext3_xattr.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode 
*inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr(void) -+{ -+} -+ -+# endif 
/* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- linux/include/linux/fs.h~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:46 2003 -+++ linux-mmonroe/include/linux/fs.h Fri May 16 08:43:01 2003 -@@ -909,7 +909,7 @@ struct inode_operations { - int (*setattr) (struct dentry *, struct iattr *); - int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); -- int (*setxattr) (struct dentry *, const char *, void *, size_t, int); -+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); ---- /dev/null Mon May 20 21:11:23 2002 -+++ linux-mmonroe/include/linux/mbcache.h Fri May 16 08:43:01 2003 -@@ -0,0 +1,69 @@ -+/* -+ File: linux/mbcache.h -+ -+ (C) 2001 by Andreas Gruenbacher, -+*/ -+ -+/* Hardwire the number of additional indexes */ -+#define MB_CACHE_INDEXES_COUNT 1 -+ -+struct mb_cache_entry; -+ -+struct mb_cache_op { -+ int (*free)(struct mb_cache_entry *, int); -+}; -+ -+struct mb_cache { -+ struct list_head c_cache_list; -+ const char *c_name; -+ struct mb_cache_op c_op; -+ atomic_t c_entry_count; -+ int c_bucket_count; -+#ifndef MB_CACHE_INDEXES_COUNT -+ int c_indexes_count; -+#endif -+ kmem_cache_t *c_entry_cache; -+ struct list_head *c_block_hash; -+ struct list_head *c_indexes_hash[0]; -+}; -+ -+struct mb_cache_entry_index { -+ struct list_head o_list; -+ unsigned int o_key; -+}; -+ -+struct mb_cache_entry { -+ struct list_head e_lru_list; -+ struct 
mb_cache *e_cache; -+ atomic_t e_used; -+ kdev_t e_dev; -+ unsigned long e_block; -+ struct list_head e_block_list; -+ struct mb_cache_entry_index e_indexes[0]; -+}; -+ -+/* Functions on caches */ -+ -+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, -+ int, int); -+void mb_cache_shrink(struct mb_cache *, kdev_t); -+void mb_cache_destroy(struct mb_cache *); -+ -+/* Functions on cache entries */ -+ -+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); -+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long, -+ unsigned int[]); -+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); -+void mb_cache_entry_release(struct mb_cache_entry *); -+void mb_cache_entry_takeout(struct mb_cache_entry *); -+void mb_cache_entry_free(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t, -+ unsigned long); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, -+ kdev_t, unsigned int); -+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, -+ kdev_t, unsigned int); -+#endif ---- linux/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:42:45 2003 -+++ linux-mmonroe/kernel/ksyms.c Fri May 16 08:43:52 2003 -@@ -11,6 +11,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -106,6 +107,7 @@ EXPORT_SYMBOL(exit_mm); - EXPORT_SYMBOL(exit_files); - EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); -+EXPORT_SYMBOL(copy_fs_struct); - EXPORT_SYMBOL_GPL(make_pages_present); - - /* internal kernel memory management */ -@@ -126,6 +128,8 @@ EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - EXPORT_SYMBOL(kmem_cache_size); -+EXPORT_SYMBOL(register_cache); -+EXPORT_SYMBOL(unregister_cache); - 
EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); - EXPORT_SYMBOL(vfree); ---- linux/mm/vmscan.c~linux-2.4.20-xattr-0.8.54-hp Fri May 16 08:39:23 2003 -+++ linux-mmonroe/mm/vmscan.c Fri May 16 08:43:01 2003 -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -35,6 +36,39 @@ - */ - #define DEF_PRIORITY (6) - -+static DECLARE_MUTEX(other_caches_sem); -+static LIST_HEAD(cache_definitions); -+ -+void register_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_add(&cache->link, &cache_definitions); -+ up(&other_caches_sem); -+} -+ -+void unregister_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_del(&cache->link); -+ up(&other_caches_sem); -+} -+ -+static void shrink_other_caches(unsigned int priority, int gfp_mask) -+{ -+ struct list_head *p; -+ -+ if (down_trylock(&other_caches_sem)) -+ return; -+ -+ list_for_each_prev(p, &cache_definitions) { -+ struct cache_definition *cache = -+ list_entry(p, struct cache_definition, link); -+ -+ cache->shrink(priority, gfp_mask); -+ } -+ up(&other_caches_sem); -+} -+ - /* - * The swap-out function returns 1 if it successfully - * scanned all the pages it was asked to (`count'). 
-@@ -579,6 +613,7 @@ static int shrink_caches(zone_t * classz - - shrink_dcache_memory(priority, gfp_mask); - shrink_icache_memory(priority, gfp_mask); -+ shrink_other_caches(priority, gfp_mask); - #ifdef CONFIG_QUOTA - shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); - #endif - -_ diff --git a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch b/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch deleted file mode 100644 index 1489989..0000000 --- a/lustre/kernel_patches/patches/linux-2.4.20-xattr-0.8.54.patch +++ /dev/null @@ -1,5595 +0,0 @@ - Documentation/Configure.help | 66 ++ - arch/alpha/defconfig | 7 - arch/alpha/kernel/entry.S | 12 - arch/arm/defconfig | 7 - arch/arm/kernel/calls.S | 24 - arch/i386/defconfig | 7 - arch/ia64/defconfig | 7 - arch/ia64/kernel/entry.S | 24 - arch/m68k/defconfig | 7 - arch/mips/defconfig | 7 - arch/mips64/defconfig | 7 - arch/ppc/defconfig | 14 - arch/ppc64/kernel/misc.S | 2 - arch/s390/defconfig | 7 - arch/s390/kernel/entry.S | 24 - arch/s390x/defconfig | 7 - arch/s390x/kernel/entry.S | 24 - arch/s390x/kernel/wrapper32.S | 92 +++ - arch/sparc/defconfig | 7 - arch/sparc/kernel/systbls.S | 10 - arch/sparc64/defconfig | 7 - arch/sparc64/kernel/systbls.S | 20 - fs/Config.in | 14 - fs/Makefile | 3 - fs/ext2/Makefile | 4 - fs/ext2/file.c | 5 - fs/ext2/ialloc.c | 2 - fs/ext2/inode.c | 34 - - fs/ext2/namei.c | 14 - fs/ext2/super.c | 29 - fs/ext2/symlink.c | 14 - fs/ext2/xattr.c | 1212 +++++++++++++++++++++++++++++++++++++++++ - fs/ext2/xattr_user.c | 103 +++ - fs/ext3/Makefile | 10 - fs/ext3/file.c | 5 - fs/ext3/ialloc.c | 2 - fs/ext3/inode.c | 35 - - fs/ext3/namei.c | 21 - fs/ext3/super.c | 36 + - fs/ext3/symlink.c | 14 - fs/ext3/xattr.c | 1225 ++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/xattr_user.c | 111 +++ - fs/jfs/jfs_xattr.h | 6 - fs/jfs/xattr.c | 6 - fs/mbcache.c | 648 ++++++++++++++++++++++ - include/asm-arm/unistd.h | 2 - include/asm-ia64/unistd.h | 13 - include/asm-ppc64/unistd.h | 2 - 
include/asm-s390/unistd.h | 15 - include/asm-s390x/unistd.h | 15 - include/asm-sparc/unistd.h | 24 - include/asm-sparc64/unistd.h | 24 - include/linux/cache_def.h | 15 - include/linux/errno.h | 4 - include/linux/ext2_fs.h | 31 - - include/linux/ext2_xattr.h | 157 +++++ - include/linux/ext3_fs.h | 31 - - include/linux/ext3_jbd.h | 8 - include/linux/ext3_xattr.h | 157 +++++ - include/linux/fs.h | 2 - include/linux/mbcache.h | 69 ++ - kernel/ksyms.c | 4 - mm/vmscan.c | 35 + - fs/ext3/ext3-exports.c | 14 + - 64 files changed, 4355 insertions(+), 195 deletions(-) - ---- linux-2.4.20/Documentation/Configure.help~linux-2.4.20-xattr-0.8.54 2003-05-05 17:43:06.000000000 +0800 -+++ linux-2.4.20-root/Documentation/Configure.help 2003-05-07 18:08:03.000000000 +0800 -@@ -15242,6 +15242,39 @@ CONFIG_EXT2_FS - be compiled as a module, and so this could be dangerous. Most - everyone wants to say Y here. - -+Ext2 extended attributes -+CONFIG_EXT2_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext2 extended attribute block sharing -+CONFIG_EXT2_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext2 extended user attributes -+CONFIG_EXT2_FS_XATTR_USER -+ This option enables extended user attributes on ext2. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext2 trusted extended attributes -+CONFIG_EXT2_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext2 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. 
-+ -+ If unsure, say N. -+ - Ext3 journalling file system support (EXPERIMENTAL) - CONFIG_EXT3_FS - This is the journalling version of the Second extended file system -@@ -15274,6 +15307,39 @@ CONFIG_EXT3_FS - of your root partition (the one containing the directory /) cannot - be compiled as a module, and so this may be dangerous. - -+Ext3 extended attributes -+CONFIG_EXT3_FS_XATTR -+ Extended attributes are name:value pairs associated with inodes by -+ the kernel or by users (see the attr(5) manual page, or visit -+ for details). -+ -+ If unsure, say N. -+ -+Ext3 extended attribute block sharing -+CONFIG_EXT3_FS_XATTR_SHARING -+ This options enables code for sharing identical extended attribute -+ blocks among multiple inodes. -+ -+ Usually, say Y. -+ -+Ext3 extended user attributes -+CONFIG_EXT3_FS_XATTR_USER -+ This option enables extended user attributes on ext3. Processes can -+ associate extended user attributes with inodes to store additional -+ information such as the character encoding of files, etc. (see the -+ attr(5) manual page, or visit for details). -+ -+ If unsure, say N. -+ -+Ext3 trusted extended attributes -+CONFIG_EXT3_FS_XATTR_TRUSTED -+ This option enables extended attributes on ext3 that are accessible -+ (and visible) only to users capable of CAP_SYS_ADMIN. Usually this -+ is only the super user. Trusted extended attributes are meant for -+ implementing system/security services. -+ -+ If unsure, say N. -+ - Journal Block Device support (JBD for ext3) (EXPERIMENTAL) - CONFIG_JBD - This is a generic journalling layer for block devices. 
It is ---- linux-2.4.20/arch/alpha/defconfig~linux-2.4.20-xattr-0.8.54 2001-11-20 07:19:42.000000000 +0800 -+++ linux-2.4.20-root/arch/alpha/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ALPHA=y - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set ---- linux-2.4.20/arch/alpha/kernel/entry.S~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:42.000000000 +0800 -+++ linux-2.4.20-root/arch/alpha/kernel/entry.S 2003-05-07 18:08:03.000000000 +0800 -@@ -1154,6 +1154,18 @@ sys_call_table: - .quad sys_readahead - .quad sys_ni_syscall /* 380, sys_security */ - .quad sys_tkill -+ .quad sys_setxattr -+ .quad sys_lsetxattr -+ .quad sys_fsetxattr -+ .quad sys_getxattr /* 385 */ -+ .quad sys_lgetxattr -+ .quad sys_fgetxattr -+ .quad sys_listxattr -+ .quad sys_llistxattr -+ .quad sys_flistxattr /* 390 */ -+ .quad sys_removexattr -+ .quad sys_lremovexattr -+ .quad sys_fremovexattr - - /* Remember to update everything, kids. */ - .ifne (. 
- sys_call_table) - (NR_SYSCALLS * 8) ---- linux-2.4.20/arch/arm/defconfig~linux-2.4.20-xattr-0.8.54 2001-05-20 08:43:05.000000000 +0800 -+++ linux-2.4.20-root/arch/arm/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_ARM=y - # CONFIG_EISA is not set - # CONFIG_SBUS is not set ---- linux-2.4.20/arch/arm/kernel/calls.S~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:42.000000000 +0800 -+++ linux-2.4.20-root/arch/arm/kernel/calls.S 2003-05-07 18:08:03.000000000 +0800 -@@ -240,18 +240,18 @@ __syscall_start: - .long SYMBOL_NAME(sys_ni_syscall) /* Security */ - .long SYMBOL_NAME(sys_gettid) - /* 225 */ .long SYMBOL_NAME(sys_readahead) -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_setxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fsetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_getxattr */ --/* 230 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_lgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fgetxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_listxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_llistxattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_flistxattr */ --/* 235 */ .long SYMBOL_NAME(sys_ni_syscall) /* sys_removexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_lremovexattr */ -- .long SYMBOL_NAME(sys_ni_syscall) /* sys_fremovexattr */ -+ .long SYMBOL_NAME(sys_setxattr) -+ .long SYMBOL_NAME(sys_lsetxattr) -+ .long SYMBOL_NAME(sys_fsetxattr) -+ .long SYMBOL_NAME(sys_getxattr) -+/* 230 */ .long SYMBOL_NAME(sys_lgetxattr) -+ .long SYMBOL_NAME(sys_fgetxattr) -+ .long SYMBOL_NAME(sys_listxattr) -+ .long SYMBOL_NAME(sys_llistxattr) 
-+ .long SYMBOL_NAME(sys_flistxattr) -+/* 235 */ .long SYMBOL_NAME(sys_removexattr) -+ .long SYMBOL_NAME(sys_lremovexattr) -+ .long SYMBOL_NAME(sys_fremovexattr) - .long SYMBOL_NAME(sys_tkill) - /* - * Please check 2.5 _before_ adding calls here, ---- linux-2.4.20/arch/i386/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:09.000000000 +0800 -+++ linux-2.4.20-root/arch/i386/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_X86=y - CONFIG_ISA=y - # CONFIG_SBUS is not set ---- linux-2.4.20/arch/ia64/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:09.000000000 +0800 -+++ linux-2.4.20-root/arch/ia64/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options ---- linux-2.4.20/arch/ia64/kernel/entry.S~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:09.000000000 +0800 -+++ linux-2.4.20-root/arch/ia64/kernel/entry.S 2003-05-07 18:08:03.000000000 +0800 -@@ -1170,18 +1170,18 @@ sys_call_table: - data8 sys_getdents64 - data8 sys_getunwind // 1215 - data8 sys_readahead -- data8 ia64_ni_syscall -- data8 ia64_ni_syscall -- data8 ia64_ni_syscall -- data8 ia64_ni_syscall // 1220 -- data8 ia64_ni_syscall -- data8 ia64_ni_syscall -- data8 ia64_ni_syscall -- data8 ia64_ni_syscall -- data8 ia64_ni_syscall // 1225 -- data8 ia64_ni_syscall -- data8 
ia64_ni_syscall -- data8 ia64_ni_syscall -+ data8 sys_setxattr -+ data8 sys_lsetxattr -+ data8 sys_fsetxattr -+ data8 sys_getxattr // 1220 -+ data8 sys_lgetxattr -+ data8 sys_fgetxattr -+ data8 sys_listxattr -+ data8 sys_llistxattr -+ data8 sys_flistxattr // 1225 -+ data8 sys_removexattr -+ data8 sys_lremovexattr -+ data8 sys_fremovexattr - data8 sys_tkill - data8 ia64_ni_syscall // 1230 - data8 ia64_ni_syscall ---- linux-2.4.20/arch/m68k/defconfig~linux-2.4.20-xattr-0.8.54 2000-06-20 03:56:08.000000000 +0800 -+++ linux-2.4.20-root/arch/m68k/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - - # ---- linux-2.4.20/arch/mips/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:10.000000000 +0800 -+++ linux-2.4.20-root/arch/mips/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - CONFIG_MIPS32=y - # CONFIG_MIPS64 is not set ---- linux-2.4.20/arch/mips64/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:10.000000000 +0800 -+++ linux-2.4.20-root/arch/mips64/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# 
CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_MIPS=y - # CONFIG_MIPS32 is not set - CONFIG_MIPS64=y ---- linux-2.4.20/arch/ppc/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 -+++ linux-2.4.20-root/arch/ppc/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,20 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_UID16 is not set - # CONFIG_RWSEM_GENERIC_SPINLOCK is not set - CONFIG_RWSEM_XCHGADD_ALGORITHM=y ---- linux-2.4.20/arch/ppc64/kernel/misc.S~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 -+++ linux-2.4.20-root/arch/ppc64/kernel/misc.S 2003-05-07 18:08:03.000000000 +0800 -@@ -731,6 +731,7 @@ _GLOBAL(sys_call_table32) - .llong .sys_gettid /* 207 */ - #if 0 /* Reserved syscalls */ - .llong .sys_tkill /* 208 */ -+#endif - .llong .sys_setxattr - .llong .sys_lsetxattr /* 210 */ - .llong .sys_fsetxattr -@@ -743,6 +744,7 @@ _GLOBAL(sys_call_table32) - .llong .sys_removexattr - .llong .sys_lremovexattr - .llong .sys_fremovexattr /* 220 */ -+#if 0 /* Reserved syscalls */ - .llong .sys_futex - #endif - .llong .sys_perfmonctl /* Put this here for now ... 
*/ ---- linux-2.4.20/arch/s390/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 -+++ linux-2.4.20-root/arch/s390/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set ---- linux-2.4.20/arch/s390/kernel/entry.S~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 -+++ linux-2.4.20-root/arch/s390/kernel/entry.S 2003-05-07 18:08:03.000000000 +0800 -@@ -558,18 +558,18 @@ sys_call_table: - .long sys_fcntl64 - .long sys_ni_syscall - .long sys_ni_syscall -- .long sys_ni_syscall /* 224 - reserved for setxattr */ -- .long sys_ni_syscall /* 225 - reserved for lsetxattr */ -- .long sys_ni_syscall /* 226 - reserved for fsetxattr */ -- .long sys_ni_syscall /* 227 - reserved for getxattr */ -- .long sys_ni_syscall /* 228 - reserved for lgetxattr */ -- .long sys_ni_syscall /* 229 - reserved for fgetxattr */ -- .long sys_ni_syscall /* 230 - reserved for listxattr */ -- .long sys_ni_syscall /* 231 - reserved for llistxattr */ -- .long sys_ni_syscall /* 232 - reserved for flistxattr */ -- .long sys_ni_syscall /* 233 - reserved for removexattr */ -- .long sys_ni_syscall /* 234 - reserved for lremovexattr */ -- .long sys_ni_syscall /* 235 - reserved for fremovexattr */ -+ .long sys_setxattr -+ .long sys_lsetxattr /* 225 */ -+ .long sys_fsetxattr -+ .long sys_getxattr -+ .long sys_lgetxattr -+ .long sys_fgetxattr -+ .long sys_listxattr /* 230 */ -+ .long sys_llistxattr -+ .long sys_flistxattr -+ .long sys_removexattr -+ .long sys_lremovexattr -+ .long sys_fremovexattr /* 235 */ - .long sys_gettid - .long sys_tkill - .rept 255-237 ---- 
linux-2.4.20/arch/s390x/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 -+++ linux-2.4.20-root/arch/s390x/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - # CONFIG_ISA is not set - # CONFIG_EISA is not set - # CONFIG_MCA is not set ---- linux-2.4.20/arch/s390x/kernel/entry.S~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:11.000000000 +0800 -+++ linux-2.4.20-root/arch/s390x/kernel/entry.S 2003-05-07 18:08:03.000000000 +0800 -@@ -591,18 +591,18 @@ sys_call_table: - .long SYSCALL(sys_ni_syscall,sys32_fcntl64_wrapper) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) - .long SYSCALL(sys_ni_syscall,sys_ni_syscall) -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 224 - reserved for setxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 225 - reserved for lsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 226 - reserved for fsetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 227 - reserved for getxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 228 - reserved for lgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 229 - reserved for fgetxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 230 - reserved for listxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 231 - reserved for llistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 232 - reserved for flistxattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 233 - reserved for removexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 234 - reserved for lremovexattr */ -- .long SYSCALL(sys_ni_syscall,sys_ni_syscall) /* 235 - reserved for fremovexattr */ -+ .long 
SYSCALL(sys_setxattr,sys32_setxattr_wrapper) -+ .long SYSCALL(sys_lsetxattr,sys32_lsetxattr_wrapper) /* 225 */ -+ .long SYSCALL(sys_fsetxattr,sys32_fsetxattr_wrapper) -+ .long SYSCALL(sys_getxattr,sys32_getxattr_wrapper) -+ .long SYSCALL(sys_lgetxattr,sys32_lgetxattr_wrapper) -+ .long SYSCALL(sys_fgetxattr,sys32_fgetxattr_wrapper) -+ .long SYSCALL(sys_listxattr,sys32_listxattr_wrapper) /* 230 */ -+ .long SYSCALL(sys_llistxattr,sys32_llistxattr_wrapper) -+ .long SYSCALL(sys_flistxattr,sys32_flistxattr_wrapper) -+ .long SYSCALL(sys_removexattr,sys32_removexattr_wrapper) -+ .long SYSCALL(sys_lremovexattr,sys32_lremovexattr_wrapper) -+ .long SYSCALL(sys_fremovexattr,sys32_fremovexattr_wrapper)/* 235 */ - .long SYSCALL(sys_gettid,sys_gettid) - .long SYSCALL(sys_tkill,sys_tkill) - .rept 255-237 ---- linux-2.4.20/arch/s390x/kernel/wrapper32.S~linux-2.4.20-xattr-0.8.54 2002-02-26 03:37:56.000000000 +0800 -+++ linux-2.4.20-root/arch/s390x/kernel/wrapper32.S 2003-05-07 18:08:03.000000000 +0800 -@@ -1091,3 +1091,95 @@ sys32_fstat64_wrapper: - llgtr %r3,%r3 # struct stat64 * - llgfr %r4,%r4 # long - jg sys32_fstat64 # branch to system call -+ -+ .globl sys32_setxattr_wrapper -+sys32_setxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_setxattr -+ -+ .globl sys32_lsetxattr_wrapper -+sys32_lsetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_lsetxattr -+ -+ .globl sys32_fsetxattr_wrapper -+sys32_fsetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ lgfr %r6,%r6 # int -+ jg sys_fsetxattr -+ -+ .globl sys32_getxattr_wrapper -+sys32_getxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_getxattr -+ -+ .globl sys32_lgetxattr_wrapper 
-+sys32_lgetxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_lgetxattr -+ -+ .globl sys32_fgetxattr_wrapper -+sys32_fgetxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgtr %r4,%r4 # void * -+ llgfr %r5,%r5 # size_t -+ jg sys_fgetxattr -+ -+ .globl sys32_listxattr_wrapper -+sys32_listxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_listxattr -+ -+ .globl sys32_llistxattr_wrapper -+sys32_llistxattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_llistxattr -+ -+ .globl sys32_flistxattr_wrapper -+sys32_flistxattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ llgfr %r4,%r4 # size_t -+ jg sys_flistxattr -+ -+ .globl sys32_removexattr_wrapper -+sys32_removexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_removexattr -+ -+ .globl sys32_lremovexattr_wrapper -+sys32_lremovexattr_wrapper: -+ llgtr %r2,%r2 # char * -+ llgtr %r3,%r3 # char * -+ jg sys_lremovexattr -+ -+ .globl sys32_fremovexattr_wrapper -+sys32_fremovexattr_wrapper: -+ lgfr %r2,%r2 # int -+ llgtr %r3,%r3 # char * -+ jg sys_fremovexattr -+ -+ ---- linux-2.4.20/arch/sparc/defconfig~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:43.000000000 +0800 -+++ linux-2.4.20-root/arch/sparc/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - CONFIG_UID16=y - CONFIG_HIGHMEM=y - ---- linux-2.4.20/arch/sparc/kernel/systbls.S~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:43.000000000 +0800 -+++ linux-2.4.20-root/arch/sparc/kernel/systbls.S 2003-05-07 
18:08:03.000000000 +0800 -@@ -51,11 +51,11 @@ sys_call_table: - /*150*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - /*155*/ .long sys_fcntl64, sys_nis_syscall, sys_statfs, sys_fstatfs, sys_oldumount - /*160*/ .long sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_nis_syscall --/*165*/ .long sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall --/*170*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents --/*175*/ .long sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .long sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_sigpending, sys_query_module --/*185*/ .long sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sys_newuname -+/*165*/ .long sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_setxattr -+/*170*/ .long sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents -+/*175*/ .long sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .long sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_sigpending, sys_query_module -+/*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sys_newuname - /*190*/ .long sys_init_module, sys_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - /*195*/ .long sys_nis_syscall, sys_nis_syscall, sys_getppid, sparc_sigaction, sys_sgetmask - /*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir ---- linux-2.4.20/arch/sparc64/defconfig~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:12.000000000 +0800 -+++ linux-2.4.20-root/arch/sparc64/defconfig 2003-05-07 18:08:03.000000000 +0800 -@@ -1,6 +1,13 @@ - # - # Automatically generated make config: don't edit - # -+# CONFIG_EXT3_FS_XATTR is not set -+# CONFIG_EXT3_FS_XATTR_SHARING is not set -+# CONFIG_EXT3_FS_XATTR_USER is not set -+# CONFIG_EXT2_FS_XATTR is not set -+# CONFIG_EXT2_FS_XATTR_SHARING is not set -+# 
CONFIG_EXT2_FS_XATTR_USER is not set -+# CONFIG_FS_MBCACHE is not set - - # - # Code maturity level options ---- linux-2.4.20/arch/sparc64/kernel/systbls.S~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:43.000000000 +0800 -+++ linux-2.4.20-root/arch/sparc64/kernel/systbls.S 2003-05-07 18:08:03.000000000 +0800 -@@ -52,11 +52,11 @@ sys_call_table32: - /*150*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - .word sys32_fcntl64, sys_nis_syscall, sys32_statfs, sys32_fstatfs, sys_oldumount - /*160*/ .word sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_nis_syscall -- .word sys32_quotactl, sys_nis_syscall, sys32_mount, sys_ustat, sys_nis_syscall --/*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_getdents -- .word sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys32_sigpending, sys32_query_module -- .word sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sparc64_newuname -+ .word sys32_quotactl, sys_nis_syscall, sys32_mount, sys_ustat, sys_setxattr -+/*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys32_getdents -+ .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys32_sigpending, sys32_query_module -+ .word sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sparc64_newuname - /*190*/ .word sys32_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys32_sigaction, sys_sgetmask - /*200*/ .word sys_ssetmask, sys_sigsuspend, sys32_newlstat, sys_uselib, old32_readdir -@@ -111,11 +111,11 @@ sys_call_table: - /*150*/ .word sys_getsockname, sys_nis_syscall, sys_nis_syscall, sys_poll, sys_getdents64 - .word sys_nis_syscall, sys_nis_syscall, sys_statfs, sys_fstatfs, 
sys_oldumount - /*160*/ .word sys_nis_syscall, sys_nis_syscall, sys_getdomainname, sys_setdomainname, sys_utrap_install -- .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_nis_syscall --/*170*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_getdents -- .word sys_setsid, sys_fchdir, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall --/*180*/ .word sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall, sys_query_module -- .word sys_setpgid, sys_nis_syscall, sys_tkill, sys_nis_syscall, sparc64_newuname -+ .word sys_quotactl, sys_nis_syscall, sys_mount, sys_ustat, sys_setxattr -+/*170*/ .word sys_lsetxattr, sys_fsetxattr, sys_getxattr, sys_lgetxattr, sys_getdents -+ .word sys_setsid, sys_fchdir, sys_fgetxattr, sys_listxattr, sys_llistxattr -+/*180*/ .word sys_flistxattr, sys_removexattr, sys_lremovexattr, sys_nis_syscall, sys_query_module -+ .word sys_setpgid, sys_fremovexattr, sys_tkill, sys_nis_syscall, sparc64_newuname - /*190*/ .word sys_init_module, sparc64_personality, sys_nis_syscall, sys_nis_syscall, sys_nis_syscall - .word sys_nis_syscall, sys_nis_syscall, sys_getppid, sys_nis_syscall, sys_sgetmask - /*200*/ .word sys_ssetmask, sys_nis_syscall, sys_newlstat, sys_uselib, sys_nis_syscall ---- linux-2.4.20/fs/Config.in~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/Config.in 2003-05-07 18:08:03.000000000 +0800 -@@ -25,6 +25,11 @@ dep_mbool ' Debug Befs' CONFIG_BEFS_DEB - dep_tristate 'BFS file system support (EXPERIMENTAL)' CONFIG_BFS_FS $CONFIG_EXPERIMENTAL - - tristate 'Ext3 journalling file system support' CONFIG_EXT3_FS -+dep_mbool ' Ext3 extended attributes' CONFIG_EXT3_FS_XATTR $CONFIG_EXT3_FS -+dep_bool ' Ext3 extended attribute block sharing' \ -+ CONFIG_EXT3_FS_XATTR_SHARING $CONFIG_EXT3_FS_XATTR -+dep_bool ' Ext3 extended user attributes' \ -+ CONFIG_EXT3_FS_XATTR_USER $CONFIG_EXT3_FS_XATTR - # CONFIG_JBD could be its own option (even modular), but until 
there are - # other users than ext3, we will simply make it be the same as CONFIG_EXT3_FS - # dep_tristate ' Journal Block Device support (JBD for ext3)' CONFIG_JBD $CONFIG_EXT3_FS -@@ -84,6 +89,11 @@ dep_mbool ' QNX4FS write support (DANGE - tristate 'ROM file system support' CONFIG_ROMFS_FS - - tristate 'Second extended fs support' CONFIG_EXT2_FS -+dep_mbool ' Ext2 extended attributes' CONFIG_EXT2_FS_XATTR $CONFIG_EXT2_FS -+dep_bool ' Ext2 extended attribute block sharing' \ -+ CONFIG_EXT2_FS_XATTR_SHARING $CONFIG_EXT2_FS_XATTR -+dep_bool ' Ext2 extended user attributes' \ -+ CONFIG_EXT2_FS_XATTR_USER $CONFIG_EXT2_FS_XATTR - - tristate 'System V/Xenix/V7/Coherent file system support' CONFIG_SYSV_FS - -@@ -155,6 +165,10 @@ else - define_tristate CONFIG_ZISOFS_FS n - fi - -+# Meta block cache for Extended Attributes (ext2/ext3) -+#tristate 'Meta block cache' CONFIG_FS_MBCACHE -+define_tristate CONFIG_FS_MBCACHE y -+ - mainmenu_option next_comment - comment 'Partition Types' - source fs/partitions/Config.in ---- linux-2.4.20/fs/Makefile~linux-2.4.20-xattr-0.8.54 2003-05-05 19:00:58.000000000 +0800 -+++ linux-2.4.20-root/fs/Makefile 2003-05-07 18:08:03.000000000 +0800 -@@ -79,6 +79,9 @@ obj-y += binfmt_script.o - - obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o - -+export-objs += mbcache.o -+obj-$(CONFIG_FS_MBCACHE) += mbcache.o -+ - # persistent filesystems - obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) - ---- linux-2.4.20/fs/ext2/Makefile~linux-2.4.20-xattr-0.8.54 2001-10-11 23:05:18.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/Makefile 2003-05-07 18:08:03.000000000 +0800 -@@ -13,4 +13,8 @@ obj-y := balloc.o bitmap.o dir.o file - ioctl.o namei.o super.o symlink.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT2_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make ---- linux-2.4.20/fs/ext2/file.c~linux-2.4.20-xattr-0.8.54 2001-10-11 23:05:18.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/file.c 
2003-05-07 18:08:03.000000000 +0800 -@@ -20,6 +20,7 @@ - - #include - #include -+#include - #include - - /* -@@ -51,4 +52,8 @@ struct file_operations ext2_file_operati - - struct inode_operations ext2_file_inode_operations = { - truncate: ext2_truncate, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- linux-2.4.20/fs/ext2/ialloc.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/ialloc.c 2003-05-07 18:08:03.000000000 +0800 -@@ -15,6 +15,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -167,6 +168,7 @@ void ext2_free_inode (struct inode * ino - */ - if (!is_bad_inode(inode)) { - /* Quota is already initialized in iput() */ -+ ext2_xattr_delete_inode(inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - } ---- linux-2.4.20/fs/ext2/inode.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/inode.c 2003-05-07 18:08:03.000000000 +0800 -@@ -39,6 +39,18 @@ MODULE_LICENSE("GPL"); - static int ext2_update_inode(struct inode * inode, int do_sync); - - /* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext2_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext2_i.i_file_acl ? 
-+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ -+/* - * Called at each iput() - */ - void ext2_put_inode (struct inode * inode) -@@ -53,9 +65,7 @@ void ext2_delete_inode (struct inode * i - { - lock_kernel(); - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - inode->u.ext2_i.i_dtime = CURRENT_TIME; - mark_inode_dirty(inode); -@@ -801,6 +811,8 @@ void ext2_truncate (struct inode * inode - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext2_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -888,8 +900,7 @@ void ext2_read_inode (struct inode * ino - unsigned long offset; - struct ext2_group_desc * gdp; - -- if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && -- inode->i_ino != EXT2_ACL_DATA_INO && -+ if ((inode->i_ino != EXT2_ROOT_INO && - inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { - ext2_error (inode->i_sb, "ext2_read_inode", -@@ -974,10 +985,7 @@ void ext2_read_inode (struct inode * ino - for (block = 0; block < EXT2_N_BLOCKS; block++) - inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; - -- if (inode->i_ino == EXT2_ACL_IDX_INO || -- inode->i_ino == EXT2_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext2_file_inode_operations; - inode->i_fop = &ext2_file_operations; - inode->i_mapping->a_ops = &ext2_aops; -@@ -986,15 +994,17 @@ void ext2_read_inode (struct inode * ino - inode->i_fop = &ext2_dir_operations; - inode->i_mapping->a_ops = &ext2_aops; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext2_inode_is_fast_symlink(inode)) - inode->i_op = 
&ext2_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - } -- } else -+ } else { -+ inode->i_op = &ext2_special_inode_operations; - init_special_inode(inode, inode->i_mode, - le32_to_cpu(raw_inode->i_block[0])); -+ } - brelse (bh); - inode->i_attr_flags = 0; - if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) { ---- linux-2.4.20/fs/ext2/namei.c~linux-2.4.20-xattr-0.8.54 2001-10-04 13:57:36.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/namei.c 2003-05-07 18:08:03.000000000 +0800 -@@ -31,6 +31,7 @@ - - #include - #include -+#include - #include - - /* -@@ -136,7 +137,7 @@ static int ext2_symlink (struct inode * - - if (l > sizeof (inode->u.ext2_i.i_data)) { - /* slow symlink */ -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext2_symlink_inode_operations; - inode->i_mapping->a_ops = &ext2_aops; - err = block_symlink(inode, symname, l); - if (err) -@@ -345,4 +346,15 @@ struct inode_operations ext2_dir_inode_o - rmdir: ext2_rmdir, - mknod: ext2_mknod, - rename: ext2_rename, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, -+}; -+ -+struct inode_operations ext2_special_inode_operations = { -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- linux-2.4.20/fs/ext2/super.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/super.c 2003-05-07 18:08:03.000000000 +0800 -@@ -21,6 +21,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -125,6 +126,7 @@ void ext2_put_super (struct super_block - int db_count; - int i; - -+ ext2_xattr_put_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { - struct ext2_super_block *es = EXT2_SB(sb)->s_es; - -@@ -175,6 +177,13 @@ static int parse_options (char * options - this_char = strtok 
(NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -424,6 +433,9 @@ struct super_block * ext2_read_super (st - blocksize = BLOCK_SIZE; - - sb->u.ext2_sb.s_mount_opt = 0; -+#ifdef CONFIG_EXT2_FS_XATTR_USER -+ /* set_opt (sb->u.ext2_sb.s_mount_opt, XATTR_USER); */ -+#endif - if (!parse_options ((char *) data, &sb_block, &resuid, &resgid, - &sb->u.ext2_sb.s_mount_opt)) { - return NULL; -@@ -813,12 +825,27 @@ static DECLARE_FSTYPE_DEV(ext2_fs_type, - - static int __init init_ext2_fs(void) - { -- return register_filesystem(&ext2_fs_type); -+ int error = init_ext2_xattr(); -+ if (error) -+ return error; -+ error = init_ext2_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext2_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext2_xattr_user(); -+fail: -+ exit_ext2_xattr(); -+ return error; - } - - static void __exit exit_ext2_fs(void) - { - unregister_filesystem(&ext2_fs_type); -+ exit_ext2_xattr_user(); -+ exit_ext2_xattr(); - } - - EXPORT_NO_SYMBOLS; ---- linux-2.4.20/fs/ext2/symlink.c~linux-2.4.20-xattr-0.8.54 2000-09-28 04:41:33.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/symlink.c 2003-05-07 18:08:03.000000000 +0800 -@@ -19,6 +19,7 @@ - - #include - #include -+#include - - static int ext2_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -32,7 +33,20 @@ static int ext2_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext2_symlink_inode_operations = { -+ readlink: page_readlink, -+ follow_link: page_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ 
removexattr: ext2_removexattr, -+}; -+ - struct inode_operations ext2_fast_symlink_inode_operations = { - readlink: ext2_readlink, - follow_link: ext2_follow_link, -+ setxattr: ext2_setxattr, -+ getxattr: ext2_getxattr, -+ listxattr: ext2_listxattr, -+ removexattr: ext2_removexattr, - }; ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/xattr.c 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,1212 @@ -+/* -+ * linux/fs/ext2/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If all extended attributes of an inode are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and alligned to EXT2_XATTR_PAD -+ * byte boundaries. The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT2_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. 
-+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * processes accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext2_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+/* These symbols may be needed by a module. */ -+EXPORT_SYMBOL(ext2_xattr_register); -+EXPORT_SYMBOL(ext2_xattr_unregister); -+EXPORT_SYMBOL(ext2_xattr_get); -+EXPORT_SYMBOL(ext2_xattr_list); -+EXPORT_SYMBOL(ext2_xattr_set); -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT2_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) 
-+#endif -+ -+static int ext2_xattr_set2(struct inode *, struct buffer_head *, -+ struct ext2_xattr_header *); -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+static int ext2_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext2_xattr_cache_find(struct inode *, -+ struct ext2_xattr_header *); -+static void ext2_xattr_cache_remove(struct buffer_head *); -+static void ext2_xattr_rehash(struct ext2_xattr_header *, -+ struct ext2_xattr_entry *); -+ -+static struct mb_cache *ext2_xattr_cache; -+ -+#else -+# define ext2_xattr_cache_insert(bh) 0 -+# define ext2_xattr_cache_find(inode, header) NULL -+# define ext2_xattr_cache_remove(bh) while(0) {} -+# define ext2_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext2_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext2_xattr_sem); -+ -+static inline int -+ext2_xattr_new_block(struct inode *inode, int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block) + -+ EXT2_I(inode)->i_block_group * EXT2_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext2_new_block(inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext2_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? 
*/ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext2_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext2_xattr_free_block(struct inode * inode, unsigned long block) -+{ -+ ext2_free_blocks(inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext2_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext2_xattr_free_block(inode, block) \ -+ ext2_free_blocks(inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext2_xattr_handler *ext2_xattr_handlers[EXT2_XATTR_INDEX_MAX]; -+rwlock_t ext2_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext2_xattr_register(int name_index, struct ext2_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ if (!ext2_xattr_handlers[name_index-1]) { -+ ext2_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext2_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext2_xattr_unregister(int name_index, struct ext2_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT2_XATTR_INDEX_MAX) { -+ write_lock(&ext2_handler_lock); -+ ext2_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext2_handler_lock); -+ } -+} -+ -+static inline const char * -+strcmp_prefix(const 
char *a, const char *a_prefix)
-+{
-+	/*
-+	 * Returns the part of a that follows a_prefix, or NULL when a
-+	 * does not start with a_prefix.
-+	 */
-+	while (*a_prefix && *a == *a_prefix) {
-+		a++;
-+		a_prefix++;
-+	}
-+	return *a_prefix ? NULL : a;
-+}
-+
-+/*
-+ * Decode the extended attribute name, and translate it into
-+ * the name_index and name suffix.
-+ *
-+ * On a match, *name is advanced past the handler's prefix and the
-+ * handler is returned.  Returns NULL if *name is NULL or no registered
-+ * handler's prefix matches.
-+ */
-+static struct ext2_xattr_handler *
-+ext2_xattr_resolve_name(const char **name)
-+{
-+	struct ext2_xattr_handler *handler = NULL;
-+	int i;
-+
-+	if (!*name)
-+		return NULL;
-+	read_lock(&ext2_handler_lock);
-+	/* First registered handler whose prefix matches wins. */
-+	for (i=0; i<EXT2_XATTR_INDEX_MAX; i++) {
-+		if (ext2_xattr_handlers[i]) {
-+			const char *n = strcmp_prefix(*name,
-+				ext2_xattr_handlers[i]->prefix);
-+			if (n) {
-+				handler = ext2_xattr_handlers[i];
-+				*name = n;
-+				break;
-+			}
-+		}
-+	}
-+	read_unlock(&ext2_handler_lock);
-+	return handler;
-+}
-+
-+/*
-+ * Look up the handler registered for a (1-based) name index.
-+ * Returns NULL if the index is out of range or no handler is registered.
-+ */
-+static inline struct ext2_xattr_handler *
-+ext2_xattr_handler(int name_index)
-+{
-+	struct ext2_xattr_handler *handler = NULL;
-+	if (name_index > 0 && name_index <= EXT2_XATTR_INDEX_MAX) {
-+		read_lock(&ext2_handler_lock);
-+		handler = ext2_xattr_handlers[name_index-1];
-+		read_unlock(&ext2_handler_lock);
-+	}
-+	return handler;
-+}
-+
-+/*
-+ * Inode operation getxattr()
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+ssize_t
-+ext2_getxattr(struct dentry *dentry, const char *name,
-+	      void *buffer, size_t size)
-+{
-+	struct ext2_xattr_handler *handler;
-+	struct inode *inode = dentry->d_inode;
-+
-+	handler = ext2_xattr_resolve_name(&name);
-+	if (!handler)
-+		return -ENOTSUP;
-+	return handler->get(inode, name, buffer, size);
-+}
-+
-+/*
-+ * Inode operation listxattr()
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+ssize_t
-+ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
-+{
-+	return ext2_xattr_list(dentry->d_inode, buffer, size);
-+}
-+
-+/*
-+ * Inode operation setxattr()
-+ *
-+ * dentry->d_inode->i_sem down
-+ * BKL held [before 2.5.x]
-+ */
-+int
-+ext2_setxattr(struct dentry *dentry, const char *name,
-+	      const void *value, size_t size, int flags)
-+{
-+	struct ext2_xattr_handler *handler;
-+	struct inode *inode = dentry->d_inode;
-+
-+	if (size == 0)
-+ value = ""; /* empty EA, do not remove */ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, value, size, flags); -+} -+ -+/* -+ * Inode operation removexattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext2_removexattr(struct dentry *dentry, const char *name) -+{ -+ struct ext2_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext2_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->set(inode, name, NULL, 0, XATTR_REPLACE); -+} -+ -+/* -+ * ext2_xattr_get() -+ * -+ * Copy an extended attribute into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. -+ */ -+int -+ext2_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_entry *entry; -+ unsigned int block, size; -+ char *end; -+ int name_len, error; -+ -+ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", -+ name_index, name, buffer, (long)buffer_size); -+ -+ if (name == NULL) -+ return -EINVAL; -+ if (!EXT2_I(inode)->i_file_acl) -+ return -ENOATTR; -+ block = EXT2_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* find named attribute */ -+ name_len = strlen(name); -+ -+ 
error = -ERANGE; -+ if (name_len > 255) -+ goto cleanup; -+ entry = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (name_index == entry->e_name_index && -+ name_len == entry->e_name_len && -+ memcmp(name, entry->e_name, name_len) == 0) -+ goto found; -+ entry = next; -+ } -+ /* Check the remaining name entries */ -+ while (!IS_LAST_ENTRY(entry)) { -+ struct ext2_xattr_entry *next = -+ EXT2_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ entry = next; -+ } -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ error = -ENOATTR; -+ goto cleanup; -+found: -+ /* check the buffer size */ -+ if (entry->e_value_block != 0) -+ goto bad_block; -+ size = le32_to_cpu(entry->e_value_size); -+ if (size > inode->i_sb->s_blocksize || -+ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize) -+ goto bad_block; -+ -+ if (ext2_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (buffer) { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ /* return value of attribute */ -+ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), -+ size); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_list() -+ * -+ * Copy a list of attribute names into the buffer -+ * provided, or compute the buffer size required. -+ * Buffer is NULL to compute the size of the buffer required. -+ * -+ * Returns a negative error number on failure, or the number of bytes -+ * used / required on success. 
-+ */
-+int
-+ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
-+{
-+	struct buffer_head *bh = NULL;
-+	struct ext2_xattr_entry *entry;
-+	unsigned int block, size = 0;
-+	char *buf, *end;
-+	int error;
-+
-+	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
-+		  buffer, (long)buffer_size);
-+
-+	/* No attribute block: the list is empty. */
-+	if (!EXT2_I(inode)->i_file_acl)
-+		return 0;
-+	block = EXT2_I(inode)->i_file_acl;
-+	ea_idebug(inode, "reading block %d", block);
-+	bh = sb_bread(inode->i_sb, block);
-+	if (!bh)
-+		return -EIO;
-+	ea_bdebug(bh, "b_count=%d, refcount=%d",
-+		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
-+	end = bh->b_data + bh->b_size;
-+	/* Reject blocks without the xattr magic or spanning more than one block. */
-+	if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) ||
-+	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
-+bad_block:	ext2_error(inode->i_sb, "ext2_xattr_list",
-+			"inode %ld: bad block %d", inode->i_ino, block);
-+		error = -EIO;
-+		goto cleanup;
-+	}
-+	/* compute the size required for the list of attribute names */
-+	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-+	     entry = EXT2_XATTR_NEXT(entry)) {
-+		struct ext2_xattr_handler *handler;
-+		struct ext2_xattr_entry *next =
-+			EXT2_XATTR_NEXT(entry);
-+		/* The entry table must not run past the end of the block. */
-+		if ((char *)next >= end)
-+			goto bad_block;
-+
-+		/*
-+		 * Entries whose name index has no registered handler are
-+		 * skipped.  Called with a NULL list, the handler presumably
-+		 * only reports the bytes it would emit -- confirm per handler.
-+		 */
-+		handler = ext2_xattr_handler(entry->e_name_index);
-+		if (handler)
-+			size += handler->list(NULL, inode, entry->e_name,
-+					      entry->e_name_len);
-+	}
-+
-+	/* A cache insert failure is only logged (when debugging is enabled). */
-+	if (ext2_xattr_cache_insert(bh))
-+		ea_idebug(inode, "cache insert failed");
-+	if (!buffer) {
-+		/* Size query only. */
-+		error = size;
-+		goto cleanup;
-+	} else {
-+		error = -ERANGE;
-+		if (size > buffer_size)
-+			goto cleanup;
-+	}
-+
-+	/*
-+	 * list the attribute names; the entry table was already
-+	 * bounds-checked by the sizing pass above
-+	 */
-+	buf = buffer;
-+	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
-+	     entry = EXT2_XATTR_NEXT(entry)) {
-+		struct ext2_xattr_handler *handler;
-+
-+		handler = ext2_xattr_handler(entry->e_name_index);
-+		if (handler)
-+			buf += handler->list(buf, inode, entry->e_name,
-+					     entry->e_name_len);
-+	}
-+	error = size;
-+
-+cleanup:
-+	brelse(bh);
-+
-+	return error;
-+}
-+
-+/*
-+ * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext2_xattr_update_super_block(struct super_block *sb) -+{ -+ if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT2_SB(sb)->s_feature_compat |= EXT2_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT2_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT2_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ mark_buffer_dirty(EXT2_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext2_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext2_xattr_header *header = NULL; -+ struct ext2_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT2_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. -+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. 
-+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext2_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext2_error(sb, "ext2_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext2_xattr_entry *next = EXT2_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext2_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT2_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT2_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT2_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext2_xattr_cache_remove(bh); -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT2_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
*/ -+ last = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(last)) { -+ int o = le16_to_cpu(last->e_value_offs); -+ if (!last->e_value_block && o < offs) -+ last->e_value_offs = -+ cpu_to_le16(o + size); -+ last = EXT2_XATTR_NEXT(last); -+ } -+ } -+ if (value == NULL) { -+ /* Remove this attribute. */ -+ if (EXT2_XATTR_NEXT(ENTRY(header+1)) == last) { -+ /* This block is now empty. */ -+ error = ext2_xattr_set2(inode, bh, NULL); -+ goto cleanup; -+ } else { -+ /* Remove the old name. */ -+ int size = EXT2_XATTR_LEN(name_len); -+ last = ENTRY((char *)last - size); -+ memmove(here, (char*)here + size, -+ (char*)last - (char*)here); -+ memset(last, 0, size); -+ } -+ } -+ } -+ -+ if (value != NULL) { -+ /* Insert the new value. */ -+ here->e_value_size = cpu_to_le32(value_len); -+ if (value_len) { -+ size_t size = EXT2_XATTR_SIZE(value_len); -+ char *val = (char *)header + min_offs - size; -+ here->e_value_offs = -+ cpu_to_le16((char *)val - (char *)header); -+ memset(val + size - EXT2_XATTR_PAD, 0, -+ EXT2_XATTR_PAD); /* Clear the pad bytes. */ -+ memcpy(val, value, value_len); -+ } -+ } -+ ext2_xattr_rehash(header, here); -+ -+ error = ext2_xattr_set2(inode, bh, header); -+ -+cleanup: -+ brelse(bh); -+ if (!(bh && header == HDR(bh))) -+ kfree(header); -+ up(&ext2_xattr_sem); -+ -+ return error; -+} -+ -+/* -+ * Second half of ext2_xattr_set(): Update the file system. -+ */ -+static int -+ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, -+ struct ext2_xattr_header *header) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *new_bh = NULL; -+ int error; -+ -+ if (header) { -+ new_bh = ext2_xattr_cache_find(inode, header); -+ if (new_bh) { -+ /* -+ * We found an identical block in the cache. -+ * The old block will be released after updating -+ * the inode. 
-+ */ -+ ea_bdebug(old_bh, "reusing block %ld", -+ new_bh->b_blocknr); -+ -+ error = -EDQUOT; -+ if (ext2_xattr_quota_alloc(inode, 1)) -+ goto cleanup; -+ -+ HDR(new_bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(new_bh)->h_refcount) + 1); -+ ea_bdebug(new_bh, "refcount now=%d", -+ le32_to_cpu(HDR(new_bh)->h_refcount)); -+ } else if (old_bh && header == HDR(old_bh)) { -+ /* Keep this block. */ -+ new_bh = old_bh; -+ ext2_xattr_cache_insert(new_bh); -+ } else { -+ /* We need to allocate a new block */ -+ int force = EXT2_I(inode)->i_file_acl != 0; -+ int block = ext2_xattr_new_block(inode, &error, force); -+ if (error) -+ goto cleanup; -+ ea_idebug(inode, "creating block %d", block); -+ -+ new_bh = sb_getblk(sb, block); -+ if (!new_bh) { -+ ext2_xattr_free_block(inode, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ lock_buffer(new_bh); -+ memcpy(new_bh->b_data, header, new_bh->b_size); -+ mark_buffer_uptodate(new_bh, 1); -+ unlock_buffer(new_bh); -+ ext2_xattr_cache_insert(new_bh); -+ -+ ext2_xattr_update_super_block(sb); -+ } -+ mark_buffer_dirty(new_bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &new_bh); -+ wait_on_buffer(new_bh); -+ error = -EIO; -+ if (buffer_req(new_bh) && !buffer_uptodate(new_bh)) -+ goto cleanup; -+ } -+ } -+ -+ /* Update the inode. */ -+ EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; -+ inode->i_ctime = CURRENT_TIME; -+ if (IS_SYNC(inode)) { -+ error = ext2_sync_inode (inode); -+ if (error) -+ goto cleanup; -+ } else -+ mark_inode_dirty(inode); -+ -+ error = 0; -+ if (old_bh && old_bh != new_bh) { -+ /* -+ * If there was an old block, and we are not still using it, -+ * we now release the old block. -+ */ -+ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount); -+ -+ if (refcount == 1) { -+ /* Free the old block. */ -+ ea_bdebug(old_bh, "freeing"); -+ ext2_xattr_free_block(inode, old_bh->b_blocknr); -+ mark_buffer_clean(old_bh); -+ } else { -+ /* Decrement the refcount only. 
*/ -+ refcount--; -+ HDR(old_bh)->h_refcount = cpu_to_le32(refcount); -+ ext2_xattr_quota_free(inode); -+ mark_buffer_dirty(old_bh); -+ ea_bdebug(old_bh, "refcount now=%d", refcount); -+ } -+ } -+ -+cleanup: -+ if (old_bh != new_bh) -+ brelse(new_bh); -+ -+ return error; -+} -+ -+/* -+ * ext2_xattr_delete_inode() -+ * -+ * Free extended attribute resources associated with this inode. This -+ * is called immediately before an inode is freed. -+ */ -+void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+ struct buffer_head *bh; -+ unsigned int block = EXT2_I(inode)->i_file_acl; -+ -+ if (!block) -+ return; -+ down(&ext2_xattr_sem); -+ -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: block %d read error", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count))); -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT2_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+ ext2_error(inode->i_sb, "ext2_xattr_delete_inode", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ goto cleanup; -+ } -+ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { -+ ext2_xattr_cache_remove(bh); -+ ext2_xattr_free_block(inode, block); -+ bforget(bh); -+ bh = NULL; -+ } else { -+ HDR(bh)->h_refcount = cpu_to_le32( -+ le32_to_cpu(HDR(bh)->h_refcount) - 1); -+ mark_buffer_dirty(bh); -+ if (IS_SYNC(inode)) { -+ ll_rw_block(WRITE, 1, &bh); -+ wait_on_buffer(bh); -+ } -+ ext2_xattr_quota_free(inode); -+ } -+ EXT2_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext2_xattr_sem); -+} -+ -+/* -+ * ext2_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. 
-+ */ -+void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ mb_cache_shrink(ext2_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT2_FS_XATTR_SHARING -+ -+/* -+ * ext2_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext2_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext2_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext2_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext2_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */
-+static int
-+ext2_xattr_cmp(struct ext2_xattr_header *header1,
-+	       struct ext2_xattr_header *header2)
-+{
-+	struct ext2_xattr_entry *entry1, *entry2;
-+
-+	/* Walk both entry tables in lock step; they are sorted the same way. */
-+	entry1 = ENTRY(header1+1);
-+	entry2 = ENTRY(header2+1);
-+	while (!IS_LAST_ENTRY(entry1)) {
-+		if (IS_LAST_ENTRY(entry2))
-+			return 1;
-+		/*
-+		 * e_name_index must be compared explicitly: the entry hash
-+		 * covers only the name bytes and the value, so two
-+		 * attributes with the same name and value but different
-+		 * name indices would otherwise be treated as identical and
-+		 * their blocks could be shared.
-+		 */
-+		if (entry1->e_hash != entry2->e_hash ||
-+		    entry1->e_name_index != entry2->e_name_index ||
-+		    entry1->e_name_len != entry2->e_name_len ||
-+		    entry1->e_value_size != entry2->e_value_size ||
-+		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
-+			return 1;
-+		/* Values stored outside the attribute block are not supported. */
-+		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
-+			return -EIO;
-+		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
-+			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
-+			   le32_to_cpu(entry1->e_value_size)))
-+			return 1;
-+
-+		entry1 = EXT2_XATTR_NEXT(entry1);
-+		entry2 = EXT2_XATTR_NEXT(entry2);
-+	}
-+	/* header2 has more entries than header1. */
-+	if (!IS_LAST_ENTRY(entry2))
-+		return 1;
-+	return 0;
-+}
-+
-+/*
-+ * ext2_xattr_cache_find()
-+ *
-+ * Find an identical extended attribute block.
-+ *
-+ * Returns a pointer to the block found, or NULL if such a block was
-+ * not found or an error occurred.
-+ */ -+static struct buffer_head * -+ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext2_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext2_error(inode->i_sb, "ext2_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT2_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT2_XATTR_REFCOUNT_MAX); -+ } else if (!ext2_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext2_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext2_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext2_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext2_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext2_xattr_rehash(struct ext2_xattr_header *header, -+ struct ext2_xattr_entry *entry) -+{ -+ struct ext2_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext2_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT2_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext2_xattr(void) -+{ -+ ext2_xattr_cache = mb_cache_create("ext2_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext2_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+ mb_cache_destroy(ext2_xattr_cache); -+} -+ -+#else /* CONFIG_EXT2_FS_XATTR_SHARING */ -+ -+int __init -+init_ext2_xattr(void) -+{ -+ return 
0; -+} -+ -+void -+exit_ext2_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT2_FS_XATTR_SHARING */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/fs/ext2/xattr_user.c 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,103 @@ -+/* -+ * linux/fs/ext2/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext2_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext2_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext2_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT2_FS_POSIX_ACL -+ error = ext2_permission_locked(inode, MAY_WRITE); -+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return 
error; -+ -+ return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name, -+ value, size, flags); -+} -+ -+struct ext2_xattr_handler ext2_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext2_xattr_user_list, -+ get: ext2_xattr_user_get, -+ set: ext2_xattr_user_set, -+}; -+ -+int __init -+init_ext2_xattr_user(void) -+{ -+ return ext2_xattr_register(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} -+ -+void -+exit_ext2_xattr_user(void) -+{ -+ ext2_xattr_unregister(EXT2_XATTR_INDEX_USER, -+ &ext2_xattr_user_handler); -+} ---- linux-2.4.20/fs/ext3/Makefile~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/Makefile 2003-05-07 18:10:33.000000000 +0800 -@@ -1,5 +1,5 @@ - # --# Makefile for the linux ext2-filesystem routines. -+# Makefile for the linux ext3-filesystem routines. - # - # Note! Dependencies are done automagically by 'make dep', which also - # removes any old dependencies. DON'T put your own dependencies here -@@ -9,10 +9,14 @@ - - O_TARGET := ext3.o - --export-objs := super.o inode.o -+export-objs := ext3-exports.o - - obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o -+ ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o - obj-m := $(O_TARGET) - -+export-objs += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR) += xattr.o -+obj-$(CONFIG_EXT3_FS_XATTR_USER) += xattr_user.o -+ - include $(TOPDIR)/Rules.make ---- linux-2.4.20/fs/ext3/file.c~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/file.c 2003-05-07 18:08:03.000000000 +0800 -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -126,5 +127,9 @@ struct file_operations ext3_file_operati - struct inode_operations ext3_file_inode_operations = { - truncate: ext3_truncate, /* BKL held */ - setattr: ext3_setattr, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ 
listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; - ---- linux-2.4.20/fs/ext3/ialloc.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/ialloc.c 2003-05-07 18:08:03.000000000 +0800 -@@ -17,6 +17,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, - * as writing the quota to disk may need the lock as well. - */ - DQUOT_INIT(inode); -+ ext3_xattr_delete_inode(handle, inode); - DQUOT_FREE_INODE(inode); - DQUOT_DROP(inode); - ---- linux-2.4.20/fs/ext3/inode.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/inode.c 2003-05-07 18:08:03.000000000 +0800 -@@ -39,6 +39,18 @@ - */ - #undef SEARCH_FROM_ZERO - -+/* -+ * Test whether an inode is a fast symlink. -+ */ -+static inline int ext3_inode_is_fast_symlink(struct inode *inode) -+{ -+ int ea_blocks = inode->u.ext3_i.i_file_acl ? -+ (inode->i_sb->s_blocksize >> 9) : 0; -+ -+ return (S_ISLNK(inode->i_mode) && -+ inode->i_blocks - ea_blocks == 0); -+} -+ - /* The ext3 forget function must perform a revoke if we are freeing data - * which has been journaled. Metadata (eg. indirect blocks) must be - * revoked in all cases. -@@ -48,7 +60,7 @@ - * still needs to be revoked. 
- */ - --static int ext3_forget(handle_t *handle, int is_metadata, -+int ext3_forget(handle_t *handle, int is_metadata, - struct inode *inode, struct buffer_head *bh, - int blocknr) - { -@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i - { - handle_t *handle; - -- if (is_bad_inode(inode) || -- inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -+ if (is_bad_inode(inode)) - goto no_delete; - - lock_kernel(); -@@ -1855,6 +1865,8 @@ void ext3_truncate(struct inode * inode) - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode))) - return; -+ if (ext3_inode_is_fast_symlink(inode)) -+ return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return; - -@@ -2002,8 +2014,6 @@ int ext3_get_inode_loc (struct inode *in - struct ext3_group_desc * gdp; - - if ((inode->i_ino != EXT3_ROOT_INO && -- inode->i_ino != EXT3_ACL_IDX_INO && -- inode->i_ino != EXT3_ACL_DATA_INO && - inode->i_ino != EXT3_JOURNAL_INO && - inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || - inode->i_ino > le32_to_cpu( -@@ -2130,10 +2140,7 @@ void ext3_read_inode(struct inode * inod - - brelse (iloc.bh); - -- if (inode->i_ino == EXT3_ACL_IDX_INO || -- inode->i_ino == EXT3_ACL_DATA_INO) -- /* Nothing to do */ ; -- else if (S_ISREG(inode->i_mode)) { -+ if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - inode->i_mapping->a_ops = &ext3_aops; -@@ -2141,15 +2148,17 @@ void ext3_read_inode(struct inode * inod - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - } else if (S_ISLNK(inode->i_mode)) { -- if (!inode->i_blocks) -+ if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; - else { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - } -- } else -+ } else { -+ inode->i_op = &ext3_special_inode_operations; - 
init_special_inode(inode, inode->i_mode, - le32_to_cpu(iloc.raw_inode->i_block[0])); -+ } - /* inode->i_attr_flags = 0; unused */ - if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { - /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ ---- linux-2.4.20/fs/ext3/namei.c~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:05.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/namei.c 2003-05-07 18:08:03.000000000 +0800 -@@ -29,6 +29,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -1611,7 +1612,7 @@ static int ext3_mkdir(struct inode * dir - if (IS_SYNC(dir)) - handle->h_sync = 1; - -- inode = ext3_new_inode (handle, dir, S_IFDIR); -+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode); - err = PTR_ERR(inode); - if (IS_ERR(inode)) - goto out_stop; -@@ -1619,7 +1620,6 @@ static int ext3_mkdir(struct inode * dir - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; -- inode->i_blocks = 0; - dir_block = ext3_bread (handle, inode, 0, 1, &err); - if (!dir_block) { - inode->i_nlink--; /* is this nlink == 0? */ -@@ -1646,9 +1646,6 @@ static int ext3_mkdir(struct inode * dir - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); -- inode->i_mode = S_IFDIR | mode; -- if (dir->i_mode & S_ISGID) -- inode->i_mode |= S_ISGID; - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); - if (err) { -@@ -2017,7 +2014,7 @@ static int ext3_symlink (struct inode * - goto out_stop; - - if (l > sizeof (EXT3_I(inode)->i_data)) { -- inode->i_op = &page_symlink_inode_operations; -+ inode->i_op = &ext3_symlink_inode_operations; - inode->i_mapping->a_ops = &ext3_aops; - /* - * block_symlink() calls back into ext3_prepare/commit_write. 
-@@ -2244,4 +2241,16 @@ struct inode_operations ext3_dir_inode_o - rmdir: ext3_rmdir, /* BKL held */ - mknod: ext3_mknod, /* BKL held */ - rename: ext3_rename, /* BKL held */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; -+ -+struct inode_operations ext3_special_inode_operations = { -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ ---- linux-2.4.20/fs/ext3/super.c~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/super.c 2003-05-07 18:08:39.000000000 +0800 -@@ -24,6 +24,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -404,6 +405,7 @@ void ext3_put_super (struct super_block - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); -@@ -499,6 +501,7 @@ static int parse_options (char * options - int is_remount) - { - unsigned long *mount_options = &sbi->s_mount_opt; -+ - uid_t *resuid = &sbi->s_resuid; - gid_t *resgid = &sbi->s_resgid; - char * this_char; -@@ -511,6 +514,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ if (!strcmp (this_char, "user_xattr")) -+ set_opt (*mount_options, XATTR_USER); -+ else if (!strcmp (this_char, "nouser_xattr")) -+ clear_opt (*mount_options, XATTR_USER); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -928,6 +938,12 @@ struct super_block * ext3_read_super (st - sbi->s_mount_opt = 0; - sbi->s_resuid = 
EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; -+ -+ /* Default extended attribute flags */ -+#ifdef CONFIG_EXT3_FS_XATTR_USER -+ /* set_opt(sbi->s_mount_opt, XATTR_USER); */ -+#endif -+ - if (!parse_options ((char *) data, &sb_block, sbi, &journal_inum, 0)) { - sb->s_dev = 0; - goto out_fail; -@@ -1767,17 +1783,29 @@ static DECLARE_FSTYPE_DEV(ext3_fs_type, - - static int __init init_ext3_fs(void) - { -- return register_filesystem(&ext3_fs_type); -+ int error = init_ext3_xattr(); -+ if (error) -+ return error; -+ error = init_ext3_xattr_user(); -+ if (error) -+ goto fail; -+ error = register_filesystem(&ext3_fs_type); -+ if (!error) -+ return 0; -+ -+ exit_ext3_xattr_user(); -+fail: -+ exit_ext3_xattr(); -+ return error; - } - - static void __exit exit_ext3_fs(void) - { - unregister_filesystem(&ext3_fs_type); -+ exit_ext3_xattr_user(); -+ exit_ext3_xattr(); - } - --EXPORT_SYMBOL(ext3_force_commit); --EXPORT_SYMBOL(ext3_bread); -- - MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); - MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); - MODULE_LICENSE("GPL"); ---- linux-2.4.20/fs/ext3/symlink.c~linux-2.4.20-xattr-0.8.54 2001-11-10 06:25:04.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/symlink.c 2003-05-07 18:08:03.000000000 +0800 -@@ -20,6 +20,7 @@ - #include - #include - #include -+#include - - static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen) - { -@@ -33,7 +34,20 @@ static int ext3_follow_link(struct dentr - return vfs_follow_link(nd, s); - } - -+struct inode_operations ext3_symlink_inode_operations = { -+ readlink: page_readlink, /* BKL not held. Don't need */ -+ follow_link: page_follow_link, /* BKL not held. 
Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ -+}; -+ - struct inode_operations ext3_fast_symlink_inode_operations = { - readlink: ext3_readlink, /* BKL not held. Don't need */ - follow_link: ext3_follow_link, /* BKL not held. Don't need */ -+ setxattr: ext3_setxattr, /* BKL held */ -+ getxattr: ext3_getxattr, /* BKL held */ -+ listxattr: ext3_listxattr, /* BKL held */ -+ removexattr: ext3_removexattr, /* BKL held */ - }; ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/xattr.c 2003-05-07 18:09:23.000000000 +0800 -@@ -0,0 +1,1225 @@ -+/* -+ * linux/fs/ext3/xattr.c -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ * -+ * Fix by Harrison Xing . -+ * Ext3 code with a lot of help from Eric Jarman . -+ * Extended attributes for symlinks and special files added per -+ * suggestion of Luka Renko . -+ */ -+ -+/* -+ * Extended attributes are stored on disk blocks allocated outside of -+ * any inode. The i_file_acl field is then made to point to this allocated -+ * block. If the extended attributes of several inodes are identical, these -+ * inodes may share the same extended attribute block. Such situations -+ * are automatically detected by keeping a cache of recent attribute block -+ * numbers and hashes over the block's contents in memory. -+ * -+ * -+ * Extended attribute block layout: -+ * -+ * +------------------+ -+ * | header | -+ * | entry 1 | | -+ * | entry 2 | | growing downwards -+ * | entry 3 | v -+ * | four null bytes | -+ * | . . . | -+ * | value 1 | ^ -+ * | value 3 | | growing upwards -+ * | value 2 | | -+ * +------------------+ -+ * -+ * The block header is followed by multiple entry descriptors. These entry -+ * descriptors are variable in size, and aligned to EXT3_XATTR_PAD -+ * byte boundaries. 
The entry descriptors are sorted by attribute name, -+ * so that two extended attribute blocks can be compared efficiently. -+ * -+ * Attribute values are aligned to the end of the block, stored in -+ * no specific order. They are also padded to EXT3_XATTR_PAD byte -+ * boundaries. No additional gaps are left between them. -+ * -+ * Locking strategy -+ * ---------------- -+ * The VFS already holds the BKL and the inode->i_sem semaphore when any of -+ * the xattr inode operations are called, so we are guaranteed that only one -+ * process accesses extended attributes of an inode at any time. -+ * -+ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that -+ * only a single process is modifying an extended attribute block, even -+ * if the block is shared among inodes. -+ * -+ * Note for porting to 2.5 -+ * ----------------------- -+ * The BKL will no longer be held in the xattr inode operations. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#define EXT3_EA_USER "user." -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1) -+#endif -+ -+#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -+#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) -+#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1) -+#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) -+ -+#ifdef EXT3_XATTR_DEBUG -+# define ea_idebug(inode, f...) do { \ -+ printk(KERN_DEBUG "inode %s:%ld: ", \ -+ kdevname(inode->i_dev), inode->i_ino); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+# define ea_bdebug(bh, f...) do { \ -+ printk(KERN_DEBUG "block %s:%ld: ", \ -+ kdevname(bh->b_dev), bh->b_blocknr); \ -+ printk(f); \ -+ printk("\n"); \ -+ } while (0) -+#else -+# define ea_idebug(f...) -+# define ea_bdebug(f...) 
-+#endif -+ -+static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *, -+ struct ext3_xattr_header *); -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+static int ext3_xattr_cache_insert(struct buffer_head *); -+static struct buffer_head *ext3_xattr_cache_find(struct inode *, -+ struct ext3_xattr_header *); -+static void ext3_xattr_cache_remove(struct buffer_head *); -+static void ext3_xattr_rehash(struct ext3_xattr_header *, -+ struct ext3_xattr_entry *); -+ -+static struct mb_cache *ext3_xattr_cache; -+ -+#else -+# define ext3_xattr_cache_insert(bh) 0 -+# define ext3_xattr_cache_find(inode, header) NULL -+# define ext3_xattr_cache_remove(bh) while(0) {} -+# define ext3_xattr_rehash(header, entry) while(0) {} -+#endif -+ -+/* -+ * If a file system does not share extended attributes among inodes, -+ * we should not need the ext3_xattr_sem semaphore. However, the -+ * filesystem may still contain shared blocks, so we always take -+ * the lock. -+ */ -+ -+DECLARE_MUTEX(ext3_xattr_sem); -+ -+static inline int -+ext3_xattr_new_block(handle_t *handle, struct inode *inode, -+ int * errp, int force) -+{ -+ struct super_block *sb = inode->i_sb; -+ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) + -+ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb); -+ -+ /* How can we enforce the allocation? */ -+ int block = ext3_new_block(handle, inode, goal, 0, 0, errp); -+#ifdef OLD_QUOTAS -+ if (!*errp) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#endif -+ return block; -+} -+ -+static inline int -+ext3_xattr_quota_alloc(struct inode *inode, int force) -+{ -+ /* How can we enforce the allocation? 
*/ -+#ifdef OLD_QUOTAS -+ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1); -+ if (!error) -+ inode->i_blocks += inode->i_sb->s_blocksize >> 9; -+#else -+ int error = DQUOT_ALLOC_BLOCK(inode, 1); -+#endif -+ return error; -+} -+ -+#ifdef OLD_QUOTAS -+ -+static inline void -+ext3_xattr_quota_free(struct inode *inode) -+{ -+ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+static inline void -+ext3_xattr_free_block(handle_t *handle, struct inode * inode, -+ unsigned long block) -+{ -+ ext3_free_blocks(handle, inode, block, 1); -+ inode->i_blocks -= inode->i_sb->s_blocksize >> 9; -+} -+ -+#else -+# define ext3_xattr_quota_free(inode) \ -+ DQUOT_FREE_BLOCK(inode, 1) -+# define ext3_xattr_free_block(handle, inode, block) \ -+ ext3_free_blocks(handle, inode, block, 1) -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) -+ -+static inline struct buffer_head * -+sb_bread(struct super_block *sb, int block) -+{ -+ return bread(sb->s_dev, block, sb->s_blocksize); -+} -+ -+static inline struct buffer_head * -+sb_getblk(struct super_block *sb, int block) -+{ -+ return getblk(sb->s_dev, block, sb->s_blocksize); -+} -+ -+#endif -+ -+struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX]; -+rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED; -+ -+int -+ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler) -+{ -+ int error = -EINVAL; -+ -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ if (!ext3_xattr_handlers[name_index-1]) { -+ ext3_xattr_handlers[name_index-1] = handler; -+ error = 0; -+ } -+ write_unlock(&ext3_handler_lock); -+ } -+ return error; -+} -+ -+void -+ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler) -+{ -+ if (name_index > 0 || name_index <= EXT3_XATTR_INDEX_MAX) { -+ write_lock(&ext3_handler_lock); -+ ext3_xattr_handlers[name_index-1] = NULL; -+ write_unlock(&ext3_handler_lock); -+ } -+} -+ 
-+static inline const char * -+strcmp_prefix(const char *a, const char *a_prefix) -+{ -+ while (*a_prefix && *a == *a_prefix) { -+ a++; -+ a_prefix++; -+ } -+ return *a_prefix ? NULL : a; -+} -+ -+/* -+ * Decode the extended attribute name, and translate it into -+ * the name_index and name suffix. -+ */ -+static inline struct ext3_xattr_handler * -+ext3_xattr_resolve_name(const char **name) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ int i; -+ -+ if (!*name) -+ return NULL; -+ read_lock(&ext3_handler_lock); -+ for (i=0; iprefix); -+ if (n) { -+ handler = ext3_xattr_handlers[i]; -+ *name = n; -+ break; -+ } -+ } -+ } -+ read_unlock(&ext3_handler_lock); -+ return handler; -+} -+ -+static inline struct ext3_xattr_handler * -+ext3_xattr_handler(int name_index) -+{ -+ struct ext3_xattr_handler *handler = NULL; -+ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { -+ read_lock(&ext3_handler_lock); -+ handler = ext3_xattr_handlers[name_index-1]; -+ read_unlock(&ext3_handler_lock); -+ } -+ return handler; -+} -+ -+/* -+ * Inode operation getxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_getxattr(struct dentry *dentry, const char *name, -+ void *buffer, size_t size) -+{ -+ struct ext3_xattr_handler *handler; -+ struct inode *inode = dentry->d_inode; -+ -+ handler = ext3_xattr_resolve_name(&name); -+ if (!handler) -+ return -ENOTSUP; -+ return handler->get(inode, name, buffer, size); -+} -+ -+/* -+ * Inode operation listxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+ssize_t -+ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) -+{ -+ return ext3_xattr_list(dentry->d_inode, buffer, size); -+} -+ -+/* -+ * Inode operation setxattr() -+ * -+ * dentry->d_inode->i_sem down -+ * BKL held [before 2.5.x] -+ */ -+int -+ext3_setxattr(struct dentry *dentry, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ struct ext3_xattr_handler *handler; -+ 
/*
 * ext3_xattr_get()
 *
 * Copy an extended attribute into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success:
 *   -EINVAL   name is NULL
 *   -ENOATTR  the inode has no attribute block (i_file_acl is 0), or the
 *             block holds no entry with this name_index and name
 *   -EIO      the block cannot be read, or fails a consistency check
 *   -ERANGE   name is longer than 255 bytes, or buffer is non-NULL and
 *             buffer_size is smaller than the value
 */
int
ext3_xattr_get(struct inode *inode, int name_index, const char *name,
	       void *buffer, size_t buffer_size)
{
	struct buffer_head *bh = NULL;
	struct ext3_xattr_entry *entry;
	unsigned int block, size;
	char *end;
	int name_len, error;

	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
		  name_index, name, buffer, (long)buffer_size);

	if (name == NULL)
		return -EINVAL;
	if (!EXT3_I(inode)->i_file_acl)
		return -ENOATTR;
	block = EXT3_I(inode)->i_file_acl;
	ea_idebug(inode, "reading block %d", block);
	bh = sb_bread(inode->i_sb, block);
	if (!bh)
		return -EIO;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
	/* One past the last byte of the block; entries must stay below it. */
	end = bh->b_data + bh->b_size;
	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
		/*
		 * bad_block is also the target of the gotos further down,
		 * which jump into this branch to share the error report.
		 */
bad_block:	ext3_error(inode->i_sb, "ext3_xattr_get",
			"inode %ld: bad block %d", inode->i_ino, block);
		error = -EIO;
		goto cleanup;
	}
	/* find named attribute */
	name_len = strlen(name);

	error = -ERANGE;
	if (name_len > 255)
		goto cleanup;
	entry = FIRST_ENTRY(bh);
	while (!IS_LAST_ENTRY(entry)) {
		struct ext3_xattr_entry *next =
			EXT3_XATTR_NEXT(entry);
		/* An entry whose successor lies outside the block is corrupt. */
		if ((char *)next >= end)
			goto bad_block;
		if (name_index == entry->e_name_index &&
		    name_len == entry->e_name_len &&
		    memcmp(name, entry->e_name, name_len) == 0)
			goto found;
		entry = next;
	}
	/* Check the remaining name entries */
	/*
	 * NOTE(review): the loop above only falls through to here once
	 * IS_LAST_ENTRY(entry) holds (a match leaves via "goto found"), so
	 * this loop does not appear to iterate in this function.
	 */
	while (!IS_LAST_ENTRY(entry)) {
		struct ext3_xattr_entry *next =
			EXT3_XATTR_NEXT(entry);
		if ((char *)next >= end)
			goto bad_block;
		entry = next;
	}
	/* Every entry passed the bounds check: offer the block to the cache. */
	if (ext3_xattr_cache_insert(bh))
		ea_idebug(inode, "cache insert failed");
	error = -ENOATTR;
	goto cleanup;
found:
	/* check the buffer size */
	/* A non-zero e_value_block is treated as a bad block here. */
	if (entry->e_value_block != 0)
		goto bad_block;
	size = le32_to_cpu(entry->e_value_size);
	/* The value must lie entirely within one filesystem block. */
	if (size > inode->i_sb->s_blocksize ||
	    le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
		goto bad_block;

	if (ext3_xattr_cache_insert(bh))
		ea_idebug(inode, "cache insert failed");
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		/* return value of attribute */
		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
			size);
	}
	/* With buffer == NULL only the required size is reported. */
	error = size;

cleanup:
	brelse(bh);

	return error;
}
-+ */ -+int -+ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) -+{ -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_entry *entry; -+ unsigned int block, size = 0; -+ char *buf, *end; -+ int error; -+ -+ ea_idebug(inode, "buffer=%p, buffer_size=%ld", -+ buffer, (long)buffer_size); -+ -+ if (!EXT3_I(inode)->i_file_acl) -+ return 0; -+ block = EXT3_I(inode)->i_file_acl; -+ ea_idebug(inode, "reading block %d", block); -+ bh = sb_bread(inode->i_sb, block); -+ if (!bh) -+ return -EIO; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount)); -+ end = bh->b_data + bh->b_size; -+ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ HDR(bh)->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(inode->i_sb, "ext3_xattr_list", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* compute the size required for the list of attribute names */ -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ struct ext3_xattr_entry *next = -+ EXT3_XATTR_NEXT(entry); -+ if ((char *)next >= end) -+ goto bad_block; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ size += handler->list(NULL, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ -+ if (ext3_xattr_cache_insert(bh)) -+ ea_idebug(inode, "cache insert failed"); -+ if (!buffer) { -+ error = size; -+ goto cleanup; -+ } else { -+ error = -ERANGE; -+ if (size > buffer_size) -+ goto cleanup; -+ } -+ -+ /* list the attribute names */ -+ buf = buffer; -+ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry); -+ entry = EXT3_XATTR_NEXT(entry)) { -+ struct ext3_xattr_handler *handler; -+ -+ handler = ext3_xattr_handler(entry->e_name_index); -+ if (handler) -+ buf += handler->list(buf, inode, entry->e_name, -+ entry->e_name_len); -+ } -+ error = size; -+ -+cleanup: -+ brelse(bh); -+ -+ return error; -+} -+ -+/* 
-+ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is -+ * not set, set it. -+ */ -+static void ext3_xattr_update_super_block(handle_t *handle, -+ struct super_block *sb) -+{ -+ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) -+ return; -+ -+ lock_super(sb); -+ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) -+ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR; -+#endif -+ EXT3_SB(sb)->s_es->s_feature_compat |= -+ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); -+ sb->s_dirt = 1; -+ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ unlock_super(sb); -+} -+ -+/* -+ * ext3_xattr_set() -+ * -+ * Create, replace or remove an extended attribute for this inode. Buffer -+ * is NULL to remove an existing extended attribute, and non-NULL to -+ * either replace an existing extended attribute, or create a new extended -+ * attribute. The flags XATTR_REPLACE and XATTR_CREATE -+ * specify that an extended attribute must exist and must not exist -+ * previous to the call, respectively. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t value_len, int flags) -+{ -+ struct super_block *sb = inode->i_sb; -+ struct buffer_head *bh = NULL; -+ struct ext3_xattr_header *header = NULL; -+ struct ext3_xattr_entry *here, *last; -+ unsigned int name_len; -+ int block = EXT3_I(inode)->i_file_acl; -+ int min_offs = sb->s_blocksize, not_found = 1, free, error; -+ char *end; -+ -+ /* -+ * header -- Points either into bh, or to a temporarily -+ * allocated buffer. -+ * here -- The named entry found, or the place for inserting, within -+ * the block pointed to by header. -+ * last -- Points right after the last named entry within the block -+ * pointed to by header. 
-+ * min_offs -- The offset of the first value (values are aligned -+ * towards the end of the block). -+ * end -- Points right after the block pointed to by header. -+ */ -+ -+ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", -+ name_index, name, value, (long)value_len); -+ -+ if (IS_RDONLY(inode)) -+ return -EROFS; -+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -+ return -EPERM; -+ if (value == NULL) -+ value_len = 0; -+ if (name == NULL) -+ return -EINVAL; -+ name_len = strlen(name); -+ if (name_len > 255 || value_len > sb->s_blocksize) -+ return -ERANGE; -+ down(&ext3_xattr_sem); -+ -+ if (block) { -+ /* The inode already has an extended attribute block. */ -+ bh = sb_bread(sb, block); -+ error = -EIO; -+ if (!bh) -+ goto cleanup; -+ ea_bdebug(bh, "b_count=%d, refcount=%d", -+ atomic_read(&(bh->b_count)), -+ le32_to_cpu(HDR(bh)->h_refcount)); -+ header = HDR(bh); -+ end = bh->b_data + bh->b_size; -+ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || -+ header->h_blocks != cpu_to_le32(1)) { -+bad_block: ext3_error(sb, "ext3_xattr_set", -+ "inode %ld: bad block %d", inode->i_ino, block); -+ error = -EIO; -+ goto cleanup; -+ } -+ /* Find the named attribute. */ -+ here = FIRST_ENTRY(bh); -+ while (!IS_LAST_ENTRY(here)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!here->e_value_block && here->e_value_size) { -+ int offs = le16_to_cpu(here->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ not_found = name_index - here->e_name_index; -+ if (!not_found) -+ not_found = name_len - here->e_name_len; -+ if (!not_found) -+ not_found = memcmp(name, here->e_name,name_len); -+ if (not_found <= 0) -+ break; -+ here = next; -+ } -+ last = here; -+ /* We still need to compute min_offs and last. 
*/ -+ while (!IS_LAST_ENTRY(last)) { -+ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last); -+ if ((char *)next >= end) -+ goto bad_block; -+ if (!last->e_value_block && last->e_value_size) { -+ int offs = le16_to_cpu(last->e_value_offs); -+ if (offs < min_offs) -+ min_offs = offs; -+ } -+ last = next; -+ } -+ -+ /* Check whether we have enough space left. */ -+ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32); -+ } else { -+ /* We will use a new extended attribute block. */ -+ free = sb->s_blocksize - -+ sizeof(struct ext3_xattr_header) - sizeof(__u32); -+ here = last = NULL; /* avoid gcc uninitialized warning. */ -+ } -+ -+ if (not_found) { -+ /* Request to remove a nonexistent attribute? */ -+ error = -ENOATTR; -+ if (flags & XATTR_REPLACE) -+ goto cleanup; -+ error = 0; -+ if (value == NULL) -+ goto cleanup; -+ else -+ free -= EXT3_XATTR_LEN(name_len); -+ } else { -+ /* Request to create an existing attribute? */ -+ error = -EEXIST; -+ if (flags & XATTR_CREATE) -+ goto cleanup; -+ if (!here->e_value_block && here->e_value_size) { -+ unsigned int size = le32_to_cpu(here->e_value_size); -+ -+ if (le16_to_cpu(here->e_value_offs) + size > -+ sb->s_blocksize || size > sb->s_blocksize) -+ goto bad_block; -+ free += EXT3_XATTR_SIZE(size); -+ } -+ } -+ free -= EXT3_XATTR_SIZE(value_len); -+ error = -ENOSPC; -+ if (free < 0) -+ goto cleanup; -+ -+ /* Here we know that we can set the new attribute. 
*/ -+ -+ if (header) { -+ if (header->h_refcount == cpu_to_le32(1)) { -+ ea_bdebug(bh, "modifying in-place"); -+ ext3_xattr_cache_remove(bh); -+ error = ext3_journal_get_write_access(handle, bh); -+ if (error) -+ goto cleanup; -+ } else { -+ int offset; -+ -+ ea_bdebug(bh, "cloning"); -+ header = kmalloc(bh->b_size, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memcpy(header, HDR(bh), bh->b_size); -+ header->h_refcount = cpu_to_le32(1); -+ offset = (char *)header - bh->b_data; -+ here = ENTRY((char *)here + offset); -+ last = ENTRY((char *)last + offset); -+ } -+ } else { -+ /* Allocate a buffer where we construct the new block. */ -+ header = kmalloc(sb->s_blocksize, GFP_KERNEL); -+ error = -ENOMEM; -+ if (header == NULL) -+ goto cleanup; -+ memset(header, 0, sb->s_blocksize); -+ end = (char *)header + sb->s_blocksize; -+ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); -+ header->h_blocks = header->h_refcount = cpu_to_le32(1); -+ last = here = ENTRY(header+1); -+ } -+ -+ if (not_found) { -+ /* Insert the new name. */ -+ int size = EXT3_XATTR_LEN(name_len); -+ int rest = (char *)last - (char *)here; -+ memmove((char *)here + size, here, rest); -+ memset(here, 0, size); -+ here->e_name_index = name_index; -+ here->e_name_len = name_len; -+ memcpy(here->e_name, name, name_len); -+ } else { -+ /* Remove the old value. */ -+ if (!here->e_value_block && here->e_value_size) { -+ char *first_val = (char *)header + min_offs; -+ int offs = le16_to_cpu(here->e_value_offs); -+ char *val = (char *)header + offs; -+ size_t size = EXT3_XATTR_SIZE( -+ le32_to_cpu(here->e_value_size)); -+ memmove(first_val + size, first_val, val - first_val); -+ memset(first_val, 0, size); -+ here->e_value_offs = 0; -+ min_offs += size; -+ -+ /* Adjust all value offsets. 
/*
 * Second half of ext3_xattr_set(): Update the file system.
 *
 * @old_bh is the attribute block the inode currently points to (may be
 * NULL). @header is the new block contents, or NULL when the inode is to
 * end up with no attribute block at all.
 *
 * When @header is non-NULL one of three things happens:
 *   - ext3_xattr_cache_find() returns an identical block, whose refcount
 *     is then incremented;
 *   - @header is @old_bh's own data, so the block is kept;
 *   - a new block is allocated and filled from @header.
 * The inode's i_file_acl is then pointed at the resulting block (or set
 * to 0), and an old block that is no longer used is either freed or has
 * its refcount decremented.
 *
 * Returns 0, or a negative error number on failure. @old_bh is never
 * released here; the caller keeps that reference.
 */
static int
ext3_xattr_set2(handle_t *handle, struct inode *inode,
		struct buffer_head *old_bh, struct ext3_xattr_header *header)
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *new_bh = NULL;
	int error;

	if (header) {
		new_bh = ext3_xattr_cache_find(inode, header);
		if (new_bh) {
			/*
			 * We found an identical block in the cache.
			 * The old block will be released after updating
			 * the inode.
			 */
			ea_bdebug(old_bh, "reusing block %ld",
				new_bh->b_blocknr);

			error = -EDQUOT;
			if (ext3_xattr_quota_alloc(inode, 1))
				goto cleanup;

			/*
			 * NOTE(review): if this call fails, the quota taken
			 * just above is not given back on the way out.
			 */
			error = ext3_journal_get_write_access(handle, new_bh);
			if (error)
				goto cleanup;
			HDR(new_bh)->h_refcount = cpu_to_le32(
				le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
			ea_bdebug(new_bh, "refcount now=%d",
				le32_to_cpu(HDR(new_bh)->h_refcount));
		} else if (old_bh && header == HDR(old_bh)) {
			/* Keep this block. */
			new_bh = old_bh;
			ext3_xattr_cache_insert(new_bh);
		} else {
			/* We need to allocate a new block */
			int force = EXT3_I(inode)->i_file_acl != 0;
			int block = ext3_xattr_new_block(handle, inode,
							 &error, force);
			if (error)
				goto cleanup;
			ea_idebug(inode, "creating block %d", block);

			new_bh = sb_getblk(sb, block);
			if (!new_bh) {
				/*
				 * Also reached by goto when create access
				 * fails below; that error code is replaced
				 * by -EIO here.
				 */
getblk_failed:			ext3_xattr_free_block(handle, inode, block);
				error = -EIO;
				goto cleanup;
			}
			lock_buffer(new_bh);
			error = ext3_journal_get_create_access(handle, new_bh);
			if (error) {
				unlock_buffer(new_bh);
				goto getblk_failed;
			}
			/* Copy a whole block's worth of data out of header. */
			memcpy(new_bh->b_data, header, new_bh->b_size);
			mark_buffer_uptodate(new_bh, 1);
			unlock_buffer(new_bh);
			ext3_xattr_cache_insert(new_bh);

			ext3_xattr_update_super_block(handle, sb);
		}
		error = ext3_journal_dirty_metadata(handle, new_bh);
		if (error)
			goto cleanup;
	}

	/* Update the inode. */
	EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
	inode->i_ctime = CURRENT_TIME;
	ext3_mark_inode_dirty(handle, inode);
	if (IS_SYNC(inode))
		handle->h_sync = 1;

	error = 0;
	if (old_bh && old_bh != new_bh) {
		/*
		 * If there was an old block, and we are not still using it,
		 * we now release the old block.
		 */
		unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);

		error = ext3_journal_get_write_access(handle, old_bh);
		if (error)
			goto cleanup;
		if (refcount == 1) {
			/* Free the old block. */
			ea_bdebug(old_bh, "freeing");
			ext3_xattr_free_block(handle, inode, old_bh->b_blocknr);

			/* ext3_forget() calls bforget() for us, but we
			   let our caller release old_bh, so we need to
			   duplicate the handle before. */
			get_bh(old_bh);
			ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
		} else {
			/* Decrement the refcount only. */
			refcount--;
			HDR(old_bh)->h_refcount = cpu_to_le32(refcount);
			ext3_xattr_quota_free(inode);
			ext3_journal_dirty_metadata(handle, old_bh);
			ea_bdebug(old_bh, "refcount now=%d", refcount);
		}
	}

cleanup:
	/* Release new_bh only when it is a reference separate from old_bh. */
	if (old_bh != new_bh)
		brelse(new_bh);

	return error;
}
handle->h_sync = 1; -+ ext3_xattr_quota_free(inode); -+ } -+ EXT3_I(inode)->i_file_acl = 0; -+ -+cleanup: -+ brelse(bh); -+ up(&ext3_xattr_sem); -+} -+ -+/* -+ * ext3_xattr_put_super() -+ * -+ * This is called when a file system is unmounted. -+ */ -+void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ mb_cache_shrink(ext3_xattr_cache, sb->s_dev); -+#endif -+} -+ -+#ifdef CONFIG_EXT3_FS_XATTR_SHARING -+ -+/* -+ * ext3_xattr_cache_insert() -+ * -+ * Create a new entry in the extended attribute cache, and insert -+ * it unless such an entry is already in the cache. -+ * -+ * Returns 0, or a negative error number on failure. -+ */ -+static int -+ext3_xattr_cache_insert(struct buffer_head *bh) -+{ -+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash); -+ struct mb_cache_entry *ce; -+ int error; -+ -+ ce = mb_cache_entry_alloc(ext3_xattr_cache); -+ if (!ce) -+ return -ENOMEM; -+ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash); -+ if (error) { -+ mb_cache_entry_free(ce); -+ if (error == -EBUSY) { -+ ea_bdebug(bh, "already in cache (%d cache entries)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ error = 0; -+ } -+ } else { -+ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash, -+ atomic_read(&ext3_xattr_cache->c_entry_count)); -+ mb_cache_entry_release(ce); -+ } -+ return error; -+} -+ -+/* -+ * ext3_xattr_cmp() -+ * -+ * Compare two extended attribute blocks for equality. -+ * -+ * Returns 0 if the blocks are equal, 1 if they differ, and -+ * a negative error number on errors. 
-+ */ -+static int -+ext3_xattr_cmp(struct ext3_xattr_header *header1, -+ struct ext3_xattr_header *header2) -+{ -+ struct ext3_xattr_entry *entry1, *entry2; -+ -+ entry1 = ENTRY(header1+1); -+ entry2 = ENTRY(header2+1); -+ while (!IS_LAST_ENTRY(entry1)) { -+ if (IS_LAST_ENTRY(entry2)) -+ return 1; -+ if (entry1->e_hash != entry2->e_hash || -+ entry1->e_name_len != entry2->e_name_len || -+ entry1->e_value_size != entry2->e_value_size || -+ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) -+ return 1; -+ if (entry1->e_value_block != 0 || entry2->e_value_block != 0) -+ return -EIO; -+ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), -+ (char *)header2 + le16_to_cpu(entry2->e_value_offs), -+ le32_to_cpu(entry1->e_value_size))) -+ return 1; -+ -+ entry1 = EXT3_XATTR_NEXT(entry1); -+ entry2 = EXT3_XATTR_NEXT(entry2); -+ } -+ if (!IS_LAST_ENTRY(entry2)) -+ return 1; -+ return 0; -+} -+ -+/* -+ * ext3_xattr_cache_find() -+ * -+ * Find an identical extended attribute block. -+ * -+ * Returns a pointer to the block found, or NULL if such a block was -+ * not found or an error occurred. 
-+ */ -+static struct buffer_head * -+ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header) -+{ -+ __u32 hash = le32_to_cpu(header->h_hash); -+ struct mb_cache_entry *ce; -+ -+ if (!header->h_hash) -+ return NULL; /* never share */ -+ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); -+ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash); -+ while (ce) { -+ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block); -+ -+ if (!bh) { -+ ext3_error(inode->i_sb, "ext3_xattr_cache_find", -+ "inode %ld: block %ld read error", -+ inode->i_ino, ce->e_block); -+ } else if (le32_to_cpu(HDR(bh)->h_refcount) > -+ EXT3_XATTR_REFCOUNT_MAX) { -+ ea_idebug(inode, "block %ld refcount %d>%d",ce->e_block, -+ le32_to_cpu(HDR(bh)->h_refcount), -+ EXT3_XATTR_REFCOUNT_MAX); -+ } else if (!ext3_xattr_cmp(header, HDR(bh))) { -+ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count))); -+ mb_cache_entry_release(ce); -+ return bh; -+ } -+ brelse(bh); -+ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash); -+ } -+ return NULL; -+} -+ -+/* -+ * ext3_xattr_cache_remove() -+ * -+ * Remove the cache entry of a block from the cache. Called when a -+ * block becomes invalid. -+ */ -+static void -+ext3_xattr_cache_remove(struct buffer_head *bh) -+{ -+ struct mb_cache_entry *ce; -+ -+ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr); -+ if (ce) { -+ ea_bdebug(bh, "removing (%d cache entries remaining)", -+ atomic_read(&ext3_xattr_cache->c_entry_count)-1); -+ mb_cache_entry_free(ce); -+ } else -+ ea_bdebug(bh, "no cache entry"); -+} -+ -+#define NAME_HASH_SHIFT 5 -+#define VALUE_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_hash_entry() -+ * -+ * Compute the hash of an extended attribute. 
-+ */ -+static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ __u32 hash = 0; -+ char *name = entry->e_name; -+ int n; -+ -+ for (n=0; n < entry->e_name_len; n++) { -+ hash = (hash << NAME_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ -+ *name++; -+ } -+ -+ if (entry->e_value_block == 0 && entry->e_value_size != 0) { -+ __u32 *value = (__u32 *)((char *)header + -+ le16_to_cpu(entry->e_value_offs)); -+ for (n = (le32_to_cpu(entry->e_value_size) + -+ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { -+ hash = (hash << VALUE_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ -+ le32_to_cpu(*value++); -+ } -+ } -+ entry->e_hash = cpu_to_le32(hash); -+} -+ -+#undef NAME_HASH_SHIFT -+#undef VALUE_HASH_SHIFT -+ -+#define BLOCK_HASH_SHIFT 16 -+ -+/* -+ * ext3_xattr_rehash() -+ * -+ * Re-compute the extended attribute hash value after an entry has changed. -+ */ -+static void ext3_xattr_rehash(struct ext3_xattr_header *header, -+ struct ext3_xattr_entry *entry) -+{ -+ struct ext3_xattr_entry *here; -+ __u32 hash = 0; -+ -+ ext3_xattr_hash_entry(header, entry); -+ here = ENTRY(header+1); -+ while (!IS_LAST_ENTRY(here)) { -+ if (!here->e_hash) { -+ /* Block is not shared if an entry's hash value == 0 */ -+ hash = 0; -+ break; -+ } -+ hash = (hash << BLOCK_HASH_SHIFT) ^ -+ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ -+ le32_to_cpu(here->e_hash); -+ here = EXT3_XATTR_NEXT(here); -+ } -+ header->h_hash = cpu_to_le32(hash); -+} -+ -+#undef BLOCK_HASH_SHIFT -+ -+int __init -+init_ext3_xattr(void) -+{ -+ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, -+ sizeof(struct mb_cache_entry) + -+ sizeof(struct mb_cache_entry_index), 1, 61); -+ if (!ext3_xattr_cache) -+ return -ENOMEM; -+ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+ if (ext3_xattr_cache) -+ mb_cache_destroy(ext3_xattr_cache); -+ ext3_xattr_cache = NULL; -+} -+ -+#else /* CONFIG_EXT3_FS_XATTR_SHARING */ -+ 
-+int __init -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+void -+exit_ext3_xattr(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_SHARING */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/fs/ext3/xattr_user.c 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,111 @@ -+/* -+ * linux/fs/ext3/xattr_user.c -+ * Handler for extended user attributes. -+ * -+ * Copyright (C) 2001 by Andreas Gruenbacher, -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+# include -+#endif -+ -+#define XATTR_USER_PREFIX "user." -+ -+static size_t -+ext3_xattr_user_list(char *list, struct inode *inode, -+ const char *name, int name_len) -+{ -+ const int prefix_len = sizeof(XATTR_USER_PREFIX)-1; -+ -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return 0; -+ -+ if (list) { -+ memcpy(list, XATTR_USER_PREFIX, prefix_len); -+ memcpy(list+prefix_len, name, name_len); -+ list[prefix_len + name_len] = '\0'; -+ } -+ return prefix_len + name_len + 1; -+} -+ -+static int -+ext3_xattr_user_get(struct inode *inode, const char *name, -+ void *buffer, size_t size) -+{ -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_READ); -+#else -+ error = permission(inode, MAY_READ); -+#endif -+ if (error) -+ return error; -+ -+ return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, -+ buffer, size); -+} -+ -+static int -+ext3_xattr_user_set(struct inode *inode, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ handle_t *handle; -+ int error; -+ -+ if (strcmp(name, "") == 0) -+ return -EINVAL; -+ if (!test_opt(inode->i_sb, XATTR_USER)) -+ return -ENOTSUP; -+ if ( !S_ISREG(inode->i_mode) && -+ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) -+ return -EPERM; -+#ifdef CONFIG_EXT3_FS_POSIX_ACL -+ error = ext3_permission_locked(inode, MAY_WRITE); 
-+#else -+ error = permission(inode, MAY_WRITE); -+#endif -+ if (error) -+ return error; -+ -+ handle = ext3_journal_start(inode, EXT3_XATTR_TRANS_BLOCKS); -+ if (IS_ERR(handle)) -+ return PTR_ERR(handle); -+ error = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_USER, name, -+ value, size, flags); -+ ext3_journal_stop(handle, inode); -+ -+ return error; -+} -+ -+struct ext3_xattr_handler ext3_xattr_user_handler = { -+ prefix: XATTR_USER_PREFIX, -+ list: ext3_xattr_user_list, -+ get: ext3_xattr_user_get, -+ set: ext3_xattr_user_set, -+}; -+ -+int __init -+init_ext3_xattr_user(void) -+{ -+ return ext3_xattr_register(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} -+ -+void -+exit_ext3_xattr_user(void) -+{ -+ ext3_xattr_unregister(EXT3_XATTR_INDEX_USER, -+ &ext3_xattr_user_handler); -+} ---- linux-2.4.20/fs/jfs/jfs_xattr.h~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/jfs/jfs_xattr.h 2003-05-07 18:08:03.000000000 +0800 -@@ -52,8 +52,10 @@ struct jfs_ea_list { - #define END_EALIST(ealist) \ - ((struct jfs_ea *) (((char *) (ealist)) + EALIST_SIZE(ealist))) - --extern int __jfs_setxattr(struct inode *, const char *, void *, size_t, int); --extern int jfs_setxattr(struct dentry *, const char *, void *, size_t, int); -+extern int __jfs_setxattr(struct inode *, const char *, const void *, size_t, -+ int); -+extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, -+ int); - extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); - extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); - extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); ---- linux-2.4.20/fs/jfs/xattr.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/fs/jfs/xattr.c 2003-05-07 18:08:03.000000000 +0800 -@@ -641,7 +641,7 @@ static int ea_put(struct inode *inode, s - } - - static int can_set_xattr(struct inode *inode, const char *name, -- void *value, 
size_t value_len) -+ const void *value, size_t value_len) - { - if (IS_RDONLY(inode)) - return -EROFS; -@@ -660,7 +660,7 @@ static int can_set_xattr(struct inode *i - return permission(inode, MAY_WRITE); - } - --int __jfs_setxattr(struct inode *inode, const char *name, void *value, -+int __jfs_setxattr(struct inode *inode, const char *name, const void *value, - size_t value_len, int flags) - { - struct jfs_ea_list *ealist; -@@ -799,7 +799,7 @@ int __jfs_setxattr(struct inode *inode, - return rc; - } - --int jfs_setxattr(struct dentry *dentry, const char *name, void *value, -+int jfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t value_len, int flags) - { - if (value == NULL) { /* empty EA, do not remove */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/fs/mbcache.c 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,648 @@ -+/* -+ * linux/fs/mbcache.c -+ * (C) 2001-2002 Andreas Gruenbacher, -+ */ -+ -+/* -+ * Filesystem Meta Information Block Cache (mbcache) -+ * -+ * The mbcache caches blocks of block devices that need to be located -+ * by their device/block number, as well as by other criteria (such -+ * as the block's contents). -+ * -+ * There can only be one cache entry in a cache per device and block number. -+ * Additional indexes need not be unique in this sense. The number of -+ * additional indexes (=other criteria) can be hardwired at compile time -+ * or specified at cache create time. -+ * -+ * Each cache entry is of fixed size. An entry may be `valid' or `invalid' -+ * in the cache. A valid entry is in the main hash tables of the cache, -+ * and may also be in the lru list. An invalid entry is not in any hashes -+ * or lists. -+ * -+ * A valid cache entry is only in the lru list if no handles refer to it. -+ * Invalid cache entries will be freed when the last handle to the cache -+ * entry is released. Entries that cannot be freed immediately are put -+ * back on the lru list. 
-+ */ -+ -+#include -+#include -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+ -+#ifdef MB_CACHE_DEBUG -+# define mb_debug(f...) do { \ -+ printk(KERN_DEBUG f); \ -+ printk("\n"); \ -+ } while (0) -+#define mb_assert(c) do { if (!(c)) \ -+ printk(KERN_ERR "assertion " #c " failed\n"); \ -+ } while(0) -+#else -+# define mb_debug(f...) do { } while(0) -+# define mb_assert(c) do { } while(0) -+#endif -+#define mb_error(f...) do { \ -+ printk(KERN_ERR f); \ -+ printk("\n"); \ -+ } while(0) -+ -+MODULE_AUTHOR("Andreas Gruenbacher "); -+MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,0) -+MODULE_LICENSE("GPL"); -+#endif -+ -+EXPORT_SYMBOL(mb_cache_create); -+EXPORT_SYMBOL(mb_cache_shrink); -+EXPORT_SYMBOL(mb_cache_destroy); -+EXPORT_SYMBOL(mb_cache_entry_alloc); -+EXPORT_SYMBOL(mb_cache_entry_insert); -+EXPORT_SYMBOL(mb_cache_entry_release); -+EXPORT_SYMBOL(mb_cache_entry_takeout); -+EXPORT_SYMBOL(mb_cache_entry_free); -+EXPORT_SYMBOL(mb_cache_entry_dup); -+EXPORT_SYMBOL(mb_cache_entry_get); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+EXPORT_SYMBOL(mb_cache_entry_find_first); -+EXPORT_SYMBOL(mb_cache_entry_find_next); -+#endif -+ -+ -+/* -+ * Global data: list of all mbcache's, lru list, and a spinlock for -+ * accessing cache data structures on SMP machines. The lru list is -+ * global across all mbcaches. -+ */ -+ -+static LIST_HEAD(mb_cache_list); -+static LIST_HEAD(mb_cache_lru_list); -+static spinlock_t mb_cache_spinlock = SPIN_LOCK_UNLOCKED; -+ -+static inline int -+mb_cache_indexes(struct mb_cache *cache) -+{ -+#ifdef MB_CACHE_INDEXES_COUNT -+ return MB_CACHE_INDEXES_COUNT; -+#else -+ return cache->c_indexes_count; -+#endif -+} -+ -+/* -+ * What the mbcache registers as to get shrunk dynamically. 
-+ */ -+ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask); -+ -+static struct cache_definition mb_cache_definition = { -+ "mb_cache", -+ mb_cache_memory_pressure -+}; -+ -+ -+static inline int -+__mb_cache_entry_is_hashed(struct mb_cache_entry *ce) -+{ -+ return !list_empty(&ce->e_block_list); -+} -+ -+ -+static inline void -+__mb_cache_entry_unhash(struct mb_cache_entry *ce) -+{ -+ int n; -+ -+ if (__mb_cache_entry_is_hashed(ce)) { -+ list_del_init(&ce->e_block_list); -+ for (n=0; ne_cache); n++) -+ list_del(&ce->e_indexes[n].o_list); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_forget(struct mb_cache_entry *ce, int gfp_mask) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ -+ mb_assert(atomic_read(&ce->e_used) == 0); -+ if (cache->c_op.free && cache->c_op.free(ce, gfp_mask)) { -+ /* free failed -- put back on the lru list -+ for freeing later. */ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&ce->e_lru_list, &mb_cache_lru_list); -+ spin_unlock(&mb_cache_spinlock); -+ } else { -+ kmem_cache_free(cache->c_entry_cache, ce); -+ atomic_dec(&cache->c_entry_count); -+ } -+} -+ -+ -+static inline void -+__mb_cache_entry_release_unlock(struct mb_cache_entry *ce) -+{ -+ if (atomic_dec_and_test(&ce->e_used)) { -+ if (__mb_cache_entry_is_hashed(ce)) -+ list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); -+ else { -+ spin_unlock(&mb_cache_spinlock); -+ __mb_cache_entry_forget(ce, GFP_KERNEL); -+ return; -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_memory_pressure() memory pressure callback -+ * -+ * This function is called by the kernel memory management when memory -+ * gets low. 
-+ * -+ * @priority: Amount by which to shrink the cache (0 = highes priority) -+ * @gfp_mask: (ignored) -+ */ -+static void -+mb_cache_memory_pressure(int priority, unsigned int gfp_mask) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int count = 0; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &mb_cache_list) { -+ struct mb_cache *cache = -+ list_entry(l, struct mb_cache, c_cache_list); -+ mb_debug("cache %s (%d)", cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ count += atomic_read(&cache->c_entry_count); -+ } -+ mb_debug("trying to free %d of %d entries", -+ count / (priority ? priority : 1), count); -+ if (priority) -+ count /= priority; -+ while (count-- && !list_empty(&mb_cache_lru_list)) { -+ struct mb_cache_entry *ce = -+ list_entry(mb_cache_lru_list.next, -+ struct mb_cache_entry, e_lru_list); -+ list_del(&ce->e_lru_list); -+ __mb_cache_entry_unhash(ce); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), gfp_mask); -+ } -+} -+ -+ -+/* -+ * mb_cache_create() create a new cache -+ * -+ * All entries in one cache are equal size. Cache entries may be from -+ * multiple devices. If this is the first mbcache created, registers -+ * the cache with kernel memory management. Returns NULL if no more -+ * memory was available. -+ * -+ * @name: name of the cache (informal) -+ * @cache_op: contains the callback called when freeing a cache entry -+ * @entry_size: The size of a cache entry, including -+ * struct mb_cache_entry -+ * @indexes_count: number of additional indexes in the cache. Must equal -+ * MB_CACHE_INDEXES_COUNT if the number of indexes is -+ * hardwired. 
-+ * @bucket_count: number of hash buckets -+ */ -+struct mb_cache * -+mb_cache_create(const char *name, struct mb_cache_op *cache_op, -+ size_t entry_size, int indexes_count, int bucket_count) -+{ -+ int m=0, n; -+ struct mb_cache *cache = NULL; -+ -+ if(entry_size < sizeof(struct mb_cache_entry) + -+ indexes_count * sizeof(struct mb_cache_entry_index)) -+ return NULL; -+ -+ MOD_INC_USE_COUNT; -+ cache = kmalloc(sizeof(struct mb_cache) + -+ indexes_count * sizeof(struct list_head), GFP_KERNEL); -+ if (!cache) -+ goto fail; -+ cache->c_name = name; -+ cache->c_op.free = NULL; -+ if (cache_op) -+ cache->c_op.free = cache_op->free; -+ atomic_set(&cache->c_entry_count, 0); -+ cache->c_bucket_count = bucket_count; -+#ifdef MB_CACHE_INDEXES_COUNT -+ mb_assert(indexes_count == MB_CACHE_INDEXES_COUNT); -+#else -+ cache->c_indexes_count = indexes_count; -+#endif -+ cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_block_hash) -+ goto fail; -+ for (n=0; nc_block_hash[n]); -+ for (m=0; mc_indexes_hash[m] = kmalloc(bucket_count * -+ sizeof(struct list_head), -+ GFP_KERNEL); -+ if (!cache->c_indexes_hash[m]) -+ goto fail; -+ for (n=0; nc_indexes_hash[m][n]); -+ } -+ cache->c_entry_cache = kmem_cache_create(name, entry_size, 0, -+ 0 /*SLAB_POISON | SLAB_RED_ZONE*/, NULL, NULL); -+ if (!cache->c_entry_cache) -+ goto fail; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_add(&cache->c_cache_list, &mb_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ return cache; -+ -+fail: -+ if (cache) { -+ while (--m >= 0) -+ kfree(cache->c_indexes_hash[m]); -+ if (cache->c_block_hash) -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ } -+ MOD_DEC_USE_COUNT; -+ return NULL; -+} -+ -+ -+/* -+ * mb_cache_shrink() -+ * -+ * Removes all cache entires of a device from the cache. All cache entries -+ * currently in use cannot be freed, and thus remain in the cache. 
-+ * -+ * @cache: which cache to shrink -+ * @dev: which device's cache entries to shrink -+ */ -+void -+mb_cache_shrink(struct mb_cache *cache, kdev_t dev) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_dev == dev) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+} -+ -+ -+/* -+ * mb_cache_destroy() -+ * -+ * Shrinks the cache to its minimum possible size (hopefully 0 entries), -+ * and then destroys it. If this was the last mbcache, un-registers the -+ * mbcache from kernel memory management. -+ */ -+void -+mb_cache_destroy(struct mb_cache *cache) -+{ -+ LIST_HEAD(free_list); -+ struct list_head *l, *ltmp; -+ int n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &mb_cache_lru_list) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_lru_list); -+ if (ce->e_cache == cache) { -+ list_del(&ce->e_lru_list); -+ list_add_tail(&ce->e_lru_list, &free_list); -+ __mb_cache_entry_unhash(ce); -+ } -+ } -+ list_del(&cache->c_cache_list); -+ spin_unlock(&mb_cache_spinlock); -+ list_for_each_safe(l, ltmp, &free_list) { -+ __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, -+ e_lru_list), GFP_KERNEL); -+ } -+ -+ if (atomic_read(&cache->c_entry_count) > 0) { -+ mb_error("cache %s: %d orphaned entries", -+ cache->c_name, -+ atomic_read(&cache->c_entry_count)); -+ } -+ -+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,3,0)) -+ /* We don't have kmem_cache_destroy() in 2.2.x */ -+ kmem_cache_shrink(cache->c_entry_cache); -+#else -+ kmem_cache_destroy(cache->c_entry_cache); -+#endif -+ for 
(n=0; n < mb_cache_indexes(cache); n++) -+ kfree(cache->c_indexes_hash[n]); -+ kfree(cache->c_block_hash); -+ kfree(cache); -+ -+ MOD_DEC_USE_COUNT; -+} -+ -+ -+/* -+ * mb_cache_entry_alloc() -+ * -+ * Allocates a new cache entry. The new entry will not be valid initially, -+ * and thus cannot be looked up yet. It should be filled with data, and -+ * then inserted into the cache using mb_cache_entry_insert(). Returns NULL -+ * if no more memory was available. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_alloc(struct mb_cache *cache) -+{ -+ struct mb_cache_entry *ce; -+ -+ atomic_inc(&cache->c_entry_count); -+ ce = kmem_cache_alloc(cache->c_entry_cache, GFP_KERNEL); -+ if (ce) { -+ INIT_LIST_HEAD(&ce->e_lru_list); -+ INIT_LIST_HEAD(&ce->e_block_list); -+ ce->e_cache = cache; -+ atomic_set(&ce->e_used, 1); -+ } -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_insert() -+ * -+ * Inserts an entry that was allocated using mb_cache_entry_alloc() into -+ * the cache. After this, the cache entry can be looked up, but is not yet -+ * in the lru list as the caller still holds a handle to it. Returns 0 on -+ * success, or -EBUSY if a cache entry for that device + inode exists -+ * already (this may happen after a failed lookup, if another process has -+ * inserted the same cache entry in the meantime). -+ * -+ * @dev: device the cache entry belongs to -+ * @block: block number -+ * @keys: array of additional keys. There must be indexes_count entries -+ * in the array (as specified when creating the cache). 
-+ */ -+int -+mb_cache_entry_insert(struct mb_cache_entry *ce, kdev_t dev, -+ unsigned long block, unsigned int keys[]) -+{ -+ struct mb_cache *cache = ce->e_cache; -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ int error = -EBUSY, n; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) -+ goto out; -+ } -+ __mb_cache_entry_unhash(ce); -+ ce->e_dev = dev; -+ ce->e_block = block; -+ list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); -+ for (n=0; ne_indexes[n].o_key = keys[n]; -+ bucket = keys[n] % cache->c_bucket_count; -+ list_add(&ce->e_indexes[n].o_list, -+ &cache->c_indexes_hash[n][bucket]); -+ } -+out: -+ spin_unlock(&mb_cache_spinlock); -+ return error; -+} -+ -+ -+/* -+ * mb_cache_entry_release() -+ * -+ * Release a handle to a cache entry. When the last handle to a cache entry -+ * is released it is either freed (if it is invalid) or otherwise inserted -+ * in to the lru list. -+ */ -+void -+mb_cache_entry_release(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_takeout() -+ * -+ * Take a cache entry out of the cache, making it invalid. The entry can later -+ * be re-inserted using mb_cache_entry_insert(), or released using -+ * mb_cache_entry_release(). -+ */ -+void -+mb_cache_entry_takeout(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ spin_unlock(&mb_cache_spinlock); -+} -+ -+ -+/* -+ * mb_cache_entry_free() -+ * -+ * This is equivalent to the sequence mb_cache_entry_takeout() -- -+ * mb_cache_entry_release(). 
-+ */ -+void -+mb_cache_entry_free(struct mb_cache_entry *ce) -+{ -+ spin_lock(&mb_cache_spinlock); -+ mb_assert(list_empty(&ce->e_lru_list)); -+ __mb_cache_entry_unhash(ce); -+ __mb_cache_entry_release_unlock(ce); -+} -+ -+ -+/* -+ * mb_cache_entry_dup() -+ * -+ * Duplicate a handle to a cache entry (does not duplicate the cache entry -+ * itself). After the call, both the old and the new handle must be released. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_dup(struct mb_cache_entry *ce) -+{ -+ atomic_inc(&ce->e_used); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_get() -+ * -+ * Get a cache entry by device / block number. (There can only be one entry -+ * in the cache per device and block.) Returns NULL if no such cache entry -+ * exists. -+ */ -+struct mb_cache_entry * -+mb_cache_entry_get(struct mb_cache *cache, kdev_t dev, unsigned long block) -+{ -+ unsigned int bucket = (HASHDEV(dev) + block) % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ spin_lock(&mb_cache_spinlock); -+ list_for_each(l, &cache->c_block_hash[bucket]) { -+ ce = list_entry(l, struct mb_cache_entry, e_block_list); -+ if (ce->e_dev == dev && ce->e_block == block) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ goto cleanup; -+ } -+ } -+ ce = NULL; -+ -+cleanup: -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+ -+static struct mb_cache_entry * -+__mb_cache_entry_find(struct list_head *l, struct list_head *head, -+ int index, kdev_t dev, unsigned int key) -+{ -+ while (l != head) { -+ struct mb_cache_entry *ce = -+ list_entry(l, struct mb_cache_entry, -+ e_indexes[index].o_list); -+ if (ce->e_dev == dev && ce->e_indexes[index].o_key == key) { -+ if (!list_empty(&ce->e_lru_list)) -+ list_del_init(&ce->e_lru_list); -+ atomic_inc(&ce->e_used); -+ return ce; -+ } -+ l = l->next; -+ } -+ return NULL; -+} -+ -+ -+/* -+ * 
mb_cache_entry_find_first() -+ * -+ * Find the first cache entry on a given device with a certain key in -+ * an additional index. Additonal matches can be found with -+ * mb_cache_entry_find_next(). Returns NULL if no match was found. -+ * -+ * @cache: the cache to search -+ * @index: the number of the additonal index to search (0<=indexc_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = cache->c_indexes_hash[index][bucket].next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ spin_unlock(&mb_cache_spinlock); -+ return ce; -+} -+ -+ -+/* -+ * mb_cache_entry_find_next() -+ * -+ * Find the next cache entry on a given device with a certain key in an -+ * additional index. Returns NULL if no match could be found. The previous -+ * entry is atomatically released, so that mb_cache_entry_find_next() can -+ * be called like this: -+ * -+ * entry = mb_cache_entry_find_first(); -+ * while (entry) { -+ * ... 
-+ * entry = mb_cache_entry_find_next(entry, ...); -+ * } -+ * -+ * @prev: The previous match -+ * @index: the number of the additonal index to search (0<=indexe_cache; -+ unsigned int bucket = key % cache->c_bucket_count; -+ struct list_head *l; -+ struct mb_cache_entry *ce; -+ -+ mb_assert(index < mb_cache_indexes(cache)); -+ spin_lock(&mb_cache_spinlock); -+ l = prev->e_indexes[index].o_list.next; -+ ce = __mb_cache_entry_find(l, &cache->c_indexes_hash[index][bucket], -+ index, dev, key); -+ __mb_cache_entry_release_unlock(prev); -+ return ce; -+} -+ -+#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */ -+ -+static int __init init_mbcache(void) -+{ -+ register_cache(&mb_cache_definition); -+ return 0; -+} -+ -+static void __exit exit_mbcache(void) -+{ -+ unregister_cache(&mb_cache_definition); -+} -+ -+module_init(init_mbcache) -+module_exit(exit_mbcache) -+ ---- linux-2.4.20/include/asm-arm/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 -+++ linux-2.4.20-root/include/asm-arm/unistd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -244,7 +244,6 @@ - #define __NR_security (__NR_SYSCALL_BASE+223) - #define __NR_gettid (__NR_SYSCALL_BASE+224) - #define __NR_readahead (__NR_SYSCALL_BASE+225) --#if 0 /* allocated in 2.5 */ - #define __NR_setxattr (__NR_SYSCALL_BASE+226) - #define __NR_lsetxattr (__NR_SYSCALL_BASE+227) - #define __NR_fsetxattr (__NR_SYSCALL_BASE+228) -@@ -257,7 +256,6 @@ - #define __NR_removexattr (__NR_SYSCALL_BASE+235) - #define __NR_lremovexattr (__NR_SYSCALL_BASE+236) - #define __NR_fremovexattr (__NR_SYSCALL_BASE+237) --#endif - #define __NR_tkill (__NR_SYSCALL_BASE+238) - /* - * Please check 2.5 _before_ adding calls here, ---- linux-2.4.20/include/asm-ia64/unistd.h~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/include/asm-ia64/unistd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -206,8 +206,19 @@ - #define __NR_getdents64 1214 - #define __NR_getunwind 1215 - #define 
__NR_readahead 1216 -+#define __NR_setxattr 1217 -+#define __NR_lsetxattr 1218 -+#define __NR_fsetxattr 1219 -+#define __NR_getxattr 1220 -+#define __NR_lgetxattr 1221 -+#define __NR_fgetxattr 1222 -+#define __NR_listxattr 1223 -+#define __NR_llistxattr 1224 -+#define __NR_flistxattr 1225 -+#define __NR_removexattr 1226 -+#define __NR_lremovexattr 1227 -+#define __NR_fremovexattr 1228 - /* -- * 1217-1228: reserved for xattr - * 1230-1232: reserved for futex and sched_[sg]etaffinity. - */ - #define __NR_tkill 1229 ---- linux-2.4.20/include/asm-ppc64/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 -+++ linux-2.4.20-root/include/asm-ppc64/unistd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -218,6 +218,7 @@ - #define __NR_gettid 207 - #if 0 /* Reserved syscalls */ - #define __NR_tkill 208 -+#endif - #define __NR_setxattr 209 - #define __NR_lsetxattr 210 - #define __NR_fsetxattr 211 -@@ -230,6 +231,7 @@ - #define __NR_removexattr 218 - #define __NR_lremovexattr 219 - #define __NR_fremovexattr 220 -+#if 0 /* Reserved syscalls */ - #define __NR_futex 221 - #endif - ---- linux-2.4.20/include/asm-s390/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 -+++ linux-2.4.20-root/include/asm-s390/unistd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -212,9 +212,18 @@ - #define __NR_madvise 219 - #define __NR_getdents64 220 - #define __NR_fcntl64 221 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - ---- linux-2.4.20/include/asm-s390x/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 -+++ 
linux-2.4.20-root/include/asm-s390x/unistd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -180,9 +180,18 @@ - #define __NR_pivot_root 217 - #define __NR_mincore 218 - #define __NR_madvise 219 --/* -- * Numbers 224-235 are reserved for posix acl -- */ -+#define __NR_setxattr 224 -+#define __NR_lsetxattr 225 -+#define __NR_fsetxattr 226 -+#define __NR_getxattr 227 -+#define __NR_lgetxattr 228 -+#define __NR_fgetxattr 229 -+#define __NR_listxattr 230 -+#define __NR_llistxattr 231 -+#define __NR_flistxattr 232 -+#define __NR_removexattr 233 -+#define __NR_lremovexattr 234 -+#define __NR_fremovexattr 235 - #define __NR_gettid 236 - #define __NR_tkill 237 - ---- linux-2.4.20/include/asm-sparc/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 -+++ linux-2.4.20-root/include/asm-sparc/unistd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -184,24 +184,24 @@ - /* #define __NR_exportfs 166 SunOS Specific */ - #define __NR_mount 167 /* Common */ - #define __NR_ustat 168 /* Common */ --/* #define __NR_semsys 169 SunOS Specific */ --/* #define __NR_msgsys 170 SunOS Specific */ --/* #define __NR_shmsys 171 SunOS Specific */ --/* #define __NR_auditsys 172 SunOS Specific */ --/* #define __NR_rfssys 173 SunOS Specific */ -+#define __NR_setxattr 169 /* SunOS: semsys */ -+#define __NR_lsetxattr 170 /* SunOS: msgsys */ -+#define __NR_fsetxattr 171 /* SunOS: shmsys */ -+#define __NR_getxattr 172 /* SunOS: auditsys */ -+#define __NR_lgetxattr 173 /* SunOS: rfssys */ - #define __NR_getdents 174 /* Common */ - #define __NR_setsid 175 /* Common */ - #define __NR_fchdir 176 /* Common */ --/* #define __NR_fchroot 177 SunOS Specific */ --/* #define __NR_vpixsys 178 SunOS Specific */ --/* #define __NR_aioread 179 SunOS Specific */ --/* #define __NR_aiowrite 180 SunOS Specific */ --/* #define __NR_aiowait 181 SunOS Specific */ --/* #define __NR_aiocancel 182 SunOS Specific */ -+#define __NR_fgetxattr 177 /* SunOS: fchroot */ -+#define __NR_listxattr 178 /* SunOS: vpixsys */ -+#define 
__NR_llistxattr 179 /* SunOS: aioread */ -+#define __NR_flistxattr 180 /* SunOS: aiowrite */ -+#define __NR_removexattr 181 /* SunOS: aiowait */ -+#define __NR_lremovexattr 182 /* SunOS: aiocancel */ - #define __NR_sigpending 183 /* Common */ - #define __NR_query_module 184 /* Linux Specific */ - #define __NR_setpgid 185 /* Common */ --/* #define __NR_pathconf 186 SunOS Specific */ -+#define __NR_fremovexattr 186 /* SunOS: pathconf */ - #define __NR_tkill 187 /* SunOS: fpathconf */ - /* #define __NR_sysconf 188 SunOS Specific */ - #define __NR_uname 189 /* Linux Specific */ ---- linux-2.4.20/include/asm-sparc64/unistd.h~linux-2.4.20-xattr-0.8.54 2002-08-03 08:39:45.000000000 +0800 -+++ linux-2.4.20-root/include/asm-sparc64/unistd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -184,24 +184,24 @@ - /* #define __NR_exportfs 166 SunOS Specific */ - #define __NR_mount 167 /* Common */ - #define __NR_ustat 168 /* Common */ --/* #define __NR_semsys 169 SunOS Specific */ --/* #define __NR_msgsys 170 SunOS Specific */ --/* #define __NR_shmsys 171 SunOS Specific */ --/* #define __NR_auditsys 172 SunOS Specific */ --/* #define __NR_rfssys 173 SunOS Specific */ -+#define __NR_setxattr 169 /* SunOS: semsys */ -+#define __NR_lsetxattr 170 /* SunOS: msgsys */ -+#define __NR_fsetxattr 171 /* SunOS: shmsys */ -+#define __NR_getxattr 172 /* SunOS: auditsys */ -+#define __NR_lgetxattr 173 /* SunOS: rfssys */ - #define __NR_getdents 174 /* Common */ - #define __NR_setsid 175 /* Common */ - #define __NR_fchdir 176 /* Common */ --/* #define __NR_fchroot 177 SunOS Specific */ --/* #define __NR_vpixsys 178 SunOS Specific */ --/* #define __NR_aioread 179 SunOS Specific */ --/* #define __NR_aiowrite 180 SunOS Specific */ --/* #define __NR_aiowait 181 SunOS Specific */ --/* #define __NR_aiocancel 182 SunOS Specific */ -+#define __NR_fgetxattr 177 /* SunOS: fchroot */ -+#define __NR_listxattr 178 /* SunOS: vpixsys */ -+#define __NR_llistxattr 179 /* SunOS: aioread */ -+#define __NR_flistxattr 180 
/* SunOS: aiowrite */ -+#define __NR_removexattr 181 /* SunOS: aiowait */ -+#define __NR_lremovexattr 182 /* SunOS: aiocancel */ - #define __NR_sigpending 183 /* Common */ - #define __NR_query_module 184 /* Linux Specific */ - #define __NR_setpgid 185 /* Common */ --/* #define __NR_pathconf 186 SunOS Specific */ -+#define __NR_fremovexattr 186 /* SunOS: pathconf */ - #define __NR_tkill 187 /* SunOS: fpathconf */ - /* #define __NR_sysconf 188 SunOS Specific */ - #define __NR_uname 189 /* Linux Specific */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/include/linux/cache_def.h 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,15 @@ -+/* -+ * linux/cache_def.h -+ * Handling of caches defined in drivers, filesystems, ... -+ * -+ * Copyright (C) 2002 by Andreas Gruenbacher, -+ */ -+ -+struct cache_definition { -+ const char *name; -+ void (*shrink)(int, unsigned int); -+ struct list_head link; -+}; -+ -+extern void register_cache(struct cache_definition *); -+extern void unregister_cache(struct cache_definition *); ---- linux-2.4.20/include/linux/errno.h~linux-2.4.20-xattr-0.8.54 2003-04-14 16:39:03.000000000 +0800 -+++ linux-2.4.20-root/include/linux/errno.h 2003-05-07 18:08:03.000000000 +0800 -@@ -23,4 +23,8 @@ - - #endif - -+/* Defined for extended attributes */ -+#define ENOATTR ENODATA /* No such attribute */ -+#define ENOTSUP EOPNOTSUPP /* Operation not supported */ -+ - #endif ---- linux-2.4.20/include/linux/ext2_fs.h~linux-2.4.20-xattr-0.8.54 2003-04-14 16:39:08.000000000 +0800 -+++ linux-2.4.20-root/include/linux/ext2_fs.h 2003-05-07 18:08:03.000000000 +0800 -@@ -57,8 +57,6 @@ - */ - #define EXT2_BAD_INO 1 /* Bad blocks inode */ - #define EXT2_ROOT_INO 2 /* Root inode */ --#define EXT2_ACL_IDX_INO 3 /* ACL inode */ --#define EXT2_ACL_DATA_INO 4 /* ACL inode */ - #define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ - -@@ -86,7 +84,6 @@ - #else - # define EXT2_BLOCK_SIZE(s) 
(EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) - #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -121,28 +118,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext2_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext2_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext2_group_desc -@@ -314,6 +289,7 @@ struct ext2_inode { - #define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ - #define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ - #define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ -+#define EXT2_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt - #define set_opt(o, opt) o |= EXT2_MOUNT_##opt -@@ -397,6 +373,7 @@ struct ext2_super_block { - - #ifdef __KERNEL__ - #define EXT2_SB(sb) (&((sb)->u.ext2_sb)) -+#define EXT2_I(inode) (&((inode)->u.ext2_i)) - #else - /* Assume that user mode programs are passing in an ext2fs superblock, not - * a kernel struct super_block. 
This will allow us to call the feature-test -@@ -466,7 +443,7 @@ struct ext2_super_block { - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 - #define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff - --#define EXT2_FEATURE_COMPAT_SUPP 0 -+#define EXT2_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE - #define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ -@@ -623,8 +600,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext2_dir_inode_operations; -+extern struct inode_operations ext2_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext2_symlink_inode_operations; - extern struct inode_operations ext2_fast_symlink_inode_operations; - - #endif /* __KERNEL__ */ ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/include/linux/ext2_xattr.h 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext2_xattr.h -+ -+ On-disk format of extended attributes for the ext2 filesystem. 
-+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT2_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT2_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT2_XATTR_INDEX_MAX 10 -+#define EXT2_XATTR_INDEX_USER 1 -+#define EXT2_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext2_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext2_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT2_XATTR_PAD_BITS 2 -+#define EXT2_XATTR_PAD (1<e_name_len)) ) -+#define EXT2_XATTR_SIZE(size) \ -+ (((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT2_FS_XATTR -+ -+struct ext2_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext2_xattr_register(int, struct ext2_xattr_handler *); -+extern void ext2_xattr_unregister(int, struct ext2_xattr_handler *); -+ -+extern int ext2_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext2_getxattr(struct dentry *, const char *, void *, size_t); -+extern 
ssize_t ext2_listxattr(struct dentry *, char *, size_t); -+extern int ext2_removexattr(struct dentry *, const char *); -+ -+extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext2_xattr_list(struct inode *, char *, size_t); -+extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext2_xattr_delete_inode(struct inode *); -+extern void ext2_xattr_put_super(struct super_block *); -+ -+extern int init_ext2_xattr(void) __init; -+extern void exit_ext2_xattr(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR */ -+# define ext2_setxattr NULL -+# define ext2_getxattr NULL -+# define ext2_listxattr NULL -+# define ext2_removexattr NULL -+ -+static inline int -+ext2_xattr_get(struct inode *inode, int name_index, -+ const char *name, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_list(struct inode *inode, char *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext2_xattr_set(struct inode *inode, int name_index, const char *name, -+ const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext2_xattr_delete_inode(struct inode *inode) -+{ -+} -+ -+static inline void -+ext2_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext2_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR */ -+ -+# ifdef CONFIG_EXT2_FS_XATTR_USER -+ -+extern int init_ext2_xattr_user(void) __init; -+extern void exit_ext2_xattr_user(void); -+ -+# else /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+static inline int -+init_ext2_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext2_xattr_user(void) -+{ -+} -+ -+# endif /* CONFIG_EXT2_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- linux-2.4.20/include/linux/ext3_fs.h~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:04.000000000 +0800 -+++ 
linux-2.4.20-root/include/linux/ext3_fs.h 2003-05-07 18:08:03.000000000 +0800 -@@ -63,8 +63,6 @@ - */ - #define EXT3_BAD_INO 1 /* Bad blocks inode */ - #define EXT3_ROOT_INO 2 /* Root inode */ --#define EXT3_ACL_IDX_INO 3 /* ACL inode */ --#define EXT3_ACL_DATA_INO 4 /* ACL inode */ - #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ - #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ - #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -@@ -94,7 +92,6 @@ - #else - # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) - #endif --#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) - #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) - #ifdef __KERNEL__ - # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) -@@ -129,28 +126,6 @@ - #endif - - /* -- * ACL structures -- */ --struct ext3_acl_header /* Header of Access Control Lists */ --{ -- __u32 aclh_size; -- __u32 aclh_file_count; -- __u32 aclh_acle_count; -- __u32 aclh_first_acle; --}; -- --struct ext3_acl_entry /* Access Control List Entry */ --{ -- __u32 acle_size; -- __u16 acle_perms; /* Access permissions */ -- __u16 acle_type; /* Type of entry */ -- __u16 acle_tag; /* User or group identity */ -- __u16 acle_pad1; -- __u32 acle_next; /* Pointer on next entry for the */ -- /* same inode or on next free entry */ --}; -- --/* - * Structure of a blocks group descriptor - */ - struct ext3_group_desc -@@ -344,6 +319,7 @@ struct ext3_inode { - #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ - #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ -+#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -520,7 +496,7 @@ struct ext3_super_block { - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* 
Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - --#define EXT3_FEATURE_COMPAT_SUPP 0 -+#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ -@@ -703,6 +679,7 @@ extern void ext3_check_inodes_bitmap (st - extern unsigned long ext3_count_free (struct buffer_head *, unsigned); - - /* inode.c */ -+extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); - -@@ -771,8 +748,10 @@ extern struct address_space_operations e - - /* namei.c */ - extern struct inode_operations ext3_dir_inode_operations; -+extern struct inode_operations ext3_special_inode_operations; - - /* symlink.c */ -+extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - - ---- linux-2.4.20/include/linux/ext3_jbd.h~linux-2.4.20-xattr-0.8.54 2003-05-05 19:01:02.000000000 +0800 -+++ linux-2.4.20-root/include/linux/ext3_jbd.h 2003-05-07 18:08:03.000000000 +0800 -@@ -30,13 +30,19 @@ - - #define EXT3_SINGLEDATA_TRANS_BLOCKS 8 - -+/* Extended attributes may touch two data buffers, two bitmap buffers, -+ * and two group and summaries. */ -+ -+#define EXT3_XATTR_TRANS_BLOCKS 8 -+ - /* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. 
*/ - --#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2) -+#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \ -+ EXT3_XATTR_TRANS_BLOCKS - 2) - - extern int ext3_writepage_trans_blocks(struct inode *inode); - ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/include/linux/ext3_xattr.h 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,157 @@ -+/* -+ File: linux/ext3_xattr.h -+ -+ On-disk format of extended attributes for the ext3 filesystem. -+ -+ (C) 2001 Andreas Gruenbacher, -+*/ -+ -+#include -+#include -+#include -+ -+/* Magic value in attribute blocks */ -+#define EXT3_XATTR_MAGIC 0xEA020000 -+ -+/* Maximum number of references to one attribute block */ -+#define EXT3_XATTR_REFCOUNT_MAX 1024 -+ -+/* Name indexes */ -+#define EXT3_XATTR_INDEX_MAX 10 -+#define EXT3_XATTR_INDEX_USER 1 -+#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -+#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -+ -+struct ext3_xattr_header { -+ __u32 h_magic; /* magic number for identification */ -+ __u32 h_refcount; /* reference count */ -+ __u32 h_blocks; /* number of disk blocks used */ -+ __u32 h_hash; /* hash value of all attributes */ -+ __u32 h_reserved[4]; /* zero right now */ -+}; -+ -+struct ext3_xattr_entry { -+ __u8 e_name_len; /* length of name */ -+ __u8 e_name_index; /* attribute name index */ -+ __u16 e_value_offs; /* offset in disk block of value */ -+ __u32 e_value_block; /* disk block attribute is stored on (n/i) */ -+ __u32 e_value_size; /* size of attribute value */ -+ __u32 e_hash; /* hash value of name and value */ -+ char e_name[0]; /* attribute name */ -+}; -+ -+#define EXT3_XATTR_PAD_BITS 2 -+#define EXT3_XATTR_PAD (1<e_name_len)) ) -+#define EXT3_XATTR_SIZE(size) \ -+ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) -+ -+#ifdef __KERNEL__ -+ -+# ifdef CONFIG_EXT3_FS_XATTR -+ -+struct ext3_xattr_handler { -+ char *prefix; -+ size_t (*list)(char *list, struct inode *inode, const char *name, -+ int name_len); -+ 
int (*get)(struct inode *inode, const char *name, void *buffer, -+ size_t size); -+ int (*set)(struct inode *inode, const char *name, const void *buffer, -+ size_t size, int flags); -+}; -+ -+extern int ext3_xattr_register(int, struct ext3_xattr_handler *); -+extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *); -+ -+extern int ext3_setxattr(struct dentry *, const char *, const void *, size_t, int); -+extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t); -+extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); -+extern int ext3_removexattr(struct dentry *, const char *); -+ -+extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -+extern int ext3_xattr_list(struct inode *, char *, size_t); -+extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, const void *, size_t, int); -+ -+extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -+extern void ext3_xattr_put_super(struct super_block *); -+ -+extern int init_ext3_xattr(void) __init; -+extern void exit_ext3_xattr(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR */ -+# define ext3_setxattr NULL -+# define ext3_getxattr NULL -+# define ext3_listxattr NULL -+# define ext3_removexattr NULL -+ -+static inline int -+ext3_xattr_get(struct inode *inode, int name_index, const char *name, -+ void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_list(struct inode *inode, void *buffer, size_t size) -+{ -+ return -ENOTSUP; -+} -+ -+static inline int -+ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index, -+ const char *name, const void *value, size_t size, int flags) -+{ -+ return -ENOTSUP; -+} -+ -+static inline void -+ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) -+{ -+} -+ -+static inline void -+ext3_xattr_put_super(struct super_block *sb) -+{ -+} -+ -+static inline int -+init_ext3_xattr(void) -+{ -+ return 0; -+} -+ -+static inline void 
-+exit_ext3_xattr(void) -+{ -+} -+ -+# endif /* CONFIG_EXT3_FS_XATTR */ -+ -+# ifdef CONFIG_EXT3_FS_XATTR_USER -+ -+extern int init_ext3_xattr_user(void) __init; -+extern void exit_ext3_xattr_user(void); -+ -+# else /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+static inline int -+init_ext3_xattr_user(void) -+{ -+ return 0; -+} -+ -+static inline void -+exit_ext3_xattr_user(void) -+{ -+} -+ -+#endif /* CONFIG_EXT3_FS_XATTR_USER */ -+ -+#endif /* __KERNEL__ */ -+ ---- linux-2.4.20/include/linux/fs.h~linux-2.4.20-xattr-0.8.54 2003-05-05 19:00:55.000000000 +0800 -+++ linux-2.4.20-root/include/linux/fs.h 2003-05-07 18:08:03.000000000 +0800 -@@ -888,7 +888,7 @@ struct inode_operations { - int (*setattr) (struct dentry *, struct iattr *); - int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); -- int (*setxattr) (struct dentry *, const char *, void *, size_t, int); -+ int (*setxattr) (struct dentry *, const char *, const void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); - ssize_t (*listxattr) (struct dentry *, char *, size_t); - int (*removexattr) (struct dentry *, const char *); ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-2.4.20-root/include/linux/mbcache.h 2003-05-07 18:08:03.000000000 +0800 -@@ -0,0 +1,69 @@ -+/* -+ File: linux/mbcache.h -+ -+ (C) 2001 by Andreas Gruenbacher, -+*/ -+ -+/* Hardwire the number of additional indexes */ -+#define MB_CACHE_INDEXES_COUNT 1 -+ -+struct mb_cache_entry; -+ -+struct mb_cache_op { -+ int (*free)(struct mb_cache_entry *, int); -+}; -+ -+struct mb_cache { -+ struct list_head c_cache_list; -+ const char *c_name; -+ struct mb_cache_op c_op; -+ atomic_t c_entry_count; -+ int c_bucket_count; -+#ifndef MB_CACHE_INDEXES_COUNT -+ int c_indexes_count; -+#endif -+ kmem_cache_t *c_entry_cache; -+ struct list_head *c_block_hash; -+ struct list_head *c_indexes_hash[0]; -+}; -+ -+struct mb_cache_entry_index { -+ struct list_head o_list; -+ 
unsigned int o_key; -+}; -+ -+struct mb_cache_entry { -+ struct list_head e_lru_list; -+ struct mb_cache *e_cache; -+ atomic_t e_used; -+ kdev_t e_dev; -+ unsigned long e_block; -+ struct list_head e_block_list; -+ struct mb_cache_entry_index e_indexes[0]; -+}; -+ -+/* Functions on caches */ -+ -+struct mb_cache * mb_cache_create(const char *, struct mb_cache_op *, size_t, -+ int, int); -+void mb_cache_shrink(struct mb_cache *, kdev_t); -+void mb_cache_destroy(struct mb_cache *); -+ -+/* Functions on cache entries */ -+ -+struct mb_cache_entry *mb_cache_entry_alloc(struct mb_cache *); -+int mb_cache_entry_insert(struct mb_cache_entry *, kdev_t, unsigned long, -+ unsigned int[]); -+void mb_cache_entry_rehash(struct mb_cache_entry *, unsigned int[]); -+void mb_cache_entry_release(struct mb_cache_entry *); -+void mb_cache_entry_takeout(struct mb_cache_entry *); -+void mb_cache_entry_free(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_dup(struct mb_cache_entry *); -+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *, kdev_t, -+ unsigned long); -+#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) -+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache, int, -+ kdev_t, unsigned int); -+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache_entry *, int, -+ kdev_t, unsigned int); -+#endif ---- linux-2.4.20/kernel/ksyms.c~linux-2.4.20-xattr-0.8.54 2003-05-05 17:43:15.000000000 +0800 -+++ linux-2.4.20-root/kernel/ksyms.c 2003-05-07 18:08:03.000000000 +0800 -@@ -11,6 +11,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -89,6 +90,7 @@ EXPORT_SYMBOL(exit_mm); - EXPORT_SYMBOL(exit_files); - EXPORT_SYMBOL(exit_fs); - EXPORT_SYMBOL(exit_sighand); -+EXPORT_SYMBOL(copy_fs_struct); - - /* internal kernel memory management */ - EXPORT_SYMBOL(_alloc_pages); -@@ -107,6 +109,8 @@ EXPORT_SYMBOL(kmem_cache_validate); - EXPORT_SYMBOL(kmem_cache_alloc); - EXPORT_SYMBOL(kmem_cache_free); - 
EXPORT_SYMBOL(kmem_cache_size); -+EXPORT_SYMBOL(register_cache); -+EXPORT_SYMBOL(unregister_cache); - EXPORT_SYMBOL(kmalloc); - EXPORT_SYMBOL(kfree); - EXPORT_SYMBOL(vfree); ---- linux-2.4.20/mm/vmscan.c~linux-2.4.20-xattr-0.8.54 2002-11-29 07:53:15.000000000 +0800 -+++ linux-2.4.20-root/mm/vmscan.c 2003-05-07 18:08:03.000000000 +0800 -@@ -18,6 +18,7 @@ - #include - #include - #include -+#include - #include - #include - #include -@@ -34,6 +35,39 @@ - */ - #define DEF_PRIORITY (6) - -+static DECLARE_MUTEX(other_caches_sem); -+static LIST_HEAD(cache_definitions); -+ -+void register_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_add(&cache->link, &cache_definitions); -+ up(&other_caches_sem); -+} -+ -+void unregister_cache(struct cache_definition *cache) -+{ -+ down(&other_caches_sem); -+ list_del(&cache->link); -+ up(&other_caches_sem); -+} -+ -+static void shrink_other_caches(unsigned int priority, int gfp_mask) -+{ -+ struct list_head *p; -+ -+ if (down_trylock(&other_caches_sem)) -+ return; -+ -+ list_for_each_prev(p, &cache_definitions) { -+ struct cache_definition *cache = -+ list_entry(p, struct cache_definition, link); -+ -+ cache->shrink(priority, gfp_mask); -+ } -+ up(&other_caches_sem); -+} -+ - /* - * The swap-out function returns 1 if it successfully - * scanned all the pages it was asked to (`count'). 
-@@ -577,6 +611,7 @@ static int shrink_caches(zone_t * classz - - shrink_dcache_memory(priority, gfp_mask); - shrink_icache_memory(priority, gfp_mask); -+ shrink_other_caches(priority, gfp_mask); - #ifdef CONFIG_QUOTA - shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); - #endif ---- /dev/null 2003-01-30 18:24:37.000000000 +0800 -+++ linux-root/fs/ext3/ext3-exports.c 2003-05-05 18:19:11.000000000 +0800 -@@ -0,0 +1,13 @@ -+#include -+#include -+#include -+#include -+#include -+ -+EXPORT_SYMBOL(ext3_force_commit); -+EXPORT_SYMBOL(ext3_bread); -+EXPORT_SYMBOL(ext3_xattr_register); -+EXPORT_SYMBOL(ext3_xattr_unregister); -+EXPORT_SYMBOL(ext3_xattr_get); -+EXPORT_SYMBOL(ext3_xattr_list); -+EXPORT_SYMBOL(ext3_xattr_set); - -_ diff --git a/lustre/kernel_patches/patches/lustre-2.5.63.patch b/lustre/kernel_patches/patches/lustre-2.5.63.patch deleted file mode 100644 index 40e6a90..0000000 --- a/lustre/kernel_patches/patches/lustre-2.5.63.patch +++ /dev/null @@ -1,862 +0,0 @@ - arch/um/kernel/mem.c | 18 ++++++ - fs/dcache.c | 12 +++- - fs/namei.c | 132 ++++++++++++++++++++++++++++++++++++++----------- - fs/namespace.c | 1 - fs/nfsd/vfs.c | 2 - fs/open.c | 39 ++++++++++++-- - fs/stat.c | 2 - fs/sysfs/inode.c | 2 - include/linux/dcache.h | 28 ++++++++++ - include/linux/fs.h | 20 +++++++ - include/linux/namei.h | 3 - - include/linux/slab.h | 1 - kernel/ksyms.c | 7 ++ - mm/slab.c | 5 + - net/unix/af_unix.c | 2 - 15 files changed, 231 insertions(+), 43 deletions(-) - ---- linux-2.5.63-nointent/arch/um/kernel/mem.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/arch/um/kernel/mem.c Tue Mar 18 15:02:10 2003 -@@ -660,6 +660,22 @@ struct page *pte_mem_map(pte_t pte) - return(phys_mem_map(pte_val(pte))); - } - -+struct page *check_get_page(unsigned long kaddr) -+{ -+ struct page *page; -+ struct mem_region *mr; -+ unsigned long phys = __pa(kaddr); -+ unsigned int n = phys_region_index(phys); -+ -+ if(regions[n] == NULL) -+ return NULL; -+ -+ mr = regions[n]; -+ 
page = (struct page *) mr->mem_map; -+ return page + ((phys_addr(phys)) >> PAGE_SHIFT); -+} -+ -+ - struct mem_region *page_region(struct page *page, int *index_out) - { - int i; -@@ -747,7 +763,7 @@ extern unsigned long region_pa(void *vir - (addr <= region->start + region->len)) - return(mk_phys(addr - region->start, i)); - } -- panic("region_pa : no region for virtual address"); -+ //panic("region_pa : no region for virtual address"); - return(0); - } - ---- linux-2.5.63-nointent/fs/namei.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/fs/namei.c Mon Mar 24 17:08:18 2003 -@@ -101,6 +101,14 @@ - * any extra contention... - */ - -+void intent_release(struct dentry *de, struct lookup_intent *it) -+{ -+ if (it && de->d_op && de->d_op->d_intent_release) -+ de->d_op->d_intent_release(de, it); -+ -+} -+ -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -273,10 +281,18 @@ void path_release(struct nameidata *nd) - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -330,7 +346,7 @@ ok: - * make sure that nobody added the entry to the dcache in the meantime.. 
- * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; -@@ -348,7 +364,10 @@ static struct dentry * real_lookup(struc - struct dentry * dentry = d_alloc(parent, name); - result = ERR_PTR(-ENOMEM); - if (dentry) { -- result = dir->i_op->lookup(dir, dentry); -+ if (dir->i_op->lookup2) -+ result = dir->i_op->lookup2(dir, dentry, it); -+ else -+ result = dir->i_op->lookup(dir, dentry); - if (result) - dput(dentry); - else { -@@ -370,6 +389,12 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate2) { -+ if (!result->d_op->d_revalidate2(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ result = ERR_PTR(-ENOENT); -+ } - } - return result; - } -@@ -402,6 +427,7 @@ static inline int do_follow_link(struct - current->link_count--; - return err; - loop: -+ intent_release(dentry, &nd->it); - path_release(nd); - return err; - } -@@ -447,15 +473,26 @@ static int follow_mount(struct vfsmount - return res; - } - --static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) -+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry, -+ struct lookup_intent *it) - { - struct vfsmount *mounted; - - spin_lock(&dcache_lock); - mounted = lookup_mnt(*mnt, *dentry); - if (mounted) { -+ int opc = 0, mode = 0; - *mnt = mntget(mounted); - spin_unlock(&dcache_lock); -+ if (it) { -+ opc = it->it_op; -+ mode = it->it_mode; -+ } -+ intent_release(*dentry, it); -+ if (it) { -+ it->it_op = opc; -+ it->it_mode = mode; -+ } - dput(*dentry); - mntput(mounted->mnt_parent); - *dentry = dget(mounted->mnt_root); -@@ -467,7 +504,7 @@ static inline int __follow_down(struct v - - int follow_down(struct vfsmount **mnt, 
struct dentry **dentry) - { -- return __follow_down(mnt,dentry); -+ return __follow_down(mnt,dentry,NULL); - } - - static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry) -@@ -531,7 +568,7 @@ done: - return 0; - - need_lookup: -- dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, name, LOOKUP_CONTINUE, &nd->it); - if (IS_ERR(dentry)) - goto fail; - goto done; -@@ -665,7 +702,7 @@ int link_path_walk(const char * name, st - nd->dentry = next.dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup2) - break; - continue; - /* here ends the main loop */ -@@ -716,7 +753,8 @@ last_component: - break; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup2)) - break; - } - goto return_base; -@@ -735,6 +773,7 @@ out_dput: - dput(next.dentry); - break; - } -+ intent_release(nd->dentry, &nd->it); - path_release(nd); - return_err: - return err; -@@ -857,7 +896,8 @@ int path_lookup(const char *name, unsign - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. 
- */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -880,13 +920,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; -- dentry = inode->i_op->lookup(inode, new); -+ if (inode->i_op->lookup2) -+ dentry = inode->i_op->lookup2(inode, new, it); -+ else -+ dentry = inode->i_op->lookup(inode, new); - if (!dentry) { - dentry = new; - security_inode_post_lookup(inode, dentry); -@@ -898,7 +941,7 @@ out: - } - - /* SMP-safe */ --struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct lookup_intent *it) - { - unsigned long hash; - struct qstr this; -@@ -918,11 +961,16 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash(&this, base, it); - access: - return ERR_PTR(-EACCES); - } - -+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) -+{ -+ return lookup_one_len_it(name, base, len, NULL); -+} -+ - /* - * namei() - * -@@ -1224,6 +1272,9 @@ int open_namei(const char * pathname, in - /* - * Create - we need to know the parent. 
- */ -+ nd->it.it_mode = mode; -+ nd->it.it_op |= IT_CREAT; -+ - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1239,7 +1290,7 @@ int open_namei(const char * pathname, in - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, nd->dentry, &nd->it); - - do_last: - error = PTR_ERR(dentry); -@@ -1247,7 +1298,8 @@ do_last: - up(&dir->d_inode->i_sem); - goto exit; - } -- -+ -+ nd->it.it_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - if (!IS_POSIXACL(dir->d_inode)) -@@ -1277,7 +1329,7 @@ do_last: - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; -- while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); -+ while (__follow_down(&nd->mnt,&dentry,&nd->it) && d_mountpoint(dentry)); - } - error = -ENOENT; - if (!dentry->d_inode) -@@ -1297,8 +1349,10 @@ ok: - return 0; - - exit_dput: -+ intent_release(dentry, &nd->it); - dput(dentry); - exit: -+ intent_release(nd->dentry, &nd->it); - path_release(nd); - return error; - -@@ -1320,7 +1374,12 @@ do_link: - if (error) - goto exit_dput; - UPDATE_ATIME(dentry->d_inode); -- error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (dentry->d_inode->i_op->follow_link2) -+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, &nd->it); -+ else -+ error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) -+ intent_release(dentry, &nd->it); - dput(dentry); - if (error) - return error; -@@ -1342,7 +1401,7 @@ do_link: - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, nd->dentry, &nd->it); - putname(nd->last.name); - goto do_last; - } -@@ -1356,7 +1415,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash(&nd->last, nd->dentry, 
&nd->it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1588,7 +1647,7 @@ asmlinkage long sys_rmdir(const char * p - goto exit1; - } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, &nd.it); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1654,8 +1713,18 @@ asmlinkage long sys_unlink(const char * - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+// dentry = lookup_hash(&nd.last, nd.dentry, &nd.it); -+ dentry = lookup_hash(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1859,7 +1928,8 @@ exit: - * locking]. 
- */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error = 0; - struct inode *target; -@@ -1887,6 +1957,7 @@ int vfs_rename_dir(struct inode *old_dir - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - if (target) { - if (!error) - target->i_flags |= S_DEAD; -@@ -1904,7 +1975,8 @@ int vfs_rename_dir(struct inode *old_dir - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - struct inode *target; - int error; -@@ -1921,6 +1993,7 @@ int vfs_rename_other(struct inode *old_d - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - if (!error) { - /* The following d_move() should become unconditional */ - if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) -@@ -1934,7 +2007,8 @@ int vfs_rename_other(struct inode *old_d - } - - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); -@@ -1960,9 +2034,9 @@ int vfs_rename(struct inode *old_dir, st - DQUOT_INIT(new_dir); - - if (is_dir) -- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry, it); - else -- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry, it); - if (!error) { - if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); -@@ -2005,7 +2079,7 @@ 
static inline int do_rename(const char * - - trap = lock_rename(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash(&oldnd.last, old_dir, &oldnd.it); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -2025,7 +2099,7 @@ static inline int do_rename(const char * - error = -EINVAL; - if (old_dentry == trap) - goto exit4; -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash(&newnd.last, new_dir, &newnd.it); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; -@@ -2035,7 +2109,7 @@ static inline int do_rename(const char * - goto exit5; - - error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ new_dir->d_inode, new_dentry, NULL); - exit5: - dput(new_dentry); - exit4: ---- linux-2.5.63-nointent/fs/nfsd/vfs.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/fs/nfsd/vfs.c Tue Mar 18 15:02:10 2003 -@@ -1337,7 +1337,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); ---- linux-2.5.63-nointent/fs/sysfs/inode.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/fs/sysfs/inode.c Tue Mar 18 15:02:10 2003 -@@ -540,7 +540,7 @@ static struct dentry * get_dentry(struct - qstr.name = name; - qstr.len = strlen(name); - qstr.hash = full_name_hash(name,qstr.len); -- return lookup_hash(&qstr,parent); -+ return lookup_hash(&qstr,parent,NULL); - } - - ---- linux-2.5.63-nointent/include/linux/dcache.h~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/include/linux/dcache.h Tue Mar 18 15:02:10 2003 -@@ -12,6 +12,27 @@ - - struct vfsmount; - -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) 
-+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) -+ -+ -+struct lookup_intent { -+ int it_op; -+ int it_mode; -+ int it_flags; -+ int it_disposition; -+ int it_status; -+ struct iattr *it_iattr; -+ __u64 it_lock_handle[2]; -+ int it_lock_mode; -+ void *it_data; -+}; -+ -+ - /* - * linux/include/linux/dcache.h - * -@@ -34,6 +55,8 @@ struct qstr { - char name_str[0]; - }; - -+#include -+ - struct dentry_stat_t { - int nr_dentry; - int nr_unused; -@@ -87,6 +110,7 @@ struct dentry { - struct list_head d_subdirs; /* our children */ - struct list_head d_alias; /* inode alias list */ - int d_mounted; -+ struct lookup_intent *d_it; - struct qstr d_name; - struct qstr * d_qstr; /* quick str ptr used in lockless lookup and concurrent d_move */ - unsigned long d_time; /* used by d_revalidate */ -@@ -107,6 +131,8 @@ struct dentry_operations { - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *); -+ void (*d_intent_release)(struct dentry *, struct lookup_intent *); - }; - - /* the dentry parameter passed to d_hash and d_compare is the parent -@@ -147,6 +173,8 @@ d_iput: no no yes - - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ - #define DCACHE_UNHASHED 0x0010 -+#define DCACHE_LUSTRE_INVALID 0x0011 /* Lustre invalidated */ -+ - - extern spinlock_t dcache_lock; - extern rwlock_t dparent_lock; ---- linux-2.5.63-nointent/include/linux/fs.h~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/include/linux/fs.h Tue Mar 18 15:02:10 2003 -@@ -234,6 +234,9 @@ typedef int (get_blocks_t)(struct inode - #define ATTR_ATTR_FLAG 1024 - #define ATTR_KILL_SUID 2048 - #define ATTR_KILL_SGID 4096 -+#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ -+ - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -642,7 +645,7 @@ extern int vfs_symlink(struct inode *, s - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct lookup_intent *it); - - /* - * File types -@@ -728,19 +731,33 @@ struct file_operations { - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup2) (struct inode *,struct dentry *, -+ struct lookup_intent *); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link2) (struct inode *,struct inode *, const char *, int); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink2) (struct inode *, const char *, int); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink2) (struct inode *, const char *, int, const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir2) (struct inode *, const char *, int,int); - int (*rmdir) (struct inode *,struct dentry *); 
-+ int (*rmdir2) (struct inode *, const char *, int); - int (*mknod) (struct inode *,struct dentry *,int,dev_t); -+ int (*mknod2) (struct inode *, const char *, int,int,int); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct inode *, -+ const char *oldname, int oldlen, -+ const char *newname, int newlen); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); -+ int (*follow_link2) (struct dentry *, struct nameidata *, -+ struct lookup_intent *it); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); - int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t,int); -@@ -953,6 +970,7 @@ extern int register_filesystem(struct fi - extern int unregister_filesystem(struct file_system_type *); - extern struct vfsmount *kern_mount(struct file_system_type *); - extern int may_umount(struct vfsmount *); -+struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data); - extern long do_mount(char *, char *, char *, unsigned long, void *); - - extern int vfs_statfs(struct super_block *, struct statfs *); ---- linux-2.5.63-nointent/include/linux/namei.h~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/include/linux/namei.h Tue Mar 18 15:02:10 2003 -@@ -11,6 +11,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent it; - }; - - /* -@@ -44,7 +45,7 @@ extern int FASTCALL(link_path_walk(const - extern void path_release(struct nameidata *); - - extern struct dentry * lookup_one_len(const char *, struct dentry *, int); --extern struct dentry * lookup_hash(struct qstr *, 
struct dentry *); -+extern struct dentry * lookup_hash(struct qstr *, struct dentry *, struct lookup_intent *); - - extern int follow_down(struct vfsmount **, struct dentry **); - extern int follow_up(struct vfsmount **, struct dentry **); ---- linux-2.5.63-nointent/include/linux/slab.h~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/include/linux/slab.h Tue Mar 18 15:02:10 2003 -@@ -55,6 +55,7 @@ extern int kmem_cache_destroy(kmem_cache - extern int kmem_cache_shrink(kmem_cache_t *); - extern void *kmem_cache_alloc(kmem_cache_t *, int); - extern void kmem_cache_free(kmem_cache_t *, void *); -+extern int kmem_cache_validate(kmem_cache_t *cachep, void *objp); - extern unsigned int kmem_cache_size(kmem_cache_t *); - - extern void *kmalloc(size_t, int); ---- linux-2.5.63-nointent/kernel/ksyms.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/kernel/ksyms.c Tue Mar 18 15:02:10 2003 -@@ -377,6 +377,7 @@ EXPORT_SYMBOL(unregister_filesystem); - EXPORT_SYMBOL(kern_mount); - EXPORT_SYMBOL(__mntput); - EXPORT_SYMBOL(may_umount); -+EXPORT_SYMBOL(reparent_to_init); - - /* executable format registration */ - EXPORT_SYMBOL(register_binfmt); -@@ -407,6 +408,12 @@ EXPORT_SYMBOL(request_irq); - EXPORT_SYMBOL(free_irq); - EXPORT_SYMBOL(irq_stat); - -+/* lustre */ -+EXPORT_SYMBOL(do_kern_mount); -+EXPORT_SYMBOL(exit_files); -+EXPORT_SYMBOL(kmem_cache_validate); -+ -+ - /* waitqueue handling */ - EXPORT_SYMBOL(add_wait_queue); - EXPORT_SYMBOL(add_wait_queue_exclusive); ---- linux-2.5.63-nointent/mm/slab.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/mm/slab.c Tue Mar 18 15:02:10 2003 -@@ -1792,6 +1792,11 @@ static inline void __cache_free (kmem_ca - } - } - -+int kmem_cache_validate(kmem_cache_t *cachep, void *objp) -+{ -+ return 1; -+} -+ - /** - * kmem_cache_alloc - Allocate an object - * @cachep: The cache to allocate from. 
---- linux-2.5.63-nointent/net/unix/af_unix.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/net/unix/af_unix.c Tue Mar 18 15:02:10 2003 -@@ -720,7 +720,7 @@ static int unix_bind(struct socket *sock - /* - * Do the final lookup. - */ -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash(&nd.last, nd.dentry, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out_mknod_unlock; ---- linux-2.5.63-nointent/fs/dcache.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/fs/dcache.c Tue Mar 18 15:02:10 2003 -@@ -1111,15 +1111,21 @@ void d_delete(struct dentry * dentry) - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - if (!list_empty(&entry->d_hash) && !d_unhashed(entry)) BUG(); - entry->d_vfs_flags &= ~DCACHE_UNHASHED; - entry->d_bucket = list; - list_add_rcu(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ ---- linux-2.5.63-nointent/fs/namespace.c~lustre-2.5.63 Tue Mar 18 15:02:10 2003 -+++ linux-2.5.63-nointent-root/fs/namespace.c Tue Mar 18 15:02:10 2003 -@@ -925,6 +925,7 @@ void set_fs_pwd(struct fs_struct *fs, st - mntput(old_pwdmnt); - } - } -+EXPORT_SYMBOL(set_fs_pwd); - - static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd) - { ---- linux-2.5.63-nointent/fs/open.c~lustre-2.5.63 Thu Mar 20 12:43:39 2003 -+++ linux-2.5.63-nointent-root/fs/open.c Mon Mar 24 16:25:47 2003 -@@ -97,7 +97,8 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -- -+ struct lookup_intent it = { .it_op = IT_GETATTR }; -+ 
nd.it=it; - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; -@@ -142,11 +143,13 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -+ intent_release(nd.dentry, &nd.it); - error = do_truncate(nd.dentry, length); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(nd.dentry, &nd.it); - path_release(&nd); - out: - return error; -@@ -340,6 +343,8 @@ asmlinkage long sys_access(const char * - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; -+ nd.it=it; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -371,6 +376,8 @@ asmlinkage long sys_access(const char * - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ -+ intent_release(nd.dentry, &nd.it); - path_release(&nd); - } - -@@ -385,6 +392,8 @@ asmlinkage long sys_chdir(const char * f - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; -+ nd.it=it; - - error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); - if (error) -@@ -397,6 +406,7 @@ asmlinkage long sys_chdir(const char * f - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(nd.dentry, &nd.it); - path_release(&nd); - out: - return error; -@@ -436,6 +446,8 @@ asmlinkage long sys_chroot(const char * - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; -+ nd.it=it; - - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); - if (error) -@@ -508,6 +520,18 @@ asmlinkage long sys_chmod(const char * f - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = 
ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } - - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) -@@ -619,7 +643,10 @@ asmlinkage long sys_fchown(unsigned int - struct file *filp_open(const char * filename, int flags, int mode) - { - int namei_flags, error; -+ struct file * temp_filp; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags }; -+ nd.it=it; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -628,9 +655,11 @@ struct file *filp_open(const char * file - namei_flags |= 2; - - error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -- -+ if (!error) { -+ temp_filp = dentry_open(nd.dentry, nd.mnt, flags); -+ intent_release(nd.dentry,&nd.it); -+ return temp_filp; -+ } - return ERR_PTR(error); - } - -@@ -675,7 +704,7 @@ struct file *dentry_open(struct dentry * - goto cleanup_all; - } - } -- -+ - return f; - - cleanup_all: ---- linux-2.5.63-nointent/fs/stat.c~lustre-2.5.63 Fri Mar 21 21:15:40 2003 -+++ linux-2.5.63-nointent-root/fs/stat.c Fri Mar 21 21:16:53 2003 -@@ -65,6 +65,7 @@ int vfs_stat(char *name, struct kstat *s - error = user_path_walk(name, &nd); - if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &nd.it); - path_release(&nd); - } - return error; -@@ -80,6 +81,7 @@ int vfs_lstat(char *name, struct kstat * - error = user_path_walk_link(name, &nd); - if (!error) { - error = vfs_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &nd.it); - path_release(&nd); - } - return error; - -_ diff --git a/lustre/kernel_patches/patches/lustre_version.patch b/lustre/kernel_patches/patches/lustre_version.patch deleted file mode 100644 index 78855ac..0000000 --- a/lustre/kernel_patches/patches/lustre_version.patch +++ 
/dev/null @@ -1,12 +0,0 @@ - - - - include/linux/lustre_version.h | 1 + - 1 files changed, 1 insertion(+) - ---- /dev/null Fri Aug 30 17:31:37 2002 -+++ linux-2.4.18-18.8.0-l12-braam/include/linux/lustre_version.h Thu Feb 13 07:58:33 2003 -@@ -0,0 +1 @@ -+#define LUSTRE_KERNEL_VERSION 19 - -_ diff --git a/lustre/kernel_patches/patches/mcore-2.4.20-8.patch b/lustre/kernel_patches/patches/mcore-2.4.20-8.patch deleted file mode 100644 index c8b80eb..0000000 --- a/lustre/kernel_patches/patches/mcore-2.4.20-8.patch +++ /dev/null @@ -1,2738 +0,0 @@ -? linux/.config -? linux/include/linux/autoconf.h -? linux/include/linux/modules -Index: linux/Makefile -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/Makefile,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/Makefile 12 Mar 2003 19:48:52 -0000 1.3.2.1 -+++ linux/Makefile 1 Apr 2003 12:17:40 -0000 1.3.2.1.2.1 -@@ -99,6 +99,10 @@ - CFLAGS += -fomit-frame-pointer - endif - AFLAGS := -D__ASSEMBLY__ $(CPPFLAGS) -+ifeq ($(CONFIG_MCL_COREDUMP),y) -+ CFLAGS += -g -+endif -+ - - # - # ROOT_DEV specifies the default root-device when making the image. -Index: linux/Documentation/Configure.help -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/Documentation/Configure.help,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/Documentation/Configure.help 12 Mar 2003 19:48:52 -0000 1.3.2.1 -+++ linux/Documentation/Configure.help 1 Apr 2003 12:17:40 -0000 1.3.2.1.2.1 -@@ -21660,6 +21660,35 @@ - This option allows you to run the kernel with data cache disabled. - Say Y if you experience CPM lock-ups. - -+Boot kernel image support -+CONFIG_BOOTIMG -+ Add support for booting a new Linux kernel from a running Linux -+ system. 
You need to download the bootimg(8) utility from -+ ftp://icaftp.epfl.ch/pub/people/almesber/misc/bootimg-current.tar.gz -+ in order to use this functionality. -+ -+Protect SMP configuration tables -+CONFIG_BOOTIMG_SMP -+ On SMP systems, the BIOS stores tables with configuration data in -+ memory and an SMP-enabled kernel reads these tables. However, a -+ kernel without SMP support will overwrite such tables. If a kernel -+ without SMP support used bootimg to boot an SMP-enabled kernel, the -+ latter will probably crash when trying to read the SMP tables. The -+ CONFIG_BOOTIMG_SMP option enables minimal support for scanning and -+ protecting of SMP configuration tables also for kernels without SMP -+ support. -+ -+In-memory kernel core dump facility -+CONFIG_MCL_COREDUMP -+ In conjunction with bootimg, this allows you to get kernel core dumps -+ of your system at panic() time. The panic call is modified so that it -+ calls the core dump facility and reboots the system. On the way back -+ up, the kernel dump image is written out to disk by the accompanying -+ init script. You can use the crash analysis tool to analyze the core -+ dump. This tool can be found at : -+ -+ http://www.missioncriticallinux.com/download -+ - # - # m68k-specific kernel options - # Documented by Chris Lawrence et al. 
-Index: linux/arch/i386/config.in -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/config.in,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.2 -diff -u -r1.3.2.1 -r1.3.2.1.2.2 ---- linux/arch/i386/config.in 12 Mar 2003 19:49:05 -0000 1.3.2.1 -+++ linux/arch/i386/config.in 1 Apr 2003 19:35:12 -0000 1.3.2.1.2.2 -@@ -502,6 +502,12 @@ - bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ - bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK - bool ' Compile the kernel with frame pointers' CONFIG_FRAME_POINTER -+ if [ "$CONFIG_FRAME_POINTER " != "n" ]; then -+ bool ' Kernel Core Dump Facility' CONFIG_MCL_COREDUMP -+ if [ "$CONFIG_MCL_COREDUMP" = "y" ]; then -+ bool ' Reboot using bootimg' CONFIG_BOOTIMG -+ fi -+ fi - fi - - endmenu -Index: linux/arch/i386/vmlinux.lds -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/vmlinux.lds,v -retrieving revision 1.1.1.1.4.1 -retrieving revision 1.1.1.1.4.1.2.1 -diff -u -r1.1.1.1.4.1 -r1.1.1.1.4.1.2.1 ---- linux/arch/i386/vmlinux.lds 12 Mar 2003 19:49:05 -0000 1.1.1.1.4.1 -+++ linux/arch/i386/vmlinux.lds 1 Apr 2003 12:17:40 -0000 1.1.1.1.4.1.2.1 -@@ -19,6 +19,13 @@ - .rodata : { *(.rodata) *(.rodata.*) } - .kstrtab : { *(.kstrtab) } - -+ . = ALIGN(16); /* Relocatable bootimage code */ -+ __bootimg_start = .; -+ .bootimg : { -+ *(.bootimg) -+ } -+ __bootimg_end = .; -+ - . 
= ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } -Index: linux/arch/i386/boot/setup.S -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/boot/setup.S,v -retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.1 -diff -u -r1.2.2.1 -r1.2.2.1.2.1 ---- linux/arch/i386/boot/setup.S 12 Mar 2003 19:49:05 -0000 1.2.2.1 -+++ linux/arch/i386/boot/setup.S 1 Apr 2003 12:17:40 -0000 1.2.2.1.2.1 -@@ -105,16 +105,22 @@ - # flags, unused bits must be zero (RFU) bit within loadflags - loadflags: - LOADED_HIGH = 1 # If set, the kernel is loaded high -+RELOADS_GDT = 2 # if set, kernel reloads GDT, such that -+ # boot loader does not have to provide -+ # GDT in a "safe" memory location - CAN_USE_HEAP = 0x80 # If set, the loader also has set - # heap_end_ptr to tell how much - # space behind setup.S can be used for - # heap purposes. - # Only the loader knows what is free --#ifndef __BIG_KERNEL__ -- .byte 0 --#else -- .byte LOADED_HIGH -+_FLAGS = 0 -+#ifdef __BIG_KERNEL__ -+ _FLAGS = _FLAGS | LOADED_HIGH - #endif -+#ifdef CONFIG_BOOTIMG -+ _FLAGS = _FLAGS | RELOADS_GDT -+#endif -+ .byte _FLAGS - - setup_move_size: .word 0x8000 # size to move, when setup is not - # loaded at 0x90000. 
We will move setup -Index: linux/arch/i386/kernel/Makefile -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/Makefile,v -retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.1 -diff -u -r1.2.2.1 -r1.2.2.1.2.1 ---- linux/arch/i386/kernel/Makefile 12 Mar 2003 19:49:05 -0000 1.2.2.1 -+++ linux/arch/i386/kernel/Makefile 1 Apr 2003 12:17:40 -0000 1.2.2.1.2.1 -@@ -49,6 +49,7 @@ - obj-$(CONFIG_X86_LONGRUN) += longrun.o - obj-$(CONFIG_ELAN_CPUFREQ) += elanfreq.o - obj-$(CONFIG_PROFILING) += profile.o -+obj-$(CONFIG_MCL_COREDUMP) += crash.o - - - include $(TOPDIR)/Rules.make -Index: linux/arch/i386/kernel/crash.c -=================================================================== -RCS file: linux/arch/i386/kernel/crash.c -diff -N linux/arch/i386/kernel/crash.c ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/arch/i386/kernel/crash.c 1 Apr 2003 12:17:40 -0000 1.1.6.1 -@@ -0,0 +1,82 @@ -+/* -+ * linux/arch/i386/crash.c -+ * -+ * Architecture dependant code for MCL in-memory core dump. -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+inline void crash_save_regs(void) { -+ static unsigned long regs[8]; -+ -+ __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs[0])); -+ __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs[1])); -+ __asm__ __volatile__("movl %%edx,%0" : "=m"(regs[2])); -+ __asm__ __volatile__("movl %%esi,%0" : "=m"(regs[3])); -+ __asm__ __volatile__("movl %%edi,%0" : "=m"(regs[4])); -+ __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs[5])); -+ __asm__ __volatile__("movl %%eax,%0" : "=m"(regs[6])); -+ __asm__ __volatile__("movl %%esp,%0" : "=m"(regs[7])); -+ -+ panic_regs = regs; -+} -+ -+/* -+ * Save the current stack pointer and EIP. 
-+ */ -+void crash_save_current_state(struct task_struct *tp) -+{ -+ /* -+ * Here we save ebp instead of esp just in case the compiler -+ * decides to put an extra push in before we execute this -+ * instruction (thus invalidating our frame pointer). -+ */ -+ asm volatile("movl %%ebp,%0":"=m" (*(u_long *)&tp->thread.esp)); -+ tp->thread.eip = (u_long)crash_save_current_state; -+ panic_ksp[smp_processor_id()] = tp->thread.esp; -+ mb(); -+ -+ save_core(); -+ -+ crash_halt_or_reboot(1); -+} -+ -+/* -+ * If we are not the panicking thread, we simply halt. Otherwise, -+ * we take care of calling the reboot code. -+ */ -+void crash_halt_or_reboot(int boot_cpu) -+{ -+#ifdef CONFIG_SMP -+ if (!boot_cpu) { -+ stop_this_cpu(NULL); -+ /* NOTREACHED */ -+ } -+#endif -+ machine_restart(NULL); -+} -+ -+void crash_cleanup_smp_state(void) -+{ -+ /* -+ * Here we duplicate smp_send_stop. Crash_halt_or_reboot() calls -+ * stop_this_cpu. We now know that we are the only one running, -+ * so we finish off the smp_send_stop function. 
-+ */ -+ __cli(); -+#ifdef CONFIG_SMP -+ disable_local_APIC(); -+#endif -+} -+ -+/* -+ * Core dump IPI -+ */ -+void smp_crash_funnel_cpu(void) -+{ -+ crash_save_current_state(current); -+} -Index: linux/arch/i386/kernel/nmi.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/nmi.c,v -retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.1 -diff -u -r1.2.2.1 -r1.2.2.1.2.1 ---- linux/arch/i386/kernel/nmi.c 12 Mar 2003 19:49:06 -0000 1.2.2.1 -+++ linux/arch/i386/kernel/nmi.c 1 Apr 2003 12:17:40 -0000 1.2.2.1.2.1 -@@ -374,11 +374,18 @@ - bust_spinlocks(1); - printk("NMI Watchdog detected LOCKUP on CPU%d, eip %08lx, registers:\n", cpu, regs->eip); - show_registers(regs); -+#ifdef CONFIG_MCL_COREDUMP -+ spin_unlock(&nmi_print_lock); -+ bust_spinlocks(0); -+ panic("die"); -+ /* NOTREACHED */ -+#else - printk("console shuts up ...\n"); - console_silent(); - spin_unlock(&nmi_print_lock); - bust_spinlocks(0); - do_exit(SIGSEGV); -+#endif - } - } else { - last_irq_sums[cpu] = sum; -Index: linux/arch/i386/kernel/process.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/process.c,v -retrieving revision 1.2.2.2 -retrieving revision 1.2.2.2.2.1 -diff -u -r1.2.2.2 -r1.2.2.2.2.1 ---- linux/arch/i386/kernel/process.c 1 Apr 2003 02:11:17 -0000 1.2.2.2 -+++ linux/arch/i386/kernel/process.c 1 Apr 2003 12:17:40 -0000 1.2.2.2.2.1 -@@ -50,6 +50,9 @@ - #ifdef CONFIG_MATH_EMULATION - #include - #endif -+#ifdef CONFIG_BOOTIMG -+#include -+#endif - - #include - -@@ -377,7 +380,21 @@ - - void machine_restart(char * __unused) - { -+#ifdef CONFIG_MCL_COREDUMP -+ extern char *panicmsg; -+ /* -+ * Only call bootimg if we have a valid descriptor and -+ * we are in a panic() context. 
-+ */ -+ if (panicmsg) -+#endif -+#ifdef CONFIG_BOOTIMG -+ if (bootimg_dsc.page_dir) -+ boot_image(); -+#endif -+ - #if CONFIG_SMP -+{ - int cpuid; - - cpuid = GET_APIC_ID(apic_read(APIC_ID)); -@@ -413,6 +430,7 @@ - if (!netdump_func) - smp_send_stop(); - disable_IO_APIC(); -+} - #endif - - if(!reboot_thru_bios) { -Index: linux/arch/i386/kernel/setup.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/setup.c,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.2 -diff -u -r1.3.2.1 -r1.3.2.1.2.2 ---- linux/arch/i386/kernel/setup.c 12 Mar 2003 19:49:06 -0000 1.3.2.1 -+++ linux/arch/i386/kernel/setup.c 1 Apr 2003 17:55:35 -0000 1.3.2.1.2.2 -@@ -116,6 +116,9 @@ - #include - #include - #include -+#ifdef CONFIG_MCL_COREDUMP -+#include -+#endif - /* - * Machine setup.. - */ -@@ -973,6 +976,7 @@ - static unsigned long __init setup_memory(void) - { - unsigned long bootmap_size, start_pfn, max_low_pfn; -+ unsigned long bootmap_pages = 0UL, crash_pages = 0UL; - - /* - * partially used pages are not usable - thus -@@ -992,6 +996,21 @@ - printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", - pages_to_mb(highend_pfn - highstart_pfn)); - #endif -+ -+#ifdef CONFIG_MCL_COREDUMP -+ bootmap_pages = bootmem_bootmap_pages(max_low_pfn); -+ crash_pages = crash_pages_needed(); -+ -+ printk("start_pfn: %d, bootmap_pages: %d\n", start_pfn, bootmap_pages); -+ -+ crash_init((u_long)phys_to_virt(PFN_PHYS(start_pfn)), -+ (u_long)phys_to_virt(PFN_PHYS(LOW_OFFSET + start_pfn)), -+ (u_long)phys_to_virt(PFN_PHYS(LOW_OFFSET + start_pfn + -+ crash_pages))); -+ -+ printk("new start_pfn: %08lx\n", PFN_PHYS(start_pfn)); -+ printk("crash map starts at %lx\n",(start_pfn+bootmap_pages)*PAGE_SIZE); -+#endif - printk(KERN_NOTICE "%ldMB LOWMEM available.\n", - pages_to_mb(max_low_pfn)); - /* -@@ -1007,8 +1026,8 @@ - * the (very unlikely) case of us accidentally initializing the - * bootmem allocator with an invalid RAM area. 
- */ -- reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + -- bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); -+ reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + bootmap_size + -+ ((1+crash_pages)*PAGE_SIZE) + PAGE_SIZE-1) - (HIGH_MEMORY)); - - /* - * reserve physical page 0 - it's a special BIOS page on many boxes, -@@ -1016,6 +1035,16 @@ - */ - reserve_bootmem(0, PAGE_SIZE); - -+#ifdef CONFIG_BOOTIMG -+ /* -+ * bootimg(8) reads the old parameter block. Note that the copy in -+ * empty_zero_page will vanish when mem_init runs. (Should we -+ * memcpy(phys_to_virt(0x90000), PARAM, PAGE_SIZE); -+ * now ?) -+ */ -+ reserve_bootmem(0x90000, PAGE_SIZE); -+#endif -+ - #ifdef CONFIG_SMP - /* - * But first pinch a few for the stack/trampoline stuff -@@ -1032,6 +1061,7 @@ - find_smp_config(); - #endif - #ifdef CONFIG_BLK_DEV_INITRD -+ printk("caution: initrd may overwrite dump\n"); /* phro */ - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { - reserve_bootmem(INITRD_START, INITRD_SIZE); -@@ -1172,6 +1202,12 @@ - smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ - #endif - paging_init(); -+#ifdef CONFIG_MCL_COREDUMP -+ /* -+ * Reserve crash pages -+ */ -+ crash_mark_dump_reserved(); -+#endif - #ifdef CONFIG_X86_LOCAL_APIC - /* - * get boot-time SMP configuration: -Index: linux/arch/i386/kernel/smp.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/smp.c,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/arch/i386/kernel/smp.c 12 Mar 2003 19:49:06 -0000 1.3.2.1 -+++ linux/arch/i386/kernel/smp.c 1 Apr 2003 12:17:40 -0000 1.3.2.1.2.1 -@@ -23,6 +23,9 @@ - #include - #include - -+#ifdef CONFIG_MCL_COREDUMP -+#include -+#endif - /* - * Some notes on x86 processor bugs affecting SMP operation: - * -@@ -579,7 +582,7 @@ - return 0; - } - --static void stop_this_cpu (void * dummy) 
-+void stop_this_cpu (void * dummy) - { - /* - * Remove this CPU: -Index: linux/arch/i386/kernel/traps.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/traps.c,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/arch/i386/kernel/traps.c 12 Mar 2003 19:49:06 -0000 1.3.2.1 -+++ linux/arch/i386/kernel/traps.c 1 Apr 2003 12:17:40 -0000 1.3.2.1.2.1 -@@ -52,6 +52,10 @@ - #include - #include - -+#ifdef CONFIG_MCL_COREDUMP -+#include -+#endif -+ - asmlinkage int system_call(void); - asmlinkage void lcall7(void); - asmlinkage void lcall27(void); -@@ -309,7 +313,11 @@ - netdump_func(regs); - bust_spinlocks(0); - spin_unlock_irq(&die_lock); -- do_exit(SIGSEGV); -+#ifdef CONFIG_MCL_COREDUMP -+ if(panic_on_oops) -+ panic("die"); -+#endif -+ do_exit(SIGSEGV);/* NOTREACHED */ - } - - static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) -Index: linux/drivers/char/misc.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/drivers/char/misc.c,v -retrieving revision 1.2 -retrieving revision 1.2.4.1 -diff -u -r1.2 -r1.2.4.1 ---- linux/drivers/char/misc.c 25 Sep 2002 17:11:05 -0000 1.2 -+++ linux/drivers/char/misc.c 1 Apr 2003 12:17:41 -0000 1.2.4.1 -@@ -78,6 +78,8 @@ - extern int i8k_init(void); - extern int lcd_init(void); - -+extern int crash_init_chrdev(void); -+ - static int misc_read_proc(char *buf, char **start, off_t offset, - int len, int *eof, void *private) - { -@@ -255,6 +257,9 @@ - int __init misc_init(void) - { - create_proc_read_entry("misc", 0, 0, misc_read_proc, NULL); -+#ifdef CONFIG_MCL_COREDUMP -+ crash_init_chrdev(); -+#endif - #ifdef CONFIG_MVME16x - rtc_MK48T08_init(); - #endif -Index: linux/drivers/char/sysrq.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/drivers/char/sysrq.c,v 
-retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.2 -diff -u -r1.2.2.1 -r1.2.2.1.2.2 ---- linux/drivers/char/sysrq.c 12 Mar 2003 19:49:47 -0000 1.2.2.1 -+++ linux/drivers/char/sysrq.c 1 Apr 2003 17:55:35 -0000 1.2.2.1.2.2 -@@ -97,7 +97,18 @@ - action_msg: "Resetting", - }; - -- -+#ifdef CONFIG_MCL_COREDUMP -+/* kernel core dump sysrq */ -+static void sysrq_handle_coredump(int key, struct pt_regs *pt_regs, -+ struct kbd_struct *kbd, struct tty_struct *ttty) { -+ panic("sysrq"); -+} -+static struct sysrq_key_op sysrq_coredump_op = { -+ handler: sysrq_handle_coredump, -+ help_msg: "Crash", -+ action_msg: "Dumping core", -+}; -+#endif - - /* SYNC SYSRQ HANDLERS BLOCK */ - -@@ -334,7 +345,11 @@ - it is handled specially on the spark - and will never arive */ - /* b */ &sysrq_reboot_op, -+#ifdef CONFIG_MCL_COREDUMP -+/* c */ &sysrq_coredump_op, -+#else - /* c */ NULL, -+#endif - /* d */ NULL, - /* e */ &sysrq_term_op, - /* f */ NULL, -Index: linux/include/asm-i386/bootimg.h -=================================================================== -RCS file: linux/include/asm-i386/bootimg.h -diff -N linux/include/asm-i386/bootimg.h ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/include/asm-i386/bootimg.h 1 Apr 2003 12:17:41 -0000 1.1.6.1 -@@ -0,0 +1,141 @@ -+/* asm-i386/bootimg.h - Boot image, i386-specific code */ -+ -+/* Written 2000 by Werner Almesberger */ -+ -+/* -+ * When porting bootimg(2) to a new architcture, you need to adapt the -+ * functions and definitions in this file. -+ */ -+ -+ -+#ifndef _ASM_I386_BOOTIMG_H -+#define _ASM_I386_BOOTIMG_H -+ -+#include -+#include -+ -+#ifdef CONFIG_SMP -+#include -+#include -+#endif -+ -+ -+/* -+ * The memory page with the code currently executing has been copied from -+ * old_page to new_page. Jump there. -+ * -+ * Note: flush_icache_range has already been called on the new page. 
-+ */ -+ -+static inline void jump_relocated(unsigned long old_page,unsigned long new_page) -+{ -+ int tmp; -+ -+ __asm__ __volatile__( -+ "stc\n\t" -+ "call 1f\n" -+ "1:\tjnc 2f\n\t" -+ "popl %0\n\t" -+ "addl %1,%0\n\t" -+ "addl %1,%%esp\n\t" -+ "clc\n\t" -+ "jmp *%0\n" -+ "2:" -+ : "=&r" (tmp) : "r" (new_page-old_page)); -+} -+ -+ -+/* -+ * Stop paging, such that -+ * - page tables can be overwritten -+ * - all physical memory can be accessed -+ * - all physical memory is identity-mapped -+ * -+ * (Other rules are possible, but need to be encoded in bootimg(8).) -+ */ -+ -+static inline void stop_paging(void) -+{ -+ unsigned long msw; -+ -+ __asm__ __volatile__( -+ "movl %%cr0,%0\n\t" -+ "andl $0x7fffffff,%0\n\t" -+ "movl %0,%%cr0\n\t" -+ "jmp 1f\n\t" /* i486 and such */ -+ "1:" -+ -+/* Clear the PAE bit in register %cr4 if we were in PAE mode. The initial -+ * page table set up by the new kernel's bootstrap code is non-PAE regardless -+ * of whether the new kernel is a PAE kernel. By clearing the PAE bit here, -+ * we make sure the bootstrap code doesn't accidentally enable PAE mode when -+ * it turns on address translation. -+ */ -+#ifdef CONFIG_X86_PAE -+ "movl %%cr4,%0\n\t" -+ "andl $0xffffffdf,%0\n\t" -+ "movl %0,%%cr4\n\t" -+#endif -+ -+ : "=&r" (msw) : : "memory"); -+} -+ -+ -+/* -+ * Stop any remaining concurrency in the system. If become_only_thread fails -+ * but the system is still usable, become_only_thread should return an error -+ * code. If no recovery is possible, it may as well panic. -+ */ -+ -+static inline int become_only_thread(void) -+{ -+#ifdef CONFIG_SMP -+ smp_send_stop(); -+ disable_IO_APIC(); -+#endif -+ cli(); -+ return 0; -+} -+ -+ -+/* -+ * A conservative estimate of the number of bytes relocate_and_jump allocated -+ * on the stack. This is only used for sanity checking before running code, -+ * because we can't recover from failure in relocate_and_jump. 
-+ */ -+ -+#define RESERVE_MIN_RELOC_STACK 256 -+ -+ -+/* -+ * Change the stack pointer such that stack is at the end of the specified -+ * page. No data on the old stack will be accessed anymore, so no copying is -+ * required. -+ */ -+ -+static inline void stack_on_page(void *page) -+{ -+ __asm__ __volatile__( -+ "push %%ds\n\t" -+ "pop %%ss\n\t" -+ "movl %0,%%esp\n\t" -+ "addl $0x1000,%%esp\n\t" -+ : : "r" (page)); -+} -+ -+/* -+ * Set up things such that the kernel will be comfortable (e.g. some -+ * architectures expect the boot loader to set registers in certain ways), -+ * and then jump to the kernel's entry address. -+ */ -+ -+static inline void jump_to_kernel(void (*kernel_entry)(void)) -+{ -+ __asm__ __volatile__( -+ "mov $0x90000,%%esi\n\t" -+ : : ); -+ -+ kernel_entry(); -+} -+ -+#endif -Index: linux/include/asm-i386/crash.h -=================================================================== -RCS file: linux/include/asm-i386/crash.h -diff -N linux/include/asm-i386/crash.h ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/include/asm-i386/crash.h 1 Apr 2003 12:17:41 -0000 1.1.6.1 -@@ -0,0 +1,15 @@ -+#ifndef __ASM_CRASH_H -+#define __ASM_CRASH_H -+ -+#define UPPER_MEM_BACKUP 0 -+#define LOWER_MEM_FORWARD 0 -+#define LOW_OFFSET 100 -+ -+/* -+ * These two functions are inlined on alpha. That's why they appear -+ * in the arch dependent include file. 
-+ */ -+void crash_save_current_state(struct task_struct *); -+void crash_halt_or_reboot(int); -+ -+#endif -Index: linux/include/linux/bootimg.h -=================================================================== -RCS file: linux/include/linux/bootimg.h -diff -N linux/include/linux/bootimg.h ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/include/linux/bootimg.h 1 Apr 2003 12:17:41 -0000 1.1.6.1 -@@ -0,0 +1,84 @@ -+/* linux/bootimg.h - Boot image, general definitions */ -+ -+/* Written 2000 by Werner Almesberger */ -+ -+ -+#ifndef _LINUX_BOOTIMG_H -+#define _LINUX_BOOTIMG_H -+ -+ -+/* -+ * Constraints on image_map: -+ * - each image_map[n] is the virtual address of a page-sized memory region -+ * readable by the user -+ * - currently, image_map[n] is not required to be page-aligned, but this may -+ * change in the future if we want to map pages directly to lower memory -+ * pressure (NB: mapping works for ELF and plain binary images, but usually -+ * not for (b)zImages, because the prepended boot and setup sectors -+ * mis-align them) -+ * -+ * Constraints on load_map: -+ * - each load_map[] is the physical address of a page in RAM -+ */ -+ -+struct boot_image { -+ void **image_map; /* pointers to image pages in user memory */ -+ int pages; /* length in pages */ -+ unsigned long *load_map;/* list of destination pages (physical addr) */ -+ unsigned long start; /* jump to this physical address */ -+ int flags; /* for future use, must be zero for now */ -+}; -+ -+ -+#ifdef __KERNEL__ -+ -+#define __bootimg __attribute__ ((__section__ (".bootimg"))) -+ -+ -+struct bootimg_dsc { -+ unsigned long self; /* code page ALL ADDRESSES */ -+ unsigned long scratch; /* scratch page ARE PHYSICAL !*/ -+ unsigned long **page_dir; /* src & dst page tables */ -+ void (*jump_to)(void); /* start address */ -+ int pages; /* number of pages */ -+ unsigned long csum; /* Kernel Image checksum */ -+}; -+ -+/* -+ * page_dir contains pointers to pages containing pointers to pages. 
We call -+ * page_dir a "directory" and the page page_dir[n] points to a "table". The -+ * first PAGES_PER_TABLE/2 entries of page_dir are for source pages, and other -+ * half are for destination pages. -+ */ -+ -+/* -+ * Note that the definitions used here do not necessarily correspond to the -+ * architecture-specific PTRS_PER_PTE, __pte_offset, etc. -+ */ -+ -+#define PAGES_PER_TABLE (PAGE_SIZE/sizeof(void *)) -+#define FROM_TABLE(i) ((i)/PAGES_PER_TABLE) -+#define TO_TABLE(i) ((i)/PAGES_PER_TABLE+PAGES_PER_TABLE/2) -+#define PAGE_NR(i) ((i) % PAGES_PER_TABLE) -+ -+ -+extern char __bootimg_start,__bootimg_end; /* linker segment boundaries */ -+extern unsigned long *unity_page; /* unity-mapped page for i386 */ -+ -+/* -+ * relocate_and_jump runs in its own page with its own stack. This makes it -+ * difficult to pass parameters. The solution chosen here is to use the global -+ * variable bootimg_dsc, which is copied into an "auto" variable by -+ * relocate_and_jump before any copying or relocation takes place. 
-+ */ -+ -+extern struct bootimg_dsc bootimg_dsc; -+ -+typedef void (*relocate_and_jump_t)(void); -+ -+void relocate_and_jump(void); -+int boot_image(void); -+ -+#endif /* __KERNEL__ */ -+ -+#endif -Index: linux/include/linux/crash.h -=================================================================== -RCS file: linux/include/linux/crash.h -diff -N linux/include/linux/crash.h ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/include/linux/crash.h 1 Apr 2003 12:17:41 -0000 1.1.6.1 -@@ -0,0 +1,119 @@ -+#ifndef __LINUX_CRASH_H -+#define __LINUX_CRASH_H -+ -+/* defines for interfacing with user-space (ioctls, etc) */ -+struct ioctl_getdump { -+ unsigned long kva; -+ unsigned long buf; -+}; -+ -+#define CRASH_IOC_MAGIC 'C' -+ -+#define CRASH_IOCFREEDUMP _IO(CRASH_IOC_MAGIC, 0) -+#define CRASH_IOCGETDUMP _IOWR(CRASH_IOC_MAGIC, 1, struct ioctl_getdump) -+#define CRASH_IOCBOOTIMG _IOWR(CRASH_IOC_MAGIC, 2, struct boot_image) -+#define CRASH_IOCVERSION _IO(CRASH_IOC_MAGIC, 3) -+ -+/* kernel-only part of crash.h */ -+#ifdef __KERNEL__ -+#include -+ -+#define CRASH_K_MINOR (1) -+#define CRASH_K_MAJOR (0) -+ -+/* -+ * Crash prototypes. 
-+ */ -+void save_core(void); -+void crash_mark_dump_reserved(void); -+void crash_init(u_long bootmap_va, u_long crash_va, u_long end_alloc_va); -+u_long crash_pages_needed(void); -+void smp_crash_funnel_cpu(void); -+void crash_cleanup_smp_state(void); -+ -+/* -+ * Arch dependant crash.c funcs -+ */ -+void crash_save_current_state(struct task_struct *); -+void crash_halt_or_reboot(int); -+inline void crash_save_regs(void); -+ -+/* -+ * Crash globals -+ */ -+extern u_long crash_dump_header; -+extern volatile u_long panic_ksp[]; -+extern volatile int crash_release; -+extern int panic_on_oops; -+extern char *panicmsg; -+extern int panic_processor; -+extern int crash_perform_sync; -+extern unsigned long *panic_regs; -+ -+/* -+ * symbols not exported by linux header files -+ */ -+extern void stop_this_cpu(void *); -+ -+/* struct crash_map_hdr located at byte offset 0 */ -+/* on-disk formats */ -+ -+#define trunc_page(x) ((void *)(((unsigned long)(x)) & ~((unsigned long)(PAGE_SIZE - 1)))) -+#define round_page(x) trunc_page(((unsigned long)(x)) + ((unsigned long)(PAGE_SIZE - 1))) -+ -+#define CRASH_MAGIC 0x9a8bccdd -+#define CRASH_SOURCE_PAGES 128 -+#define CRASH_SUB_MAP_BYTES ((u_long)round_page((CRASH_SOURCE_PAGES+1)*sizeof(u_long))) -+#define CRASH_SUB_MAP_PAGES (CRASH_SUB_MAP_BYTES / PAGE_SIZE) -+#define CRASH_UNCOMPR_BUF_PAGES (CRASH_SOURCE_PAGES + CRASH_SUB_MAP_PAGES) -+#define CRASH_COMPR_BUF_PAGES (CRASH_UNCOMPR_BUF_PAGES + (CRASH_UNCOMPR_BUF_PAGES/4)) -+#define CRASH_COMPESS_PRIME_PAGES (2*CRASH_COMPR_BUF_PAGES) -+#define CRASH_ZALLOC_PAGES 16*5*2 /* 2 to handle crash in crash */ -+#define CRASH_LOW_WATER_PAGES 100 -+ -+#define CRASH_CPU_TIMEOUT 5000 /* 5 sec wait for other cpus to stop */ -+ -+#define CRASH_MARK_RESERVED(addr) (set_bit(PG_reserved,&mem_map[MAP_NR(addr)].flags)) -+#define CRASH_CLEAR_RESERVED(addr) (clear_bit(PG_reserved,&mem_map[MAP_NR(addr)].flags)) -+#define CRASH_MARK_BOOT_RESERVED(addr) reserve_bootmem(virt_to_phys((void *)addr), PAGE_SIZE); 
-+ -+typedef int boolean_t; -+ -+#define TRUE 1 -+#define FALSE 0 -+ -+/* mem structure */ -+struct mem_crash_map_hdr { -+ long magic[4]; /* identify crash dump */ -+ u_long map; /* location of map */ -+ u_long map_pages; -+ u_long data_pages; -+ u_long compr_units; -+ u_long boot_reserved_start; -+ u_long boot_reserved_end; -+}; -+struct mem_crash_map_entry { -+ u_long src_va; /* source start of larger non-contig -+ * block. a src_va of -1 means that -+ * the dest_page_va is the location of -+ * the next map page */ -+ u_long dest_page_va; /* dest of this sub block */ -+ u_long check_sum; /* check_sum for dest data */ -+}; -+ -+/* file structure */ -+struct crash_map_hdr { -+ long magic[4]; /* identify crash dump */ -+ int blk_size; /* block size for this device */ -+ int map_block; /* location of map */ -+ int map_blocks; /* number of blocks for map */ -+}; -+struct crash_map_entry { -+ u_long start_va; /* virtual address */ -+ char *exp_data; /* expanded data in memory */ -+ int start_blk; /* device location */ -+ int num_blks; -+}; -+ -+#endif /* __KERNEL__ */ -+#endif /* __LINUX_CRASH_H */ -Index: linux/include/linux/mm.h -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/include/linux/mm.h,v -retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.2 -diff -u -r1.2.2.1 -r1.2.2.1.2.2 ---- linux/include/linux/mm.h 12 Mar 2003 19:51:27 -0000 1.2.2.1 -+++ linux/include/linux/mm.h 1 Apr 2003 17:55:35 -0000 1.2.2.1.2.2 -@@ -331,6 +331,11 @@ - #define PG_lru 18 - #define PG_active_cache 19 - #define PG_fs_1 20 /* Filesystem specific */ -+#ifdef CONFIG_MCL_COREDUMP -+#define PG_free 21 -+#define PG_shm 22 -+#define PG_anon 23 -+#endif - - /* Make it prettier to test the above... 
*/ - #define UnlockPage(page) unlock_page(page) -@@ -452,6 +457,11 @@ - #define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) - #define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) - #define PageReserved(page) test_bit(PG_reserved, &(page)->flags) -+#ifdef CONFIG_MCL_COREDUMP -+#define PageFree(page) (test_bit(PG_free, &(page)->flags)) -+#define PageAnon(page) (test_bit(PG_anon, &(page)->flags)) -+#define PageShm(page) (test_bit(PG_shm, &(page)->flags)) -+#endif - - #define PageActiveAnon(page) test_bit(PG_active_anon, &(page)->flags) - #define SetPageActiveAnon(page) set_bit(PG_active_anon, &(page)->flags) -Index: linux/include/linux/reboot.h -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/include/linux/reboot.h,v -retrieving revision 1.1.1.1 -retrieving revision 1.1.1.1.10.2 -diff -u -r1.1.1.1 -r1.1.1.1.10.2 ---- linux/include/linux/reboot.h 7 May 2002 21:53:47 -0000 1.1.1.1 -+++ linux/include/linux/reboot.h 1 Apr 2003 17:55:35 -0000 1.1.1.1.10.2 -@@ -20,6 +20,7 @@ - * CAD_OFF Ctrl-Alt-Del sequence sends SIGINT to init task. - * POWER_OFF Stop OS and remove all power from system, if possible. - * RESTART2 Restart system using given command string. -+ * COREDUMP We're taking a core dump, secondary cpus already stopped. 
- */ - - #define LINUX_REBOOT_CMD_RESTART 0x01234567 -@@ -28,7 +29,9 @@ - #define LINUX_REBOOT_CMD_CAD_OFF 0x00000000 - #define LINUX_REBOOT_CMD_POWER_OFF 0x4321FEDC - #define LINUX_REBOOT_CMD_RESTART2 0xA1B2C3D4 -- -+#ifdef CONFIG_MCL_COREDUMP -+#define LINUX_REBOOT_CMD_COREDUMP 0x9A8BCCDD -+#endif - - #ifdef __KERNEL__ - -Index: linux/include/linux/sysctl.h -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/include/linux/sysctl.h,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/include/linux/sysctl.h 12 Mar 2003 19:51:30 -0000 1.3.2.1 -+++ linux/include/linux/sysctl.h 1 Apr 2003 12:17:41 -0000 1.3.2.1.2.1 -@@ -126,6 +126,7 @@ - KERN_CADPID=54, /* int: PID of the process to notify on CAD */ - KERN_CORE_PATTERN=56, /* string: pattern for core-files */ - KERN_PID_MAX=55, /* int: max PID value of processes */ -+ KERN_PANIC_ON_OOPS /* int: panic on oops enabled */ - }; - - -Index: linux/init/main.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/init/main.c,v -retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.1 -diff -u -r1.2.2.1 -r1.2.2.1.2.1 ---- linux/init/main.c 12 Mar 2003 19:51:35 -0000 1.2.2.1 -+++ linux/init/main.c 1 Apr 2003 12:17:41 -0000 1.2.2.1.2.1 -@@ -70,6 +70,10 @@ - #include - #endif - -+#ifdef CONFIG_BOOTIMG -+#include -+#endif -+ - /* - * Versions of gcc older than that listed below may actually compile - * and link okay, but the end product can have subtle run time bugs. -@@ -352,10 +356,14 @@ - { - char * command_line; - extern char saved_command_line[]; -+#if defined(CONFIG_BOOTIMG) && defined(CONFIG_X86_LOCAL_APIC) -+ unsigned long value; -+#endif - /* - * Interrupts are still disabled. Do necessary setups, then - * enable them - */ -+ printk("start_kernel\n"); - lock_kernel(); - printk(linux_banner); - setup_arch(&command_line); -@@ -373,12 +381,26 @@ - * this. 
But we do want output early, in case something goes wrong. - */ - console_init(); -+ -+#ifdef CONFIG_BOOTIMG -+ unity_page = alloc_bootmem_pages(PAGE_SIZE); -+ printk("unity_page addr: %p\n",unity_page); -+#endif - #ifdef CONFIG_MODULES - init_modules(); - #endif - profile_init(); - kmem_cache_init(); - sti(); -+#if defined(CONFIG_BOOTIMG) && defined(CONFIG_X86_LOCAL_APIC) -+ /* If we don't make sure the APIC is enabled, AND the LVT0 -+ * register is programmed properly, we won't get timer interrupts -+ */ -+ setup_local_APIC(); -+ -+ value = apic_read(APIC_LVT0); -+ apic_write_around(APIC_LVT0, value & ~APIC_LVT_MASKED); -+#endif - calibrate_delay(); - #ifdef CONFIG_BLK_DEV_INITRD - if (initrd_start && !initrd_below_start_ok && -Index: linux/kernel/Makefile -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/kernel/Makefile,v -retrieving revision 1.1.1.1.4.1 -retrieving revision 1.1.1.1.4.1.2.1 -diff -u -r1.1.1.1.4.1 -r1.1.1.1.4.1.2.1 ---- linux/kernel/Makefile 12 Mar 2003 19:51:36 -0000 1.1.1.1.4.1 -+++ linux/kernel/Makefile 1 Apr 2003 12:17:41 -0000 1.1.1.1.4.1.2.1 -@@ -22,7 +22,8 @@ - obj-$(CONFIG_PM) += pm.o - obj-$(CONFIG_KALLSYMS) += kallsyms.o - obj-$(CONFIG_CPU_FREQ) += cpufreq.o -- -+obj-$(CONFIG_BOOTIMG) += bootimg.o bootimg_pic.o -+obj-$(CONFIG_MCL_COREDUMP) += crash.o - - ifneq ($(CONFIG_IA64),y) - # According to Alan Modra , the -fno-omit-frame-pointer is -Index: linux/kernel/bootimg.c -=================================================================== -RCS file: linux/kernel/bootimg.c -diff -N linux/kernel/bootimg.c ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/kernel/bootimg.c 1 Apr 2003 12:17:41 -0000 1.1.6.1 -@@ -0,0 +1,301 @@ -+/* bootimg.c - Boot another (kernel) image */ -+ -+/* Written 2000 by Werner Almesberger */ -+ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#if 0 -+#define 
DPRINTK_CONT(format,args...) printk(format,##args) -+#else -+#define DPRINTK_CONT(format,args...) -+#endif -+#define DPRINTK(format,args...) DPRINTK_CONT(KERN_DEBUG format,##args) -+ -+unsigned long **bootimg_page_dir; -+ -+struct bootimg_dsc bootimg_dsc; /* communication with PIC */ -+unsigned long *unity_page; /* unity-mapped page for i386 */ -+ -+static unsigned long bootimg_checksum(unsigned long **page_dir, int num_pages) -+{ -+ unsigned long checksum, *page; -+ int i, j; -+ -+ checksum = 0; -+ -+ for (i = 0; i < num_pages; i++) { -+ page = __va((unsigned long *) -+ page_dir[FROM_TABLE(i)][PAGE_NR(i)]); -+ -+ for (j = 0; j < PAGES_PER_TABLE; j++) -+ checksum ^= page[j]; -+ -+ checksum ^= page_dir[TO_TABLE(i)][PAGE_NR(i)]; -+ } -+ -+ return checksum; -+} -+ -+#ifdef CONFIG_X86_PAE -+ -+static unsigned long get_identity_mapped_page(void) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ unsigned long phys_addr, page_base; -+ -+ /* Set up a 2 Mb identity-mapped page. */ -+ -+ phys_addr = virt_to_phys(unity_page); -+ pgd = pgd_offset(current->active_mm, phys_addr); -+ pmd = pmd_offset(pgd, phys_addr); -+ -+ /* We hardcode this rather than using PMD_MASK just in case the PAE -+ * mode setup ever changes so that 2 Mb pages are no longer used. -+ */ -+ page_base = phys_addr & ~((1 << 21) - 1); -+ -+ set_pmd(pmd, __pmd(page_base | _PAGE_PSE | _KERNPG_TABLE)); -+ __flush_tlb_one(phys_addr); -+ -+ return (unsigned long) unity_page; -+} -+ -+#else -+ -+static unsigned long get_identity_mapped_page(void) -+{ -+ set_pgd(pgd_offset(current->active_mm,virt_to_phys(unity_page)), -+ __pgd((_KERNPG_TABLE + _PAGE_PSE + (virt_to_phys(unity_page)&PGDIR_MASK)))); -+ __flush_tlb_one(virt_to_phys(unity_page)); -+ return (unsigned long)unity_page; -+} -+ -+#endif -+ -+#if 0 /* Perhaps we'll need this in the future? 
*/ -+static void unmap_identity_mapped_page(void) -+{ -+ set_pgd(pgd_offset(current->active_mm,virt_to_phys(unity_page)),__pgd(0)); -+ __flush_tlb(); -+} -+#endif -+ -+static int fill_page_dir(unsigned long **page_dir,struct boot_image *image) -+{ -+ int i, count=0; -+ -+ memset(page_dir,0,PAGE_SIZE); -+ for (i = 0; i < image->pages; i += PAGES_PER_TABLE) { -+ unsigned long **table; -+ int bytes_left; -+ -+ table = page_dir+FROM_TABLE(i); -+ *table = (unsigned long *) get_free_page(GFP_KERNEL); -+ if (!*table) return -ENOMEM; -+ -+ memset(*table,0,PAGE_SIZE); -+ DPRINTK("page %d: from table %p @ %p\n",i,*table,table); -+ table = page_dir+TO_TABLE(i); -+ *table = (unsigned long *) get_free_page(GFP_KERNEL); -+ if (!*table) return -ENOMEM; -+ -+ bytes_left = (image->pages-i)*sizeof(unsigned long); -+ if (copy_from_user(*table,image->load_map+i, -+ bytes_left > PAGE_SIZE ? PAGE_SIZE : bytes_left)) -+ return -EFAULT; -+ DPRINTK("page %d: to table %p @ %p\n",i,*table,table); -+ count+=2; /* 2 pages per loop */ -+ } -+ -+ for (i = 0; i < image->pages; i++) { -+ unsigned long page = get_free_page(GFP_KERNEL); -+ void *src; -+ -+ if (!page) return -ENOMEM; -+ count++; -+ -+ page_dir[FROM_TABLE(i)][PAGE_NR(i)] = -+ virt_to_phys((void *) page); -+ if (get_user(src,image->image_map+i) || -+ copy_from_user((void *) page,src,PAGE_SIZE)) -+ return -EFAULT; -+ -+ DPRINTK("page %d: %p->%p->%p @ %p\n",i,src,(void *) page, -+ (void *) page_dir[FROM_TABLE(i)][PAGE_NR(i)], -+ &page_dir[FROM_TABLE(i)][PAGE_NR(i)]); -+ } -+ -+ DPRINTK("fill_page_dir: %d pages allocated\n", count); -+ -+ return 0; -+} -+ -+ -+static void free_page_dir(unsigned long **page_dir) -+{ -+ int i,j,count=0; -+ -+ for (i = 0; i < PAGES_PER_TABLE/2; i++) -+ if (page_dir[i]) -+ for (j = 0; j < PAGES_PER_TABLE; j++) -+ if (page_dir[i][j]) { -+ free_page((unsigned long) -+ phys_to_virt(page_dir[i][j])); -+ count++; -+ } -+ for (i = 0; i < PAGES_PER_TABLE; i++) -+ if (page_dir[i]) { -+ free_page((unsigned long) 
*page_dir[i]); -+ count++; -+ } -+ DPRINTK("free_page_dir: %d pages freed\n", count); -+} -+ -+ -+static void convert_table_refs_to_phys(unsigned long **page_dir) -+{ -+ int i; -+ -+ DPRINTK("PAGES_PER_TABLE: %d\n",PAGES_PER_TABLE); -+ for (i = 0; i < PAGES_PER_TABLE; i++) -+ if (page_dir[i]) { -+ DPRINTK("table %i: mapped %p -> ",i,page_dir[i]); -+ page_dir[i] = (unsigned long *) -+ virt_to_phys(page_dir[i]); -+ DPRINTK_CONT("%p\n",page_dir[i]); -+ } -+} -+ -+ -+ -+static int fill_bootimg_dsc(struct boot_image *image) -+{ -+ unsigned long scratch; -+ int error = -ENOMEM; -+ -+ if(bootimg_page_dir) { -+ /* free previously allocated memory */ -+ free_page_dir(bootimg_page_dir); -+ free_page((unsigned long) bootimg_page_dir); -+ DPRINTK("free_page (bootimg_page_dir)\n"); -+ } -+ -+ bootimg_page_dir = (unsigned long **) get_free_page(GFP_KERNEL); -+ if (!bootimg_page_dir) goto out0; -+ DPRINTK("get_free_page (bootimg_page_dir)\n"); -+ -+ error = fill_page_dir(bootimg_page_dir,image); -+ if (error) goto out1; -+ -+ if(!bootimg_dsc.scratch) { -+ scratch = get_free_page(GFP_KERNEL); -+ DPRINTK("get_free_page (scratch)\n"); -+ } else -+ scratch = 1; /* already allocated */ -+ -+ if (!scratch) goto out1; -+ /* -+ * Not all architectures need the code to be identity-mapped, but it -+ * can't hurt ... 
-+ */ -+ DPRINTK("bootimg_page_dir: mapped %p -> ",bootimg_page_dir); -+ bootimg_dsc.page_dir = (unsigned long **) virt_to_phys(bootimg_page_dir); -+ DPRINTK_CONT("%p\n",bootimg_dsc.page_dir); -+ if(!bootimg_dsc.scratch) -+ bootimg_dsc.scratch = virt_to_phys((void *) scratch); -+ bootimg_dsc.jump_to = (void (*)(void)) image->start; -+ bootimg_dsc.pages = image->pages; -+ bootimg_dsc.csum = bootimg_checksum(bootimg_page_dir, image->pages); -+ -+ return 0; -+ -+out1: -+ free_page_dir(bootimg_page_dir); -+ free_page((unsigned long) bootimg_page_dir); -+ DPRINTK("free_page (bootimg_page_dir)\n"); -+ bootimg_page_dir = 0; -+out0: -+ return error; -+} -+ -+extern char *panicmsg; -+int boot_image() -+{ -+ relocate_and_jump_t code; -+ unsigned long code_page; -+ int error = -ENOMEM; -+ -+ if (bootimg_checksum(__va(bootimg_dsc.page_dir),bootimg_dsc.pages) -+ != bootimg_dsc.csum) -+ printk("Checksum of kernel image failed. Rebooting via BIOS\n"); -+ -+ code_page = get_identity_mapped_page(); -+ if (!code_page) goto out3; -+ code = (relocate_and_jump_t) virt_to_phys((void *) code_page); -+ memcpy(code,&__bootimg_start,&__bootimg_end-&__bootimg_start); -+ flush_icache_range(&__bootimg_start, &__bootimg_end-&__bootimg_start); -+ -+ bootimg_dsc.self = (unsigned long) code; -+ printk(KERN_INFO "Running boot code at 0x%p\n",code); -+ -+ /* -+ * The point of no return. Not even printk may work after a successful -+ * return from become_only_thread. 
-+ */ -+ -+ if (!panicmsg) { -+ error = become_only_thread(); -+ if (error) goto out3; -+ } else { -+#ifdef CONFIG_SMP -+ disable_IO_APIC(); -+#endif -+ __cli(); -+ } -+ -+ convert_table_refs_to_phys((unsigned long **)__va(bootimg_dsc.page_dir)); -+ stack_on_page(code); -+ -+ code(); -+ -+ panic("PIC code exec failed"); -+out3: -+ printk("boot_image() failed!\n"); -+ for(;;); -+} -+ -+/* changed from asmlinkage because we're called via an IOCTL on /dev/crash now */ -+int sys_bootimg(struct boot_image *user_dsc) -+{ -+ struct boot_image dsc; -+ -+ if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_MODULE)) return -EPERM; -+ if (&__bootimg_end-&__bootimg_start > PAGE_SIZE-RESERVE_MIN_RELOC_STACK) -+ { -+ printk(KERN_ERR "boot_image: PIC too large (%d bytes)\n", -+ &__bootimg_end-&__bootimg_start); -+ return -EIO; -+ } -+ if ((void *) relocate_and_jump != (void *) &__bootimg_start) { -+ printk(KERN_ERR "boot_image: relocate_and_jump is mis-placed" -+ "(0x%p != 0x%p)\n",relocate_and_jump,&__bootimg_start); -+ return -EIO; -+ } -+ -+ if (copy_from_user(&dsc,user_dsc,sizeof(dsc))) return -EFAULT; -+ if (dsc.pages >= PAGES_PER_TABLE*PAGES_PER_TABLE/2) return -EFBIG; -+ if (dsc.flags) return -EINVAL; /* for future use */ -+ return fill_bootimg_dsc(&dsc); -+} -Index: linux/kernel/bootimg_pic.c -=================================================================== -RCS file: linux/kernel/bootimg_pic.c -diff -N linux/kernel/bootimg_pic.c ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/kernel/bootimg_pic.c 1 Apr 2003 12:17:41 -0000 1.1.6.1 -@@ -0,0 +1,91 @@ -+/* bootimg_pic.c - Boot image, position-independent code */ -+ -+/* Written 2000 by Werner Almesberger */ -+ -+/* -+ * Strongly inspired by FiPaBoL designed mainly by Otfried Cheong and Roger -+ * Gammans, and written by the latter. -+ */ -+ -+/* -+ * This code is position-independent and must fit in a single page ! -+ * Furthermore, everything (text+data+stack) has to go into the -+ * .bootimg segment. 
-+ */ -+ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+ -+#define copy_and_swap(from,to) \ -+ ( { my_copy_page(from,to); \ -+ tmp = from; \ -+ from = to; \ -+ to = tmp; } ) -+ -+ -+static inline void my_copy_page(unsigned long from,unsigned long to) -+{ -+ unsigned long end = from+PAGE_SIZE; -+ -+ do *((unsigned long *) to)++ = *((unsigned long *) from)++; -+ while (from != end); -+} -+ -+ -+void __bootimg relocate_and_jump(void) -+{ -+ struct bootimg_dsc dsc = bootimg_dsc; -+ int i; -+ -+ stop_paging(); -+ for (i = 0; i < dsc.pages; i++) { -+ unsigned long from,to,tmp; -+ -+ from = dsc.page_dir[FROM_TABLE(i)][PAGE_NR(i)]; -+ to = dsc.page_dir[TO_TABLE(i)][PAGE_NR(i)]; -+ if (from == to) continue; -+ if (to == dsc.self) { -+ copy_and_swap(dsc.self,dsc.scratch); -+ /* WARNING: flush_icache_range MUST BE INLINED !!! */ -+ flush_icache_range(dsc.self,dsc.self+PAGE_SIZE-1); -+ jump_relocated(dsc.scratch,dsc.self); -+ } -+ else if (to == (unsigned long) dsc.page_dir) -+ copy_and_swap((unsigned long) dsc.page_dir,dsc.scratch); -+ else { -+ /* -+ * O((n^2-n)/2), sigh ... 
-+ */ -+ unsigned long **table; -+ int j; -+ -+ for (j = i+1; j < dsc.pages; j++) { -+ table = dsc.page_dir+FROM_TABLE(j); -+ if (((unsigned long) *table) == to) { -+ copy_and_swap(*table,dsc.scratch); -+ break; -+ } -+ if ((*table)[PAGE_NR(j)] == to) { -+ copy_and_swap((*table)[PAGE_NR(j)], -+ dsc.scratch); -+ break; -+ } -+ table = dsc.page_dir+TO_TABLE(j); -+ if (((unsigned long) *table) == to) { -+ copy_and_swap(*table,dsc.scratch); -+ break; -+ } -+ } -+ } -+ my_copy_page(from,to); -+ dsc.scratch = from; -+ } -+ jump_to_kernel(dsc.jump_to); -+} -Index: linux/kernel/crash.c -=================================================================== -RCS file: linux/kernel/crash.c -diff -N linux/kernel/crash.c ---- /dev/null 1 Jan 1970 00:00:00 -0000 -+++ linux/kernel/crash.c 1 Apr 2003 12:17:41 -0000 1.1.6.1 -@@ -0,0 +1,886 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#ifdef CONFIG_BOOTIMG -+#include -+#endif -+ -+static void crash_print_data_around(u_long p); -+static void crash_free_page(u_long addr); -+static int crash_chksum_page(u_long pg_addr, u_long * sum_addr); -+static void *czalloc(void *arg, unsigned int items, unsigned int size); -+static void czfree(void *arg, void *ptr); -+static u_long crash_alloc_dest_page(void); -+static void crash_free_dest_page(u_long dest); -+static void init_dest_page_alloc(void); -+static int crash_audit_maps(void); -+static u_long crash_get_source_page(void); -+static u_long crash_update_map(u_long map, u_long src_base, u_long dest, u_long * pages); -+static int crash_reset_stream(z_stream * stream); -+static boolean_t crash_is_kseg(u_long addr); -+static u_long *crash_link(u_long p); -+static int crash_chksum(u_long limit, u_long * sum_addr); -+static int crash_audit_map_page(u_long map); -+static void crash_wait_cpus(void); -+static int crash_is_dir_page(struct page *page); -+ -+/* for the /dev/crash 
interface */ -+int crash_init_chrdev(void); -+static int crashdev_ioctl(struct inode *, struct file *, unsigned int, unsigned long); -+ -+#define CRASH_DEBUG 1 -+ -+#ifdef CONFIG_BOOTIMG -+extern int sys_bootimg(struct boot_image *); -+#endif -+ -+static u_long crash_compr_buf; -+static u_long crash_uncompr_buf; -+static u_long crash_dump_header = 0; -+static u_long crash_dest_free_list = 0; -+static u_long crash_debug = 0; -+ -+static u_long crash_cur_pfn; -+ -+static u_long src_pages_skipped = 0; -+static u_long src_pages_saved = 0; -+static u_long dest_pages_free = 0; -+ -+/* this information is saved from within panic() */ -+char *panicmsg = (char *)0; -+int panic_processor = 0; -+int crash_perform_sync = 0; -+ -+u_int console_crash = 0; /* should be moved to alpha branch */ -+ -+// typedef struct task_struct *task_t; -+ -+/* -+ * Threads active at time of panic: -+ */ -+volatile task_t *panic_threads[NR_CPUS]; -+volatile unsigned long panic_ksp[NR_CPUS]; -+unsigned long *panic_regs = NULL; -+ -+int panic_on_oops; /* for /proc/sys/kernel/panic_on_oops */ -+ -+extern unsigned long max_low_pfn; -+ -+u_long crash_zalloc_start; // , crash_zalloc_end, crash_zalloc_cur; -+ -+/* -+ * Crash Kernel API functions below -+ * crash_pages_needed, computes pages needed for header and compression temp -+ * crash_init, partitions out the allocated pages, sets defaults and -+ * initializes the character device. -+ * crash_mark_dump_reserved, marks pages reserved from a previous dump. -+ * save_core, called at panic time to save a dump to memory. 
-+ */ -+u_long crash_pages_needed(void) -+{ -+ /* one for the header */ -+ return (1 + CRASH_ZALLOC_PAGES + CRASH_UNCOMPR_BUF_PAGES + CRASH_COMPR_BUF_PAGES); -+} -+ -+void crash_init(u_long bootmap_va, u_long crash_va, u_long end_alloc_va) -+{ -+ struct mem_crash_map_hdr *header; -+ int i; -+ -+ /* the default behavior is not NOT panic on a kernel OOPS */ -+ panic_on_oops = 0; -+ -+ printk("crash_init (crash_va: %08lx)\n", crash_va); -+ for (i = 0; i < NR_CPUS; i++) -+ panic_threads[i] = 0; -+ crash_dump_header = crash_va; -+ crash_va += PAGE_SIZE; -+ crash_zalloc_start = crash_va; -+ crash_va += CRASH_ZALLOC_PAGES * PAGE_SIZE; -+ crash_uncompr_buf = crash_va; -+ crash_va += CRASH_UNCOMPR_BUF_PAGES * PAGE_SIZE; -+ crash_compr_buf = crash_va; -+ crash_va += CRASH_COMPR_BUF_PAGES * PAGE_SIZE; -+#if 0 -+ if (crash_va != end_alloc_va) -+ panic("crash_init inconsistency-1\n"); -+#endif -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+#ifdef CRASH_DEBUG -+ printk("crash_dump_header %p {\n", header); -+ printk(" magic[0] = %lx\n", header->magic[0]); -+ printk(" map = %lx\n", header->map); -+ printk(" map_pages = %lx\n", header->map_pages); -+ printk(" data_pages = %lx\n", header->data_pages); -+ printk(" compr_units = %lx\n", header->compr_units); -+ printk(" boot_reserved_start = %lx\n", header->boot_reserved_start); -+ printk(" boot_reserved_end = %lx\n", header->boot_reserved_end); -+#endif -+ -+ if (header->magic[0] == CRASH_MAGIC) { -+ printk("crash found\n"); -+ if ((header->boot_reserved_start != bootmap_va) || -+ (header->boot_reserved_end != end_alloc_va)) { -+ /* crash audit will catch the corruption */ -+ printk("crash_init inconsistency, dump may be corrupted\n"); -+ } -+ } else { -+printk("memset..."); -+ memset(header, 0, sizeof(*header)); -+printk("done\n"); -+ } -+ -+ header->boot_reserved_start = bootmap_va; -+ header->boot_reserved_end = end_alloc_va; -+ -+} -+ -+void crash_mark_dump_reserved(void) -+{ -+ struct mem_crash_map_hdr *header; 
-+ struct mem_crash_map_entry *m; -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+ if (header->magic[0] != CRASH_MAGIC) -+ return; -+ m = (struct mem_crash_map_entry *)header->map; -+#ifdef CRASH_DEBUG -+ printk("\n\n\ncrash_mark_dump_reserved\n\n"); -+ printk("crash_dump_header %p {\n", header); -+ printk(" magic[0] = %lx\n", header->magic[0]); -+ printk(" map = %lx\n", header->map); -+ printk(" map_pages = %lx\n", header->map_pages); -+ printk(" data_pages = %lx\n", header->data_pages); -+ printk(" compr_units = %lx\n", header->compr_units); -+ printk(" boot_reserved_start = %lx\n", header->boot_reserved_start); -+ printk(" boot_reserved_end = %lx\n", header->boot_reserved_end); -+ printk("mem_crash_map_entry %p {\n", m); -+ printk(" src_va = %lx\n", m->src_va); -+ printk(" dest_page_va = %lx\n", m->dest_page_va); -+ printk(" check_sum = %lx\n", m->check_sum); -+#endif -+ -+ if (crash_audit_maps()) { -+ header->magic[0] = 0; -+ return; -+ } -+ -+ m = (struct mem_crash_map_entry *)header->map; -+ again: -+ CRASH_MARK_BOOT_RESERVED(m); -+ for (; m->src_va; m++) { -+ if (m->src_va == -1) { -+ m = (struct mem_crash_map_entry *)m->dest_page_va; -+ goto again; -+ } -+ CRASH_MARK_BOOT_RESERVED(m->dest_page_va); -+ } -+ return; -+} -+ -+void save_core(void) -+{ -+ int i, j, k; -+ z_stream stream; -+ int err; -+ struct task_struct *tp; -+ struct mem_crash_map_hdr *header; -+ u_long *sub_map; -+ u_long map; -+ u_long src, dest, unc, cp, src_base, comp_pages; -+ -+ k = 0; -+ dest = 0; -+ __cli(); -+ tp = current; -+ mb(); -+ if (smp_processor_id() != 0) { /* boot_cpu_id is always 0, i think */ -+ panic_threads[smp_processor_id()] = tp; -+ crash_halt_or_reboot(0); -+ } else { -+ if (console_crash) -+ panic_threads[smp_processor_id()] = &init_task_union.task; -+ else -+ panic_threads[smp_processor_id()] = tp; -+ -+ crash_wait_cpus(); -+ } -+ -+ printk("save_core: started on CPU%d\n", smp_processor_id()); -+ if (!crash_dump_header) { -+ printk("save_core: not 
initialized\n"); -+ return; -+ } -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+ header->magic[0] = 0; -+ header->map_pages = 0; -+ header->data_pages = 0; -+ header->compr_units = 0; -+ header->map = 0; -+ -+ stream.workspace=(void*)crash_zalloc_start; -+ // stream.zalloc = czalloc; -+ // stream.zfree = czfree; -+ // stream.opaque = (voidpf) 0; -+ stream.next_out = (Bytef *) crash_compr_buf; -+ stream.avail_out = (uInt) (CRASH_COMPR_BUF_PAGES * PAGE_SIZE); -+ stream.next_in = (Bytef *) crash_uncompr_buf; -+ stream.avail_in = (uInt) (CRASH_UNCOMPR_BUF_PAGES * PAGE_SIZE); -+ err = zlib_deflateInit(&stream, Z_BEST_SPEED); -+ if (err != Z_OK) { -+ printk("save_core: bad return %d from deflateInit\n", err); -+ return; -+ } -+ -+ init_dest_page_alloc(); -+ header->map = map = crash_update_map(0, 0, 0, &header->map_pages); -+ if (!map) { -+ printk("save_core: no dest pages\n"); -+ return; -+ } -+ crash_cur_pfn = 0; -+ src_base = 0; -+ src = 0; -+ for (;;) { -+ sub_map = (u_long *) crash_uncompr_buf; -+ unc = crash_uncompr_buf + CRASH_SUB_MAP_PAGES * PAGE_SIZE; -+ for (i = 0; i < CRASH_SOURCE_PAGES; i++) { -+ src = crash_get_source_page(); -+ if (!src) -+ break; -+ if (!i) -+ src_base = src; -+ if (!crash_is_kseg(unc) || !crash_is_kseg(src)) { -+ printk("unc = 0x%lx, src = 0x%lx, i = %d\n", unc, src, i); -+ i = src = 0; -+ break; -+ } -+ memcpy((void *)unc, (void *)src, PAGE_SIZE); -+ unc += PAGE_SIZE; -+ *sub_map++ = src; -+ } -+ *sub_map = 0; -+ if (!i && !src) -+ break; -+ err = zlib_deflate(&stream, Z_FINISH); -+ if (!(err == Z_STREAM_END)) { -+ zlib_deflateEnd(&stream); -+ printk("save_core: bad return %d from deflate, src_base = 0x%lx\n", err, -+ src_base); -+ return; -+ } -+ comp_pages = (u_long) round_page(stream.total_out) / PAGE_SIZE; -+ if (crash_debug) -+ printk("src_base = 0x%lx compressed data in 0x%lx pages\n", src_base, -+ comp_pages); -+ -+ cp = crash_compr_buf; -+ j = 0; -+ if (crash_debug) -+ printk("\nsrc = %lx\n", src_base); -+ else { 
-+ printk("."); -+ if (!(k++ % 64)) -+ printk("\n"); -+ } -+ for (i = 0; i < comp_pages; i++) { -+ dest = crash_alloc_dest_page(); -+ if (crash_debug) { -+ printk("%lx ", dest); -+ if (!(j++ % 8)) -+ printk("\n"); -+ } -+ header->data_pages++; -+ if (!dest) { -+ printk("save_core: no dest pages\n"); -+ return; -+ } -+ if (!crash_is_kseg(dest) || !crash_is_kseg(cp)) { -+ printk("dest = 0x%lx, cp = 0x%lx, i = %d, comp_pages = 0x%lx\n", -+ dest, cp, i, comp_pages); -+ src = 0; -+ break; -+ } -+ memcpy((void *)dest, (void *)cp, PAGE_SIZE); -+ cp += PAGE_SIZE; -+ map = crash_update_map(map, src_base, dest, &header->map_pages); /* links a new map page, if necessary */ -+ if (!map) { -+ printk("save_core: no map\n"); -+ return; -+ } -+ } -+ header->compr_units++; -+ if (!src) -+ break; -+ if (crash_reset_stream(&stream)) -+ return; -+ } -+ -+ map = crash_update_map(map, 0, 0, &header->map_pages); -+ header->magic[0] = CRASH_MAGIC; -+ -+ if (crash_audit_maps()) { -+ header->magic[0] = 0; -+ return; -+ } -+ -+ printk("\nsave_core: src pages skipped = 0x%lx src pages saved = 0x%lx\n", -+ src_pages_skipped, src_pages_saved); -+ printk("save_core: data_pages = 0x%lx map_pages = 0x%lx\n", header->data_pages, -+ header->map_pages); -+ printk("save_core: completed, crash_dump_header = 0x%lx\n", crash_dump_header); -+} -+ -+/* helper functions private to this file */ -+static int crash_reset_stream(z_stream * stream) -+{ -+ int err; -+ -+ stream->workspace=(void*)crash_zalloc_start; -+ // stream->zalloc = czalloc; -+ // stream->zfree = czfree; -+ // stream->opaque = (voidpf) 0; -+ stream->next_out = (Bytef *) crash_compr_buf; -+ stream->avail_out = (uInt) (CRASH_COMPR_BUF_PAGES * PAGE_SIZE); -+ stream->next_in = (Bytef *) crash_uncompr_buf; -+ stream->avail_in = (uInt) (CRASH_UNCOMPR_BUF_PAGES * PAGE_SIZE); -+ err = zlib_deflateReset(stream); -+ if (err != Z_OK) { -+ printk("crash_reset_stream: bad return %d from deflateReset\n", err); -+ return 1; -+ } -+ return 0; -+} -+ 
-+static u_long crash_alloc_dest_page(void) -+{ -+ u_long addr; -+ -+ addr = crash_dest_free_list; -+ if (addr) { -+ crash_dest_free_list = *(u_long *) addr; -+ dest_pages_free--; -+ } else -+ printk("crash_alloc_dest_page: free list empty\n"); -+ return addr; -+} -+ -+static void crash_free_dest_page(u_long dest) -+{ -+ if (!dest) { -+ printk("crash_free_dest_page: freeing addr 0\n"); -+ return; -+ } -+ dest_pages_free++; -+ dest = (u_long) trunc_page(dest); -+ *(u_long *) dest = crash_dest_free_list; -+ crash_dest_free_list = dest; -+} -+ -+/* -+ * Stolen from setup.c -+ */ -+#define PFN_PHYS(x) ((x) << PAGE_SHIFT) -+ -+static void init_dest_page_alloc(void) -+{ -+ u_long va; -+ long i; -+ struct page *page; -+ struct mem_crash_map_hdr *header; -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+ for (i = ((1 << 24) >> PAGE_SHIFT) + LOWER_MEM_FORWARD; -+ i < (max_low_pfn - UPPER_MEM_BACKUP); i++) { -+ va = (u_long) phys_to_virt(PFN_PHYS(i)); -+ if ((va >= header->boot_reserved_start) && (va < header->boot_reserved_end)) -+ continue; -+ page = mem_map + i; -+ if (PageLocked(page) || PageReserved(page)) -+ continue; -+ if (PageFree(page) || PageAnon(page) || PageShm(page) || page->buffers) -+ crash_free_dest_page(va); -+ } -+ if (crash_debug) -+ printk("init_dest_page_alloc: dest_pages_free = 0x%lx\n", dest_pages_free); -+} -+ -+static int crash_is_dir_page(struct page *page) { -+ struct inode *tmp_inode; -+ -+ if(page->mapping && page->mapping->host) { -+ tmp_inode = (struct inode *)page->mapping->host; -+ if((tmp_inode->i_sb->s_magic == EXT2_SUPER_MAGIC) && -+ (S_ISDIR(tmp_inode->i_mode))) -+ return 1; -+ } -+ -+ return 0; -+} -+ -+static u_long crash_get_source_page(void) -+{ -+ struct page *page; -+ u_long va; -+ -+ while (crash_cur_pfn < max_low_pfn) { -+ page = mem_map + crash_cur_pfn; -+ if (!(PageFree(page) || PageAnon(page) || PageShm(page) || page->buffers)) -+ break; -+ src_pages_skipped++; -+ crash_cur_pfn++; -+ } -+ if (crash_cur_pfn == 
max_low_pfn) -+ return 0; -+ -+ va = (u_long) phys_to_virt(PFN_PHYS(crash_cur_pfn)); -+ src_pages_saved++; -+ crash_cur_pfn++; -+ return va; -+} -+ -+static u_long crash_update_map(u_long map, u_long src_base, u_long dest, u_long * pages) -+{ -+ struct mem_crash_map_entry *m; -+ -+ -+ if (!map) { -+ (*pages)++; -+ return crash_alloc_dest_page(); -+ } -+ m = (struct mem_crash_map_entry *)map; -+ m->src_va = src_base; -+ m->dest_page_va = dest; -+ if (dest) -+ if (crash_chksum_page(dest, &m->check_sum)) -+ return 0; -+ -+ map += sizeof(struct mem_crash_map_entry); -+ -+ m = (struct mem_crash_map_entry *)map; -+ if (!src_base) { /* end of list */ -+ if (crash_chksum((u_long) m, &m->src_va)) -+ return 0; -+ } else if ((map + 3 * sizeof(struct mem_crash_map_entry)) > (u_long) round_page(map)) { -+ m->src_va = -1; -+ map = m->dest_page_va = crash_alloc_dest_page(); -+ if (crash_debug) -+ printk("\nm = 0x%lx m->src_va = 0x%lx m->dest_page_va = 0x%lx\n", -+ (u_long) trunc_page(m), m->src_va, m->dest_page_va); -+ m++; -+ if (crash_chksum((u_long) m, &m->src_va)) -+ return 0; -+ if (crash_debug) -+ printk("m = 0x%lx chksum = m->src_va = 0x%lx\n", (u_long) trunc_page(m), -+ m->src_va); -+ if (crash_audit_map_page((u_long) m)) -+ return 0; -+ (*pages)++; -+ } -+ return map; -+} -+ -+static int crash_chksum(u_long limit, u_long * sum_addr) -+{ -+ u_long sum; -+ u_long *addr; -+ -+ if (!crash_is_kseg(limit)) { -+ printk("bad addr = 0x%lx to crash_chksum\n", limit); -+ return 1; -+ } -+ sum = 0; -+ addr = (u_long *) trunc_page(limit); -+ for (; (u_long) addr < limit; addr++) -+ sum += *addr; -+ *sum_addr = sum; -+ return 0; -+} -+ -+static int crash_chksum_page(u_long pg_addr, u_long * sum_addr) -+{ -+ u_long sum, limit; -+ u_long *addr; -+ -+ if (!crash_is_kseg(pg_addr)) { -+ printk("bad addr = 0x%lx to crash_chksum_page\n", pg_addr); -+ return 1; -+ } -+ -+ sum = 0; -+ addr = (u_long *) trunc_page(pg_addr); -+ limit = (u_long) addr + PAGE_SIZE; -+ for (; (u_long) addr < limit; 
addr++) -+ sum += *addr; -+ *sum_addr = sum; -+ return 0; -+} -+ -+static int crash_audit_maps(void) -+{ -+ u_long m, count; -+ u_long *link_addr; -+ struct mem_crash_map_hdr *header; -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+ if (header->magic[0] != CRASH_MAGIC) -+ return 1; -+ -+ link_addr = &header->map; -+ m = header->map; -+ -+ count = 0; -+ for (;;) { -+ if (!crash_is_kseg(m)) { -+ printk("crash_audit_maps: bad link 0x%lx at 0x%lx\n", m, -+ (u_long) link_addr); -+ return 1; -+ } -+ if (crash_audit_map_page(m)) { -+ printk("audit failed while on map page %ld\n", count); -+ return 1; -+ } -+ if (!crash_link(m)) -+ break; -+ link_addr = crash_link(m); -+ m = *link_addr; -+ -+ count++; -+ } -+ return 0; -+} -+ -+static int crash_audit_map_page(u_long map) -+{ -+ struct mem_crash_map_entry *m; -+ u_long sum; -+ -+ if (!map || !crash_is_kseg(map)) { -+ printk("crash_audit_map_page: bad map = 0x%lx\n", map); -+ return 1; -+ } -+ map = (u_long) trunc_page((u_long) map); -+ m = (struct mem_crash_map_entry *)map; -+ for (;;) { -+ if ((m->src_va == -1) || (m->src_va == 0)) { -+ m++; -+ if (crash_chksum((u_long) m, &sum)) -+ return 1; -+ if (m->src_va != sum) { -+ printk("crash_audit_map_page: checksum failure1\n"); -+ printk("m = 0x%lx, sum = 0x%lx, m->src_va = 0x%lx\n", -+ (u_long) m, (u_long) sum, (u_long) m->src_va); -+ crash_print_data_around((u_long) & m->src_va); -+ return 1; -+ } else { -+ return 0; -+ } -+ } else { -+ if (crash_chksum_page((u_long) m->dest_page_va, &sum) -+ || (m->check_sum != sum)) { -+ printk("crash_audit_map_page: checksum failure2\n"); -+ printk -+ ("dest_page_va = 0x%lx, &dest_page_va = 0x%lx, sum = 0x%lx, m->check_sum = 0x%lx\n", -+ (u_long) m->dest_page_va, (u_long) (&m->check_sum), -+ (u_long) sum, (u_long) m->check_sum); -+ crash_print_data_around((u_long) & m->check_sum); -+ return 1; -+ } -+ } -+ m++; -+ } -+} -+ -+static void crash_print_data_around(u_long p) -+{ -+ u_long *a; -+ int i; -+ -+ if 
(!crash_is_kseg(p)) { -+ printk("crash_print_data_around: p = 0x%lx not kseg\n", p); -+ return; -+ } -+ a = (u_long *) p; -+ a -= 20; -+ for (i = 0; i < 40; i++) -+ printk("%lx\n", *a++); -+} -+ -+#ifdef CRASH_DEBUG -+static void crash_print_map_page(u_long map) -+{ -+ struct mem_crash_map_entry *m; -+ int j = 0; -+ u_long sum; -+ -+ map = (u_long) trunc_page((u_long) map); -+ m = (struct mem_crash_map_entry *)map; -+ for (;;) { -+ printk("%lx %lx %lx ", m->src_va, m->dest_page_va, m->check_sum); -+ if (!(j++ % 4)) -+ printk("\n"); -+ if ((m->src_va == -1) || (m->src_va == 0)) { -+ m++; -+ printk("%lx %lx ", m->src_va, m->dest_page_va); -+ if (crash_chksum((u_long) m, &sum)); -+ else -+ printk("\nchksum = 0x%lx\n", sum); -+ return; -+ } -+ m++; -+ } -+} -+#endif /* CRASH_DEBUG */ -+ -+static void crash_wait_cpus(void) -+{ -+ int i; -+ int msecs = 0; -+ -+ for (i = 0; i < smp_num_cpus; i++) { -+ if (i != smp_processor_id()) { -+ while (!panic_threads[i]) { -+ msecs++; -+ mdelay(1); -+ if (msecs > CRASH_CPU_TIMEOUT) { -+ /* if other cpus are still running -+ * we have to halt, otherwise we could -+ * risk using buffer cache pages which -+ * could subsequently get flushed to disk. 
-+ */ -+ printk("Unable to halt other CPUs, halting system.\n"); -+ crash_halt_or_reboot(0); -+ } -+ } -+ } -+ } -+ -+ crash_cleanup_smp_state(); -+} -+ -+ -+#if 0 -+static void *czalloc(void *arg, unsigned int items, unsigned int size) -+{ -+ u_long nbytes; -+ u_long addr; -+ -+ nbytes = (u_long) (items * size); -+ nbytes = (u_long) round_page(nbytes); -+ if ((crash_zalloc_cur + nbytes) > crash_zalloc_end) -+ return 0; -+ addr = crash_zalloc_cur; -+ crash_zalloc_cur += nbytes; -+ return ((void *)addr); -+} -+ -+static void czfree(void *arg, void *ptr) -+{ -+ printk("zfree: ptr = 0x%lx\n", (u_long) ptr); -+} -+#endif -+ -+static boolean_t crash_is_kseg(u_long addr) -+{ -+ u_long phys; -+ -+ phys = virt_to_phys((void *)addr); -+ if (phys < PFN_PHYS(max_low_pfn)) -+ return TRUE; -+ else -+ return FALSE; -+} -+ -+static u_long *crash_link(u_long p) -+{ -+ struct mem_crash_map_entry *m; -+ -+ p = (u_long) trunc_page(p); -+ m = (struct mem_crash_map_entry *)p; -+ for (; m->src_va; m++) -+ if (m->src_va == -1) -+ return &m->dest_page_va; -+ -+ return 0; -+} -+ -+/* Call this after data written to disk. 
*/ -+static int crash_free_crashmem(void) -+{ -+ struct mem_crash_map_hdr *header; -+ struct mem_crash_map_entry *m, *last_m; -+ -+ if (crash_debug) -+ printk("crash_free_crashmem: \n"); -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+ if (crash_audit_maps()) { -+ header->magic[0] = 0; -+ return 1; -+ } -+ m = (struct mem_crash_map_entry *)header->map; -+ again: -+ for (; m->src_va; m++) { -+ if (m->src_va == -1) { -+ last_m = m; -+ m = (struct mem_crash_map_entry *)m->dest_page_va; -+ crash_free_page((unsigned long)last_m); -+ goto again; -+ } -+ crash_free_page(m->dest_page_va); -+ } -+ if (crash_debug) -+ printk("crash_free_crashmem: 0x%lx freed\n", -+ (header->data_pages + header->map_pages) * PAGE_SIZE); -+ header->magic[0] = 0; -+ return 0; -+} -+ -+static void crash_free_page(u_long addr) -+{ -+ struct page *page; -+ -+ page = virt_to_page(addr); -+ ClearPageReserved(page); -+ set_page_count(page, 1); -+ __free_page(page); -+} -+ -+static int get_dump_helper(u_long kva, u_long buf) -+{ -+ struct page *page; -+ struct mem_crash_map_hdr *header; -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+ if (header->magic[0] != CRASH_MAGIC) -+ return 1; -+ -+ if (!kva) { -+ if (crash_audit_maps()) { -+ printk("get_dump_helper: audit failure\n"); -+ header->magic[0] = 0; -+ return 1; -+ } -+ page = virt_to_page((u_long) crash_dump_header); -+ if (!PageReserved(page)) { -+ printk("not reserved: crash_dump_header = 0x%lx\n", crash_dump_header); -+ return 1; -+ } -+ if (copy_to_user((char *)buf, (char *)crash_dump_header, -+ sizeof(struct mem_crash_map_hdr))) { -+ printk("get_dump_helper: copy_to_user failed1\n"); -+ return 1; -+ } -+ } else { -+ page = virt_to_page(kva); -+ if (!PageReserved(page)) { -+ printk("not reserved: kva = 0x%lx\n", kva); -+ return 1; -+ } -+ if (copy_to_user((char *)buf, (char *)trunc_page(kva), PAGE_SIZE)) { -+ printk("get_dump_helper: copy_to_user failed2\n"); -+ return 1; -+ } -+ } -+ return 0; -+} -+ -+static 
void free_dump_helper(void) -+{ -+ struct mem_crash_map_hdr *header; -+ -+ header = (struct mem_crash_map_hdr *)crash_dump_header; -+ if (header->magic[0] != CRASH_MAGIC) -+ return; -+ if (crash_debug) -+ printk("free_dump_helper\n"); -+ crash_free_crashmem(); -+} -+ -+static int crashdev_open(struct inode *inode, struct file *file) -+{ -+ /* always return success -- nothing to do here */ -+ return 0; -+} -+ -+/* character device implementation */ -+static struct file_operations crashdev_fops = { -+ ioctl:crashdev_ioctl, -+ open:crashdev_open, -+}; -+ -+static struct miscdevice crash_miscdev = { -+ 190, "crash", &crashdev_fops -+}; -+ -+int crash_init_chrdev(void) -+{ -+ int result; -+ -+ result = misc_register(&crash_miscdev); -+ -+ if (result < 0) -+ printk(KERN_WARNING "crash: can't register crash device (c 10 190)\n"); -+ -+ return result; -+} -+ -+/* call the original syscalls, just to get things going */ -+static int crashdev_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ int retval = 0; -+ -+ switch (cmd) { -+ case CRASH_IOCFREEDUMP: -+ free_dump_helper(); -+ break; -+ -+ case CRASH_IOCGETDUMP: -+ if (crash_debug) { -+ printk("crashdev_ioctl: get dump\n"); -+ printk("vals: %08lx %08lx\n", -+ ((struct ioctl_getdump *)arg)->kva, -+ ((struct ioctl_getdump *)arg)->buf); -+ } -+ -+ retval = get_dump_helper((u_long) ((struct ioctl_getdump *)arg)->kva, -+ (u_long) ((struct ioctl_getdump *)arg)->buf); -+ break; -+ -+#ifdef CONFIG_BOOTIMG -+ case CRASH_IOCBOOTIMG: -+ if (crash_debug) -+ printk("crashdev_ioctl: bootimg\n"); -+ -+ retval = sys_bootimg((struct boot_image *)arg); -+ break; -+#endif -+ -+ case CRASH_IOCVERSION: -+ if (crash_debug) -+ printk("crashdev_ioctl: version\n"); -+ retval = CRASH_K_MINOR | (CRASH_K_MAJOR << 16); -+ break; -+ -+ default: -+ return -EINVAL; -+ } -+ -+ return retval; -+} -Index: linux/kernel/module.c -=================================================================== -RCS file: 
/chaos/cvs/kernel-rh/linux/kernel/module.c,v -retrieving revision 1.1.1.1.4.1 -retrieving revision 1.1.1.1.4.1.2.1 -diff -u -r1.1.1.1.4.1 -r1.1.1.1.4.1.2.1 ---- linux/kernel/module.c 12 Mar 2003 19:51:36 -0000 1.1.1.1.4.1 -+++ linux/kernel/module.c 1 Apr 2003 12:17:41 -0000 1.1.1.1.4.1.2.1 -@@ -311,7 +311,14 @@ - error = -EEXIST; - goto err1; - } -+#if defined(CONFIG_MCL_COREDUMP) -+ /* Call vmalloc_32 instead of module_map (vmalloc for i386) -+ * to avoid being mapped in highmem where mcore can't see us. -+ */ -+ if ((mod = (struct module *)vmalloc_32(size)) == NULL) { -+#else - if ((mod = (struct module *)module_map(size)) == NULL) { -+#endif - error = -ENOMEM; - goto err1; - } -Index: linux/kernel/panic.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/kernel/panic.c,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/kernel/panic.c 12 Mar 2003 19:51:36 -0000 1.3.2.1 -+++ linux/kernel/panic.c 1 Apr 2003 12:17:41 -0000 1.3.2.1.2.1 -@@ -19,6 +19,10 @@ - #include - #include - -+#ifdef CONFIG_MCL_COREDUMP -+#include -+#endif -+ - asmlinkage void sys_sync(void); /* it's really int */ - - int panic_timeout; -@@ -197,20 +201,43 @@ - unsigned long caller = (unsigned long) __builtin_return_address(0); - #endif - -+#ifdef CONFIG_MCL_COREDUMP -+ crash_save_regs(); -+#endif -+ - bust_spinlocks(1); - va_start(args, fmt); - vsprintf(buf, fmt, args); - va_end(args); - printk(KERN_EMERG "Kernel panic: %s\n",buf); -+ -+#ifdef CONFIG_MCL_COREDUMP -+ if (!panicmsg) { -+ panicmsg = buf; -+ panic_processor = smp_processor_id(); -+ mb(); -+ } -+#endif -+ - if (netdump_func) - BUG(); - if (in_interrupt()) - printk(KERN_EMERG "In interrupt handler - not syncing\n"); - else if (!current->pid) - printk(KERN_EMERG "In idle task - not syncing\n"); -+#ifdef CONFIG_MCL_COREDUMP -+ else if (crash_perform_sync) -+#else - else -+#endif - sys_sync(); -+ - bust_spinlocks(0); -+ 
-+#ifdef CONFIG_MCL_COREDUMP -+ smp_call_function((void *)smp_crash_funnel_cpu,0,0,0); -+ crash_save_current_state(current); -+#endif - - #ifdef CONFIG_SMP - smp_send_stop(); -Index: linux/kernel/sysctl.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/kernel/sysctl.c,v -retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.1 -diff -u -r1.2.2.1 -r1.2.2.1.2.1 ---- linux/kernel/sysctl.c 12 Mar 2003 19:51:36 -0000 1.2.2.1 -+++ linux/kernel/sysctl.c 1 Apr 2003 12:17:41 -0000 1.2.2.1.2.1 -@@ -37,6 +37,10 @@ - #include - #endif - -+#ifdef CONFIG_MCL_COREDUMP -+#include -+#endif -+ - #if defined(CONFIG_SYSCTL) - - /* External variables not in a header file. */ -@@ -247,6 +251,10 @@ - {KERN_SYSRQ, "sysrq", &sysrq_enabled, sizeof (int), - 0644, NULL, &proc_dointvec}, - #endif -+#ifdef CONFIG_MCL_COREDUMP -+ {KERN_PANIC_ON_OOPS, "panic_on_oops", &panic_on_oops, sizeof(int), -+ 0644, NULL, &proc_dointvec}, -+#endif - {KERN_CADPID, "cad_pid", &cad_pid, sizeof (int), - 0600, NULL, &proc_dointvec}, - {KERN_MAX_THREADS, "threads-max", &max_threads, sizeof(int), -Index: linux/lib/Config.in -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/lib/Config.in,v -retrieving revision 1.2 -retrieving revision 1.2.4.1 -diff -u -r1.2 -r1.2.4.1 ---- linux/lib/Config.in 14 Feb 2003 22:59:23 -0000 1.2 -+++ linux/lib/Config.in 1 Apr 2003 12:17:41 -0000 1.2.4.1 -@@ -23,12 +23,14 @@ - fi - fi - --if [ "$CONFIG_PPP_DEFLATE" = "y" -o \ -+if [ "$CONFIG_MCL_COREDUMP" = "y" -o \ -+ "$CONFIG_PPP_DEFLATE" = "y" -o \ - "$CONFIG_JFFS2_FS" = "y" ]; then - define_tristate CONFIG_ZLIB_DEFLATE y - else - if [ "$CONFIG_PPP_DEFLATE" = "m" -o \ -- "$CONFIG_JFFS2_FS" = "m" ]; then -+ "$CONFIG_JFFS2_FS" = "m" -o \ -+ "$CONFIG_MCL_COREDUMP" = "m" ]; then - define_tristate CONFIG_ZLIB_DEFLATE m - else - tristate 'zlib compression support' CONFIG_ZLIB_DEFLATE -Index: linux/mm/memory.c 
-=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/mm/memory.c,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/mm/memory.c 12 Mar 2003 19:51:37 -0000 1.3.2.1 -+++ linux/mm/memory.c 1 Apr 2003 12:17:41 -0000 1.3.2.1.2.1 -@@ -1381,6 +1381,10 @@ - } - lock_page(page); - -+#ifdef CONFIG_MCL_COREDUMP -+ set_bit(PG_anon, &page->flags); -+#endif -+ - /* - * Back out if somebody else faulted in this pte while we - * released the page table lock. -@@ -1470,6 +1474,9 @@ - mm->rss++; - flush_page_to_ram(page); - entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); -+#ifdef CONFIG_MCL_COREDUMP -+ set_bit(PG_anon, &page->flags); -+#endif - lru_cache_add(page); - } - -Index: linux/mm/page_alloc.c -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/mm/page_alloc.c,v -retrieving revision 1.3.2.1 -retrieving revision 1.3.2.1.2.1 -diff -u -r1.3.2.1 -r1.3.2.1.2.1 ---- linux/mm/page_alloc.c 12 Mar 2003 19:51:37 -0000 1.3.2.1 -+++ linux/mm/page_alloc.c 1 Apr 2003 12:17:41 -0000 1.3.2.1.2.1 -@@ -95,6 +95,10 @@ - struct page *base; - per_cpu_t *per_cpu; - zone_t *zone; -+#ifdef CONFIG_MCL_COREDUMP -+ struct page *pagemap; -+ int count = 1<lock); - -+#ifdef CONFIG_MCL_COREDUMP -+ pagemap = page; -+ do { -+ pagemap->flags |= (1<flags &= ~((1<free_pages -= mask; - - while (mask + (1 << (MAX_ORDER-1))) { -@@ -268,6 +281,16 @@ - zone->free_pages -= 1UL << order; - - page = expand(zone, page, index, order, curr_order, area); -+#ifdef CONFIG_MCL_COREDUMP -+ { -+ struct page *pagemap = page; -+ int count = 1<flags &= ~(1<lock, flags); - - set_page_count(page, 1); -Index: linux/arch/i386//boot/compressed/head.S -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/boot/compressed/head.S,v -retrieving revision 1.1.1.1 -retrieving revision 1.1.1.1.12.6 
-diff -u -r1.1.1.1 -r1.1.1.1.12.6 ---- linux/arch/i386//boot/compressed/head.S 7 May 2002 21:53:54 -0000 1.1.1.1 -+++ linux/arch/i386//boot/compressed/head.S 5 Apr 2003 05:51:27 -0000 1.1.1.1.12.6 -@@ -23,6 +23,7 @@ - */ - .text - -+#include - #include - #include - -@@ -31,6 +32,55 @@ - startup_32: - cld - cli -+ -+#ifdef CONFIG_BOOTIMG -+/* -+ * GDT is invalid if we're booted by bootimg, so reload it now -+ */ -+ lgdt %cs:gdt_descr -+ ljmp $(__KERNEL_CS),$1f -+ -+gdt_table_limit = gdt_table_end - gdt_table - 1 -+gdt_descr: -+ .word gdt_table_limit -+ .long gdt_table -+ -+gdt_table: /* stolen from arch/i386/kernel/head.S */ -+ .quad 0x0000000000000000 /* NULL descriptor */ -+ .quad 0x0000000000000000 /* 0x0b reserved */ -+ .quad 0x0000000000000000 /* 0x13 reserved */ -+ .quad 0x0000000000000000 /* 0x1b reserved */ -+ .quad 0x00cffa000000ffff /* 0x23 user 4GB code at 0x00000000 */ -+ .quad 0x00cff2000000ffff /* 0x2b user 4GB data at 0x00000000 */ -+ .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ -+ .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ -+ .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ -+ .quad 0x0000000000000000 /* 0x4b reserved */ -+ .quad 0x0000000000000000 /* 0x53 reserved */ -+ .quad 0x0000000000000000 /* 0x5b reserved */ -+ -+ .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ -+ .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ -+ .quad 0x0000000000000000 /* 0x70 TSS descriptor */ -+ .quad 0x0000000000000000 /* 0x78 LDT descriptor */ -+ -+ /* Segments used for calling PnP BIOS */ -+ .quad 0x00c09a0000000000 /* 0x80 32-bit code */ -+ .quad 0x00809a0000000000 /* 0x88 16-bit code */ -+ .quad 0x0080920000000000 /* 0x90 16-bit data */ -+ .quad 0x0080920000000000 /* 0x98 16-bit data */ -+ .quad 0x0080920000000000 /* 0xa0 16-bit data */ -+ /* -+ * The APM segments have byte granularity and their bases -+ * and limits are set at run time. 
-+ */ -+ .quad 0x00409a0000000000 /* 0xa8 APM CS code */ -+ .quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */ -+ .quad 0x0040920000000000 /* 0xb8 APM DS data */ -+gdt_table_end: -+ -+1: -+#endif - movl $(__KERNEL_DS),%eax - movl %eax,%ds - movl %eax,%es -@@ -92,7 +142,6 @@ - cld - rep - movsl -- - popl %esi # discard the address - popl %ebx # real mode pointer - popl %esi # low_buffer_start -@@ -124,5 +173,10 @@ - movsl - movl %ebx,%esi # Restore setup pointer - xorl %ebx,%ebx -+#ifdef CONFIG_BOOTIMG -+ movl $0x100000,%eax -+ jmpl *%eax -+#else - ljmp $(__KERNEL_CS), $0x100000 -+#endif - move_routine_end: -Index: linux/arch/i386//kernel/head.S -=================================================================== -RCS file: /chaos/cvs/kernel-rh/linux/arch/i386/kernel/head.S,v -retrieving revision 1.2.2.1 -retrieving revision 1.2.2.1.2.5 -diff -u -r1.2.2.1 -r1.2.2.1.2.5 ---- linux/arch/i386//kernel/head.S 12 Mar 2003 19:49:06 -0000 1.2.2.1 -+++ linux/arch/i386//kernel/head.S 5 Apr 2003 05:51:27 -0000 1.2.2.1.2.5 -@@ -42,6 +42,21 @@ - * On entry, %esi points to the real-mode code as a 32-bit pointer. 
- */ - startup_32: -+#ifdef CONFIG_BOOTIMG -+/* -+ * GDT is invalid if we're booted by bootimg, so reload it now -+ */ -+ lgdt %cs:_gdt_descr-__PAGE_OFFSET -+ ljmp $(__KERNEL_CS),$1f-__PAGE_OFFSET -+ -+gdt_limit = SYMBOL_NAME(cpu_gdt_table_end) - SYMBOL_NAME(cpu_gdt_table) - 1 -+ -+_gdt_descr: -+ .word gdt_limit -+ .long SYMBOL_NAME(cpu_gdt_table)-__PAGE_OFFSET -+ -+1: -+#endif - /* - * Set segments to known values - */ -@@ -452,6 +467,7 @@ - .quad 0x00409a0000000000 /* 0xa8 APM CS code */ - .quad 0x00009a0000000000 /* 0xb0 APM CS 16 code (16 bit) */ - .quad 0x0040920000000000 /* 0xb8 APM DS data */ -+ENTRY(cpu_gdt_table_end) - - #if CONFIG_SMP - .fill (NR_CPUS-1)*GDT_ENTRIES,8,0 /* other CPU's GDT */ diff --git a/lustre/kernel_patches/patches/netconsole_sysrq.patch b/lustre/kernel_patches/patches/netconsole_sysrq.patch deleted file mode 100644 index b995461..0000000 --- a/lustre/kernel_patches/patches/netconsole_sysrq.patch +++ /dev/null @@ -1,41 +0,0 @@ - 0 files changed - ---- linux-2.4.20-rh/drivers/net/netconsole.c~netconsole_sysrq 2003-04-11 14:04:57.000000000 +0800 -+++ linux-2.4.20-rh-root/drivers/net/netconsole.c 2003-07-01 11:10:26.000000000 +0800 -@@ -988,7 +988,15 @@ static void netconsole_netdump (struct p - reply.info = 0; - send_netdump_skb(dev, tmp, strlen(tmp), &reply); - break; -- -+ case COMM_SYSRQ: -+ Dprintk("got SYSRQ command.\n"); -+ printk("netdump: got SYSRQ command %d \n", req->from); -+ handle_sysrq(req->from, regs, NULL, NULL); -+ reply.code = REPLY_SYSRQ; -+ reply.nr = req->nr; -+ reply.info = req->from; -+ send_netdump_skb(dev, tmp, strlen(tmp), &reply); -+ break; - default: - reply.code = REPLY_ERROR; - reply.nr = req->nr; ---- linux-2.4.20-rh/drivers/net/netconsole.h~netconsole_sysrq 2003-04-11 14:04:57.000000000 +0800 -+++ linux-2.4.20-rh-root/drivers/net/netconsole.h 2003-07-01 11:11:29.000000000 +0800 -@@ -42,6 +42,7 @@ enum netdump_commands { - COMM_START_NETDUMP_ACK = 7, - COMM_GET_REGS = 8, - COMM_SHOW_STATE = 9, -+ 
COMM_SYSRQ=10, - }; - - #define NETDUMP_REQ_SIZE (8+4*4) -@@ -69,6 +70,7 @@ enum netdump_replies { - REPLY_REGS = 10, - REPLY_MAGIC = 11, - REPLY_SHOW_STATE = 12, -+ REPLY_SYSRQ=13, - }; - - typedef struct netdump_reply_s { - -_ diff --git a/lustre/kernel_patches/patches/tcp-zero-copy.patch b/lustre/kernel_patches/patches/tcp-zero-copy.patch deleted file mode 100644 index 7176eca..0000000 --- a/lustre/kernel_patches/patches/tcp-zero-copy.patch +++ /dev/null @@ -1,455 +0,0 @@ -diff -u -r1.1.1.1 linux/include/linux/skbuff.h ---- linux/include/linux/skbuff.h 2 Aug 2002 10:59:25 -0000 1.1.1.1 -+++ linux/include/linux/skbuff.h 2 Aug 2002 14:20:00 -0000 -@@ -116,6 +116,30 @@ - __u16 size; - }; - -+/* Support for callback when skb data has been released */ -+typedef struct zccd /* Zero Copy Callback Descriptor */ -+{ /* (embed as first member of custom struct) */ -+ atomic_t zccd_count; /* reference count */ -+ void (*zccd_destructor)(struct zccd *); /* callback when refcount reaches zero */ -+} zccd_t; -+ -+static inline void zccd_init (zccd_t *d, void (*callback)(zccd_t *)) -+{ -+ atomic_set (&d->zccd_count, 1); -+ d->zccd_destructor = callback; -+} -+ -+static inline void zccd_get (zccd_t *d) /* take a reference */ -+{ -+ atomic_inc (&d->zccd_count); -+} -+ -+static inline void zccd_put (zccd_t *d) /* release a reference */ -+{ -+ if (atomic_dec_and_test (&d->zccd_count)) -+ (d->zccd_destructor)(d); -+} -+ - /* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -@@ -123,6 +147,12 @@ - atomic_t dataref; - unsigned int nr_frags; - struct sk_buff *frag_list; -+ zccd_t *zccd; /* zero copy descriptor */ -+ zccd_t *zccd2; /* 2nd zero copy descriptor */ -+ /* NB we expect zero-copy data to be at least 1 packet, so -+ * having 2 zccds means we don't unneccessarily split the packet -+ * where consecutive zero-copy sends abutt. 
-+ */ - skb_frag_t frags[MAX_SKB_FRAGS]; - }; - -diff -u -r1.1.1.1 linux/include/net/tcp.h ---- linux/include/net/tcp.h 2 Aug 2002 10:59:29 -0000 1.1.1.1 -+++ linux/include/net/tcp.h 2 Aug 2002 14:03:49 -0000 -@@ -639,6 +639,8 @@ - - extern int tcp_sendmsg(struct sock *sk, struct msghdr *msg, int size); - extern ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); -+extern ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, -+ int flags, zccd_t *zccd); - - extern int tcp_ioctl(struct sock *sk, - int cmd, -@@ -732,6 +734,9 @@ - struct msghdr *msg, - int len, int nonblock, - int flags, int *addr_len); -+extern int tcp_recvpackets(struct sock *sk, -+ struct sk_buff_head *packets, -+ int len, int nonblock); - - extern int tcp_listen_start(struct sock *sk); - -diff -u -r1.1.1.1 linux/net/netsyms.c ---- linux/net/netsyms.c 2 Aug 2002 10:59:31 -0000 1.1.1.1 -+++ linux/net/netsyms.c 2 Aug 2002 14:21:31 -0000 -@@ -395,6 +395,8 @@ - EXPORT_SYMBOL(sysctl_tcp_ecn); - EXPORT_SYMBOL(tcp_cwnd_application_limited); - EXPORT_SYMBOL(tcp_sendpage); -+EXPORT_SYMBOL(tcp_sendpage_zccd); -+EXPORT_SYMBOL(tcp_recvpackets); - - EXPORT_SYMBOL(tcp_write_xmit); - -diff -u -r1.1.1.1 linux/net/core/skbuff.c ---- linux/net/core/skbuff.c 2 Aug 2002 10:59:32 -0000 1.1.1.1 -+++ linux/net/core/skbuff.c 2 Aug 2002 14:07:13 -0000 -@@ -208,6 +208,8 @@ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; -+ skb_shinfo(skb)->zccd = NULL; /* skbuffs kick off with NO user zero copy descriptors */ -+ skb_shinfo(skb)->zccd2 = NULL; - return skb; - - nodata: -@@ -276,6 +278,10 @@ - { - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { -+ if (skb_shinfo(skb)->zccd != NULL) /* zero copy callback descriptor? */ -+ zccd_put (skb_shinfo(skb)->zccd); /* release hold */ -+ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd zero copy callback descriptor? 
*/ -+ zccd_put (skb_shinfo(skb)->zccd2); /* release hold */ - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) -@@ -532,6 +538,8 @@ - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->frag_list = NULL; -+ skb_shinfo(skb)->zccd = NULL; /* copied data => no user zero copy descriptor */ -+ skb_shinfo(skb)->zccd2 = NULL; - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; -@@ -577,6 +585,14 @@ - - n->data_len = skb->data_len; - n->len = skb->len; -+ -+ if (skb_shinfo(skb)->zccd != NULL) /* user zero copy descriptor? */ -+ zccd_get (skb_shinfo(skb)->zccd); /* 1 more ref (pages are shared) */ -+ skb_shinfo(n)->zccd = skb_shinfo(skb)->zccd; -+ -+ if (skb_shinfo(skb)->zccd2 != NULL) /* 2nd user zero copy descriptor? */ -+ zccd_get (skb_shinfo(skb)->zccd2); /* 1 more ref (pages are shared) */ -+ skb_shinfo(n)->zccd2 = skb_shinfo(skb)->zccd2; - - if (skb_shinfo(skb)->nr_frags) { - int i; -@@ -620,6 +636,8 @@ - u8 *data; - int size = nhead + (skb->end - skb->head) + ntail; - long off; -+ zccd_t *zccd = skb_shinfo(skb)->zccd; /* stash user zero copy descriptor */ -+ zccd_t *zccd2 = skb_shinfo(skb)->zccd2; /* stash 2nd user zero copy descriptor */ - - if (skb_shared(skb)) - BUG(); -@@ -641,6 +659,11 @@ - if (skb_shinfo(skb)->frag_list) - skb_clone_fraglist(skb); - -+ if (zccd != NULL) /* user zero copy descriptor? */ -+ zccd_get (zccd); /* extra ref (pages are shared) */ -+ if (zccd2 != NULL) /* 2nd user zero copy descriptor? 
*/ -+ zccd_get (zccd2); /* extra ref (pages are shared) */ -+ - skb_release_data(skb); - - off = (data+nhead) - skb->head; -@@ -655,6 +678,8 @@ - skb->nh.raw += off; - skb->cloned = 0; - atomic_set(&skb_shinfo(skb)->dataref, 1); -+ skb_shinfo(skb)->zccd = zccd; -+ skb_shinfo(skb)->zccd2 = zccd2; - return 0; - - nodata: -diff -u -r1.1.1.1 linux/net/ipv4/tcp.c ---- linux/net/ipv4/tcp.c 2 Aug 2002 10:59:34 -0000 1.1.1.1 -+++ linux/net/ipv4/tcp.c 2 Aug 2002 14:36:30 -0000 -@@ -745,7 +745,7 @@ - goto out; - } - --ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags); -+ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd); - - static inline int - can_coalesce(struct sk_buff *skb, int i, struct page *page, int off) -@@ -824,7 +824,8 @@ - return err; - } - --ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags) -+/* Extra parameter: user zero copy descriptor (or NULL if not doing that) */ -+ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags, zccd_t *zccd) - { - struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); - int mss_now; -@@ -872,6 +873,17 @@ - copy = size; - - i = skb_shinfo(skb)->nr_frags; -+ -+ if (zccd != NULL && /* this is a zcc I/O */ -+ skb_shinfo(skb)->zccd != NULL && /* skb is part of a zcc I/O */ -+ skb_shinfo(skb)->zccd2 != NULL && -+ skb_shinfo(skb)->zccd != zccd && /* not the same one */ -+ skb_shinfo(skb)->zccd2 != zccd) -+ { -+ tcp_mark_push (tp, skb); -+ goto new_segment; -+ } -+ - if (can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i-1].size += copy; - } else if (i < MAX_SKB_FRAGS) { -@@ -881,6 +893,20 @@ - tcp_mark_push(tp, skb); - goto new_segment; - } -+ -+ if (zccd != NULL && /* this is a zcc I/O */ -+ skb_shinfo(skb)->zccd != zccd && /* not already referencing this zccd */ -+ skb_shinfo(skb)->zccd2 != zccd) -+ { -+ zccd_get (zccd); /* 
bump ref count */ -+ -+ BUG_TRAP (skb_shinfo(skb)->zccd2 == NULL); -+ -+ if (skb_shinfo(skb)->zccd == NULL) /* reference this zccd */ -+ skb_shinfo(skb)->zccd = zccd; -+ else -+ skb_shinfo(skb)->zccd2 = zccd; -+ } - - skb->len += copy; - skb->data_len += copy; -@@ -945,7 +971,31 @@ - - lock_sock(sk); - TCP_CHECK_TIMER(sk); -- res = do_tcp_sendpages(sk, &page, offset, size, flags); -+ res = do_tcp_sendpages(sk, &page, offset, size, flags, NULL); -+ TCP_CHECK_TIMER(sk); -+ release_sock(sk); -+ return res; -+} -+ -+ssize_t tcp_sendpage_zccd(struct socket *sock, struct page *page, int offset, size_t size, -+ int flags, zccd_t *zccd) -+{ -+ ssize_t res; -+ struct sock *sk = sock->sk; -+ -+#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) -+ -+ if (!(sk->route_caps & NETIF_F_SG) || /* caller shouldn't waste her time */ -+ !(sk->route_caps & TCP_ZC_CSUM_FLAGS)) /* on double mapping */ -+ BUG (); -+ -+#undef TCP_ZC_CSUM_FLAGS -+ -+ lock_sock(sk); -+ TCP_CHECK_TIMER(sk); -+ -+ res = do_tcp_sendpages(sk, &page, offset, size, flags, zccd); -+ - TCP_CHECK_TIMER(sk); - release_sock(sk); - return res; -@@ -1767,6 +1817,202 @@ - recv_urg: - err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); - goto out; -+} -+ -+int tcp_recvpackets (struct sock *sk, struct sk_buff_head *packets, -+ int len, int nonblock) -+{ -+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); -+ int copied; -+ long timeo; -+ -+ BUG_TRAP (len > 0); -+ /*BUG_TRAP ((flags & (MSG_OOB | MSG_PEEK | MSG_TRUNC)) == 0);*/ -+ -+ lock_sock(sk); -+ -+ TCP_CHECK_TIMER(sk); -+ -+ copied = -ENOTCONN; -+ if (sk->state == TCP_LISTEN) -+ goto out; -+ -+ copied = 0; -+ timeo = sock_rcvtimeo(sk, nonblock); -+ -+ do { -+ struct sk_buff * skb; -+ u32 offset; -+ unsigned long used; -+ int exhausted; -+ int eaten; -+ -+ /* Are we at urgent data? Stop if we have read anything. 
*/ -+ if (copied && tp->urg_data && tp->urg_seq == tp->copied_seq) -+ break; -+ -+ /* We need to check signals first, to get correct SIGURG -+ * handling. FIXME: Need to check this doesnt impact 1003.1g -+ * and move it down to the bottom of the loop -+ */ -+ if (signal_pending(current)) { -+ if (copied) -+ break; -+ copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; -+ break; -+ } -+ -+ /* Next get a buffer. */ -+ -+ skb = skb_peek(&sk->receive_queue); -+ -+ if (skb == NULL) /* nothing ready */ -+ { -+ if (copied) { -+ if (sk->err || -+ sk->state == TCP_CLOSE || -+ (sk->shutdown & RCV_SHUTDOWN) || -+ !timeo || -+ (0)) -+ break; -+ } else { -+ if (sk->done) -+ break; -+ -+ if (sk->err) { -+ copied = sock_error(sk); -+ break; -+ } -+ -+ if (sk->shutdown & RCV_SHUTDOWN) -+ break; -+ -+ if (sk->state == TCP_CLOSE) { -+ if (!sk->done) { -+ /* This occurs when user tries to read -+ * from never connected socket. -+ */ -+ copied = -ENOTCONN; -+ break; -+ } -+ break; -+ } -+ -+ if (!timeo) { -+ copied = -EAGAIN; -+ break; -+ } -+ } -+ -+ cleanup_rbuf(sk, copied); -+ timeo = tcp_data_wait(sk, timeo); -+ continue; -+ } -+ -+ BUG_TRAP (atomic_read (&skb->users) == 1); -+ -+ exhausted = eaten = 0; -+ -+ offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; -+ if (skb->h.th->syn) -+ offset--; -+ -+ used = skb->len - offset; -+ -+ if (tp->urg_data) { -+ u32 urg_offset = tp->urg_seq - tp->copied_seq; -+ if (urg_offset < used) { -+ if (!urg_offset) { /* at urgent date */ -+ if (!sk->urginline) { -+ tp->copied_seq++; /* discard the single byte of urgent data */ -+ offset++; -+ used--; -+ } -+ } else /* truncate read */ -+ used = urg_offset; -+ } -+ } -+ -+ BUG_TRAP (used >= 0); -+ if (len < used) -+ used = len; -+ -+ if (used == 0) -+ exhausted = 1; -+ else -+ { -+ if (skb_is_nonlinear (skb)) -+ { -+ int rc = skb_linearize (skb, GFP_KERNEL); -+ -+ printk ("tcp_recvpackets(): linearising: %d\n", rc); -+ -+ if (rc) -+ { -+ if (!copied) -+ copied = rc; -+ break; -+ } -+ } -+ -+ if ((offset 
+ used) == skb->len) /* consuming the whole packet */ -+ { -+ __skb_unlink (skb, &sk->receive_queue); -+ dst_release (skb->dst); -+ skb_orphan (skb); -+ __skb_pull (skb, offset); -+ __skb_queue_tail (packets, skb); -+ exhausted = eaten = 1; -+ } -+ else /* consuming only part of the packet */ -+ { -+ struct sk_buff *skb2 = skb_clone (skb, GFP_KERNEL); -+ -+ if (skb2 == NULL) -+ { -+ if (!copied) -+ copied = -ENOMEM; -+ break; -+ } -+ -+ dst_release (skb2->dst); -+ __skb_pull (skb2, offset); -+ __skb_trim (skb2, used); -+ __skb_queue_tail (packets, skb2); -+ } -+ -+ tp->copied_seq += used; -+ copied += used; -+ len -= used; -+ } -+ -+ if (tp->urg_data && after(tp->copied_seq,tp->urg_seq)) { -+ tp->urg_data = 0; -+ tcp_fast_path_check(sk, tp); -+ } -+ -+ if (!exhausted) -+ continue; -+ -+ if (skb->h.th->fin) -+ { -+ tp->copied_seq++; -+ if (!eaten) -+ tcp_eat_skb (sk, skb); -+ break; -+ } -+ -+ if (!eaten) -+ tcp_eat_skb (sk, skb); -+ -+ } while (len > 0); -+ -+ out: -+ /* Clean up data we have read: This will do ACK frames. 
*/ -+ cleanup_rbuf(sk, copied); -+ TCP_CHECK_TIMER(sk); -+ release_sock(sk); -+ return copied; - } - - /* diff --git a/lustre/kernel_patches/patches/tg3_netconsole.patch b/lustre/kernel_patches/patches/tg3_netconsole.patch deleted file mode 100644 index 267dedd..0000000 --- a/lustre/kernel_patches/patches/tg3_netconsole.patch +++ /dev/null @@ -1,247 +0,0 @@ - 0 files changed - ---- linux-2.4.20-rh/drivers/net/tg3.c~tg3_netconsole 2003-04-11 14:04:56.000000000 +0800 -+++ linux-2.4.20-rh-root/drivers/net/tg3.c 2003-07-01 11:27:46.000000000 +0800 -@@ -170,6 +170,10 @@ static void tg3_write_indirect_reg32(str - } - } - -+#ifdef HAVE_POLL_CONTROLLER -+static void Poll_tg3(struct net_device *); -+#endif -+ - #define tw32(reg,val) tg3_write_indirect_reg32(tp,(reg),(val)) - #define tw32_mailbox(reg, val) writel(((val) & 0xffffffff), tp->regs + (reg)) - #define tw16(reg,val) writew(((val) & 0xffff), tp->regs + (reg)) -@@ -1899,7 +1903,138 @@ static int tg3_vlan_rx(struct tg3 *tp, s - return vlan_hwaccel_receive_skb(skb, tp->vlgrp, vlan_tag); - } - #endif -+/* for netconsole */ -+static int upcall_rx_hook(struct net_device *dev) -+{ -+ struct tg3 *tp = dev->priv; -+ u32 work_mask; -+ u32 rx_rcb_ptr = tp->rx_rcb_ptr; -+ u16 hw_idx, sw_idx; -+ int received; -+ -+ hw_idx = tp->hw_status->idx[0].rx_producer; -+ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE; -+ work_mask = 0; -+ received = 0; -+ while (sw_idx != hw_idx) { -+ struct tg3_rx_buffer_desc *desc = &tp->rx_rcb[sw_idx]; -+ unsigned int len; -+ struct sk_buff *skb; -+ dma_addr_t dma_addr; -+ u32 opaque_key, desc_idx, *post_ptr; -+ -+ desc_idx = desc->opaque & RXD_OPAQUE_INDEX_MASK; -+ opaque_key = desc->opaque & RXD_OPAQUE_RING_MASK; -+ if (opaque_key == RXD_OPAQUE_RING_STD) { -+ dma_addr = pci_unmap_addr(&tp->rx_std_buffers[desc_idx], -+ mapping); -+ skb = tp->rx_std_buffers[desc_idx].skb; -+ post_ptr = &tp->rx_std_ptr; -+ } else if (opaque_key == RXD_OPAQUE_RING_JUMBO) { -+ dma_addr = 
pci_unmap_addr(&tp->rx_jumbo_buffers[desc_idx], -+ mapping); -+ skb = tp->rx_jumbo_buffers[desc_idx].skb; -+ post_ptr = &tp->rx_jumbo_ptr; -+ } -+ else { -+ goto next_pkt_nopost; -+ } -+ -+ work_mask |= opaque_key; -+ -+ if ((desc->err_vlan & RXD_ERR_MASK) != 0 && -+ (desc->err_vlan != RXD_ERR_ODD_NIBBLE_RCVD_MII)) { -+ drop_it: -+ tg3_recycle_rx(tp, opaque_key, -+ desc_idx, *post_ptr); -+ drop_it_no_recycle: -+ /* Other statistics kept track of by card. */ -+ tp->net_stats.rx_dropped++; -+ goto next_pkt; -+ } -+ -+ len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */ -+ -+ if (len > RX_COPY_THRESHOLD) { -+ int skb_size; -+ -+ skb_size = tg3_alloc_rx_skb(tp, opaque_key, -+ desc_idx, *post_ptr); -+ if (skb_size < 0) -+ goto drop_it; -+ -+ pci_unmap_single(tp->pdev, dma_addr, -+ skb_size - tp->rx_offset, -+ PCI_DMA_FROMDEVICE); -+ -+ skb_put(skb, len); -+ } else { -+ struct sk_buff *copy_skb; -+ -+ tg3_recycle_rx(tp, opaque_key, -+ desc_idx, *post_ptr); -+ -+ copy_skb = dev_alloc_skb(len + 2); -+ if (copy_skb == NULL) -+ goto drop_it_no_recycle; -+ -+ copy_skb->dev = tp->dev; -+ skb_reserve(copy_skb, 2); -+ skb_put(copy_skb, len); -+ pci_dma_sync_single(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE); -+ memcpy(copy_skb->data, skb->data, len); -+ -+ /* We'll reuse the original ring buffer. */ -+ skb = copy_skb; -+ } -+ -+ if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) && -+ (desc->type_flags & RXD_FLAG_TCPUDP_CSUM) && -+ (((desc->ip_tcp_csum & RXD_TCPCSUM_MASK) -+ >> RXD_TCPCSUM_SHIFT) == 0xffff)) -+ skb->ip_summed = CHECKSUM_UNNECESSARY; -+ else -+ skb->ip_summed = CHECKSUM_NONE; -+ -+ skb->protocol = eth_type_trans(skb, tp->dev); -+/*into netconsole driver*/ -+ dev->rx_hook(skb); -+ kfree_skb(skb); -+ tp->dev->last_rx = jiffies; -+ received++; -+next_pkt: -+ (*post_ptr)++; -+next_pkt_nopost: -+ rx_rcb_ptr++; -+ sw_idx = rx_rcb_ptr % TG3_RX_RCB_RING_SIZE; -+ } -+ -+ /* ACK the status ring. 
*/ -+ tp->rx_rcb_ptr = rx_rcb_ptr; -+ tw32_mailbox(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW, -+ (rx_rcb_ptr % TG3_RX_RCB_RING_SIZE)); -+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) -+ tr32(MAILBOX_RCVRET_CON_IDX_0 + TG3_64BIT_REG_LOW); - -+ /* Refill RX ring(s). */ -+ if (work_mask & RXD_OPAQUE_RING_STD) { -+ sw_idx = tp->rx_std_ptr % TG3_RX_RING_SIZE; -+ tw32_mailbox(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW, -+ sw_idx); -+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) -+ tr32(MAILBOX_RCV_STD_PROD_IDX + TG3_64BIT_REG_LOW); -+ } -+ if (work_mask & RXD_OPAQUE_RING_JUMBO) { -+ sw_idx = tp->rx_jumbo_ptr % TG3_RX_JUMBO_RING_SIZE; -+ tw32_mailbox(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW, -+ sw_idx); -+ if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) -+ tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW); -+ } -+ -+ return received; -+ -+} - /* The RX ring scheme is composed of multiple rings which post fresh - * buffers to the chip, and one special ring the chip uses to report - * status back to the host. -@@ -2006,7 +2141,7 @@ static int tg3_rx(struct tg3 *tp, int bu - /* We'll reuse the original ring buffer. 
*/ - skb = copy_skb; - } -- -+ - if ((tp->tg3_flags & TG3_FLAG_RX_CHECKSUMS) && - (desc->type_flags & RXD_FLAG_TCPUDP_CSUM) && - (((desc->ip_tcp_csum & RXD_TCPCSUM_MASK) -@@ -2016,6 +2151,8 @@ static int tg3_rx(struct tg3 *tp, int bu - skb->ip_summed = CHECKSUM_NONE; - - skb->protocol = eth_type_trans(skb, tp->dev); -+ -+ - #if TG3_VLAN_TAG_USED - if (tp->vlgrp != NULL && - desc->type_flags & RXD_FLAG_VLAN) { -@@ -2058,7 +2195,6 @@ next_pkt_nopost: - if (tp->tg3_flags & TG3_FLAG_MBOX_WRITE_REORDER) - tr32(MAILBOX_RCV_JUMBO_PROD_IDX + TG3_64BIT_REG_LOW); - } -- - return received; - } - -@@ -2151,7 +2287,6 @@ static void tg3_interrupt(int irq, void - unsigned long flags; - - spin_lock_irqsave(&tp->lock, flags); -- - if (sblk->status & SD_STATUS_UPDATED) { - /* - * writing any value to intr-mbox-0 clears PCI INTA# and -@@ -2169,8 +2304,17 @@ static void tg3_interrupt(int irq, void - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); - sblk->status &= ~SD_STATUS_UPDATED; - -- if (likely(tg3_has_work(dev, tp))) -- netif_rx_schedule(dev); /* schedule NAPI poll */ -+ if (likely(tg3_has_work(dev, tp))){ -+ if (unlikely(dev->rx_hook != NULL) && netdump_mode) { -+ int ret; -+ struct sk_buff *skb; -+ ret = upcall_rx_hook(dev); -+ if (!ret){ -+ goto out; -+ } -+ } -+ netif_rx_schedule(dev); /* schedule NAPI poll */ -+ } - else { - /* no work, shared interrupt perhaps? 
re-enable - * interrupts, and flush that PCI write -@@ -2180,7 +2324,7 @@ static void tg3_interrupt(int irq, void - tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW); - } - } -- -+out: - spin_unlock_irqrestore(&tp->lock, flags); - } - -@@ -6804,7 +6948,10 @@ static int __devinit tg3_init_one(struct - dev->watchdog_timeo = TG3_TX_TIMEOUT; - dev->change_mtu = tg3_change_mtu; - dev->irq = pdev->irq; -- -+#ifdef HAVE_POLL_CONTROLLER -+ dev->poll_controller = &Poll_tg3; -+#endif -+ - err = tg3_get_invariants(tp); - if (err) { - printk(KERN_ERR PFX "Problem fetching invariants of chip, " -@@ -6882,6 +7029,15 @@ err_out_disable_pdev: - return err; - } - -+#ifdef HAVE_POLL_CONTROLLER -+static void Poll_tg3(struct net_device *dev) -+{ -+ if (!netdump_mode) disable_irq(dev->irq); -+ tg3_interrupt(dev->irq, dev, NULL); -+ if (!netdump_mode) enable_irq(dev->irq); -+} -+#endif -+ - static void __devexit tg3_remove_one(struct pci_dev *pdev) - { - struct net_device *dev = pci_get_drvdata(pdev); - -_ diff --git a/lustre/kernel_patches/patches/uml-patch-2.4.20-4.patch b/lustre/kernel_patches/patches/uml-patch-2.4.20-4.patch deleted file mode 100644 index b35fee0..0000000 --- a/lustre/kernel_patches/patches/uml-patch-2.4.20-4.patch +++ /dev/null @@ -1,39358 +0,0 @@ -diff -Naur -X ../exclude-files orig/CREDITS um/CREDITS ---- orig/CREDITS Thu Feb 27 13:04:11 2003 -+++ um/CREDITS Thu Feb 27 13:05:17 2003 -@@ -432,6 +432,7 @@ - E: lars@nocrew.org - W: http://lars.nocrew.org/ - D: dsp56k device driver -+D: ptrace proxy in user mode kernel port - S: Kopmansg 2 - S: 411 13 Goteborg - S: Sweden -@@ -721,7 +722,7 @@ - E: jdike@karaya.com - W: http://user-mode-linux.sourceforge.net - D: User mode kernel port --S: RR1 Box 67C -+S: 375 Tubbs Hill Rd - S: Deering NH 03244 - S: USA - -diff -Naur -X ../exclude-files orig/Documentation/Configure.help um/Documentation/Configure.help ---- orig/Documentation/Configure.help Thu Feb 27 13:04:11 2003 -+++ um/Documentation/Configure.help Thu Feb 27 13:05:17 
2003 -@@ -14690,19 +14690,23 @@ - The module will be called dsbr100.o. If you want to compile it as a - module, say M here and read . - --Always do synchronous disk IO for UBD --CONFIG_BLK_DEV_UBD_SYNC -+CONFIG_BLK_DEV_UBD - The User-Mode Linux port includes a driver called UBD which will let - you access arbitrary files on the host computer as block devices. -- Writes to such a block device are not immediately written to the -- host's disk; this may cause problems if, for example, the User-Mode -- Linux 'Virtual Machine' uses a journalling file system and the host -- computer crashes. -+ Unless you know that you do not need such virtual block devices say -+ Y here. -+ -+Always do synchronous disk IO for UBD -+CONFIG_BLK_DEV_UBD_SYNC -+ Writes to the virtual block device are not immediately written to the host's -+ disk; this may cause problems if, for example, the User-Mode Linux -+ 'Virtual Machine' uses a journalling filesystem and the host computer -+ crashes. - - Synchronous operation (i.e. always writing data to the host's disk - immediately) is configurable on a per-UBD basis by using a special - kernel command line option. Alternatively, you can say Y here to -- turn on synchronous operation by default for all block. -+ turn on synchronous operation by default for all block devices. - - If you're running a journalling file system (like reiserfs, for - example) in your virtual machine, you will want to say Y here. If -@@ -14714,6 +14718,7 @@ - CONFIG_PT_PROXY - This option enables a debugging interface which allows gdb to debug - the kernel without needing to actually attach to kernel threads. -+ CONFIG_XTERM_CHAN must be enabled in order to enable CONFIG_PT_PROXY. - If you want to do kernel debugging, say Y here; otherwise say N. 
- - Management console -@@ -14908,25 +14913,173 @@ - - SLIP transport - CONFIG_UML_NET_SLIP -- The Slip User-Mode Linux network transport allows a running UML to -+ The slip User-Mode Linux network transport allows a running UML to - network with its host over a point-to-point link. Unlike Ethertap, - which can carry any Ethernet frame (and hence even non-IP packets), -- the Slip transport can only carry IP packets. -+ the slip transport can only carry IP packets. - -- To use this, your host must support Slip devices. -+ To use this, your host must support slip devices. - - For more information, see - . That site -- has examples of the UML command line to use to enable Slip -+ has examples of the UML command line to use to enable slip - networking, and details of a few quirks with it. - -- The Ethertap Transport is preferred over Slip because of its -- limitation. If you prefer Slip, however, say Y here. Otherwise -+ The Ethertap Transport is preferred over slip because of its -+ limitations. If you prefer slip, however, say Y here. Otherwise - choose the Multicast transport (to network multiple UMLs on - multiple hosts), Ethertap (to network with the host and the - outside world), and/or the Daemon transport (to network multiple - UMLs on a single host). You may choose more than one without - conflict. If you don't need UML networking, say N. -+ -+SLiRP transport -+CONFIG_UML_NET_SLIRP -+ The SLiRP User-Mode Linux network transport allows a running UML -+ to network by invoking a program that can handle SLIP encapsulated -+ packets. This is commonly (but not limited to) the application -+ known as SLiRP, a program that can re-socket IP packets back onto -+ the host on which it is run. Only IP packets are supported, -+ unlike other network transports that can handle all Ethernet -+ frames. 
In general, slirp allows the UML the same IP connectivity -+ to the outside world that the host user is permitted, and unlike -+ other transports, SLiRP works without the need of root level -+ privleges, setuid binaries, or SLIP devices on the host. This -+ also means not every type of connection is possible, but most -+ situations can be accomodated with carefully crafted slirp -+ commands that can be passed along as part of the network device's -+ setup string. The effect of this transport on the UML is similar -+ that of a host behind a firewall that masquerades all network -+ connections passing through it (but is less secure). -+ -+ To use this you should first have slirp compiled somewhere -+ accessible on the host, and have read its documentation. If you -+ don't need UML networking, say N. -+ -+ Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" -+ -+Default main console channel initialization -+CONFIG_CON_ZERO_CHAN -+ This is the string describing the channel to which the main console -+ will be attached by default. This value can be overridden from the -+ command line. The default value is "fd:0,fd:1", which attaches the -+ main console to stdin and stdout. -+ It is safe to leave this unchanged. -+ -+Default console channel initialization -+CONFIG_CON_CHAN -+ This is the string describing the channel to which all consoles -+ except the main console will be attached by default. This value can -+ be overridden from the command line. The default value is "xterm", -+ which brings them up in xterms. -+ It is safe to leave this unchanged, although you may wish to change -+ this if you expect the UML that you build to be run in environments -+ which don't have X or xterm available. -+ -+Default serial line channel initialization -+CONFIG_SSL_CHAN -+ This is the string describing the channel to which the serial lines -+ will be attached by default. This value can be overridden from the -+ command line. 
The default value is "pty", which attaches them to -+ traditional pseudo-terminals. -+ It is safe to leave this unchanged, although you may wish to change -+ this if you expect the UML that you build to be run in environments -+ which don't have a set of /dev/pty* devices. -+ -+Nesting level -+CONFIG_NEST_LEVEL -+ This is set to the number of layers of UMLs that this UML will be run -+ in. Normally, this is zero, meaning that it will run directly on the -+ host. Setting it to one will build a UML that can run inside a UML -+ that is running on the host. Generally, if you intend this UML to run -+ inside another UML, set CONFIG_NEST_LEVEL to one more than the host UML. -+ Note that if the hosting UML has its CONFIG_KERNEL_HALF_GIGS set to -+ greater than one, then the guest UML should have its CONFIG_NEST_LEVEL -+ set to the host's CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS. -+ Only change this if you are running nested UMLs. -+ -+Kernel address space size (in .5G units) -+CONFIG_KERNEL_HALF_GIGS -+ This determines the amount of address space that UML will allocate for -+ its own, measured in half Gigabyte units. The default is 1. -+ Change this only if you need to boot UML with an unusually large amount -+ of physical memory. -+ -+UML sound support -+CONFIG_UML_SOUND -+ This option enables UML sound support. If enabled, it will pull in -+ soundcore and the UML hostaudio relay, which acts as a intermediary -+ between the host's dsp and mixer devices and the UML sound system. -+ It is safe to say 'Y' here. -+ -+UML SMP support -+CONFIG_UML_SMP -+ This option enables UML SMP support. UML implements virtual SMP by -+ allowing as many processes to run simultaneously on the host as -+ there are virtual processors configured. Obviously, if the host is -+ a uniprocessor, those processes will timeshare, but, inside UML, -+ will appear to be running simultaneously. 
If the host is a -+ multiprocessor, then UML processes may run simultaneously, depending -+ on the host scheduler. -+ CONFIG_SMP will be set to whatever this option is set to. -+ It is safe to leave this unchanged. -+ -+file descriptor channel support -+CONFIG_FD_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to already set up file descriptors. Generally, the main -+ console is attached to file descriptors 0 and 1 (stdin and stdout), -+ so it would be wise to leave this enabled unless you intend to -+ attach it to some other host device. -+ -+null device channel support -+CONFIG_NULL_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to a device similar to /dev/null. Data written to it disappears -+ and there is never any data to be read. -+ -+port channel support -+CONFIG_PORT_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host portals. They may be accessed with 'telnet -+ '. Any number of consoles and serial lines may be -+ attached to a single portal, although what UML device you get when -+ you telnet to that portal will be unpredictable. -+ It is safe to say 'Y' here. -+ -+pty channel support -+CONFIG_PTY_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host pseudo-terminals. Access to both traditional -+ pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled -+ with this option. The assignment of UML devices to host devices -+ will be announced in the kernel message log. -+ It is safe to say 'Y' here. -+ -+tty channel support -+CONFIG_TTY_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to host terminals. Access to both virtual consoles -+ (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and -+ /dev/pts/*) are controlled by this option. -+ It is safe to say 'Y' here. 
-+ -+xterm channel support -+CONFIG_XTERM_CHAN -+ This option enables support for attaching UML consoles and serial -+ lines to xterms. Each UML device so assigned will be brought up in -+ its own xterm. -+ If you disable this option, then CONFIG_PT_PROXY will be disabled as -+ well, since UML's gdb currently requires an xterm. -+ It is safe to say 'Y' here. -+ -+tty logging -+CONFIG_TTY_LOG -+ This option enables logging of all data going through pseudo-terminals -+ to the host. This is primarily useful for honeypots, where you want -+ secure keystroke logging that can't be detected or disabled by root. -+ Say 'N' unless you are setting up a UML honeypot or otherwise know that -+ you want this option. - - Microtek USB scanner support - CONFIG_USB_MICROTEK -diff -Naur -X ../exclude-files orig/MAINTAINERS um/MAINTAINERS ---- orig/MAINTAINERS Thu Feb 27 13:04:12 2003 -+++ um/MAINTAINERS Thu Feb 27 13:05:17 2003 -@@ -1841,6 +1841,14 @@ - L: linux-usb-devel@lists.sourceforge.net - W: http://usb.in.tum.de - S: Maintained -+ -+USER-MODE PORT -+P: Jeff Dike -+M: jdike@karaya.com -+L: user-mode-linux-devel@lists.sourceforge.net -+L: user-mode-linux-user@lists.sourceforge.net -+W: http://user-mode-linux.sourceforge.net -+S: Maintained - - USB "USBNET" DRIVER - P: David Brownell -diff -Naur -X ../exclude-files orig/Makefile um/Makefile ---- orig/Makefile Thu Feb 27 13:04:12 2003 -+++ um/Makefile Thu Feb 27 13:05:17 2003 -@@ -5,7 +5,15 @@ - - KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) - --ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -+# SUBARCH tells the usermode build what the underlying arch is. That is set -+# first, and if a usermode build is happening, the "ARCH=um" on the command -+# line overrides the setting of ARCH below. If a native build is happening, -+# then ARCH is assigned, getting whatever value it gets normally, and -+# SUBARCH is subsequently ignored. 
-+ -+SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) -+ARCH := $(SUBARCH) -+ - KERNELPATH=kernel-$(shell echo $(KERNELRELEASE) | sed -e "s/-//g") - - CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \ -diff -Naur -X ../exclude-files orig/arch/um/Makefile um/arch/um/Makefile ---- orig/arch/um/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/Makefile Fri Mar 28 21:46:54 2003 -@@ -0,0 +1,168 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+OS := $(shell uname -s) -+ -+ARCH_DIR = arch/um -+ -+core-y := kernel sys-$(SUBARCH) os-$(OS) -+drivers-y := fs drivers -+subdir-y := $(core-y) $(drivers-y) -+SUBDIRS += $(foreach dir,$(subdir-y),$(ARCH_DIR)/$(dir)) -+ -+CORE_FILES += $(foreach dir,$(core-y),$(ARCH_DIR)/$(dir)/built-in.o) -+DRIVERS += $(foreach dir,$(drivers-y),$(ARCH_DIR)/$(dir)/built-in.o) -+ -+include $(ARCH_DIR)/Makefile-$(SUBARCH) -+include $(ARCH_DIR)/Makefile-os-$(OS) -+ -+MAKEFILE-$(CONFIG_MODE_TT) += Makefile-tt -+MAKEFILE-$(CONFIG_MODE_SKAS) += Makefile-skas -+ -+ifneq ($(MAKEFILE-y),) -+ include $(addprefix $(ARCH_DIR)/,$(MAKEFILE-y)) -+endif -+ -+EXTRAVERSION := $(EXTRAVERSION)-4um -+ -+include/linux/version.h: arch/$(ARCH)/Makefile -+ -+# Recalculate MODLIB to reflect the EXTRAVERSION changes (via KERNELRELEASE) -+# The way the toplevel Makefile is written EXTRAVERSION is not supposed -+# to be changed outside the toplevel Makefile, but recalculating MODLIB is -+# a sufficient workaround until we no longer need architecture dependent -+# EXTRAVERSION... -+MODLIB := $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE) -+ -+ifeq ($(CONFIG_DEBUGSYM),y) -+CFLAGS := $(subst -fomit-frame-pointer,,$(CFLAGS)) -+endif -+ -+CFLAGS-$(CONFIG_DEBUGSYM) += -g -+ -+ARCH_INCLUDE = -I$(TOPDIR)/$(ARCH_DIR)/include -+ -+# -Derrno=kernel_errno - This turns all kernel references to errno into -+# kernel_errno to separate them from the libc errno. 
This allows -fno-common -+# in CFLAGS. Otherwise, it would cause ld to complain about the two different -+# errnos. -+ -+CFLAGS += $(ARCH_CFLAGS) $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ -+ -D_LARGEFILE64_SOURCE $(ARCH_INCLUDE) -Derrno=kernel_errno \ -+ $(MODE_INCLUDE) -+ -+LINKFLAGS += -r -+ -+LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc -+ -+SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000) -+ -+# These aren't in Makefile-tt because they are needed in the !CONFIG_MODE_TT + -+# CONFIG_MODE_SKAS + CONFIG_STATIC_LINK case. -+ -+LINK_TT = -static -+LD_SCRIPT_TT := link.ld -+ -+ifeq ($(CONFIG_STATIC_LINK),y) -+ LINK-y += $(LINK_TT) -+ LD_SCRIPT-y := $(LD_SCRIPT_TT) -+else -+ifeq ($(CONFIG_MODE_TT),y) -+ LINK-y += $(LINK_TT) -+ LD_SCRIPT-y := $(LD_SCRIPT_TT) -+else -+ifeq ($(CONFIG_MODE_SKAS),y) -+ LINK-y += $(LINK_SKAS) -+ LD_SCRIPT-y := $(LD_SCRIPT_SKAS) -+endif -+endif -+endif -+ -+LD_SCRIPT-y := $(ARCH_DIR)/$(LD_SCRIPT-y) -+M4_MODE_TT := $(shell [ "$(CONFIG_MODE_TT)" = "y" ] && echo -DMODE_TT) -+ -+$(LD_SCRIPT-y): $(LD_SCRIPT-y).in -+ pages=$$(( 1 << $(CONFIG_KERNEL_STACK_ORDER) )) ; \ -+ m4 -DSTART=$$(($(TOP_ADDR) - $(SIZE))) -DELF_ARCH=$(ELF_ARCH) \ -+ -DELF_FORMAT=$(ELF_FORMAT) $(M4_MODE_TT) \ -+ -DKERNEL_STACK_SIZE=$$(( 4096 * $$pages )) $< > $@ -+ -+SYMLINK_HEADERS = include/asm-um/archparam.h include/asm-um/system.h \ -+ include/asm-um/sigcontext.h include/asm-um/processor.h \ -+ include/asm-um/ptrace.h include/asm-um/arch-signal.h -+ -+ARCH_SYMLINKS = include/asm-um/arch arch/um/include/sysdep arch/um/os \ -+ $(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h -+ -+ifeq ($(CONFIG_MODE_SKAS), y) -+$(SYS_HEADERS) : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+endif -+ -+GEN_HEADERS += $(ARCH_DIR)/include/task.h $(ARCH_DIR)/include/kern_constants.h -+ -+setup: $(ARCH_SYMLINKS) $(SYS_HEADERS) $(GEN_HEADERS) -+ -+linux: setup $(ARCH_DIR)/main.o vmlinux $(LD_SCRIPT-y) -+ mv vmlinux vmlinux.o -+ $(CC) 
-Wl,-T,$(LD_SCRIPT-y) $(LINK-y) $(LINK_WRAPS) \ -+ -o linux $(ARCH_DIR)/main.o vmlinux.o -L/usr/lib -lutil -+ -+USER_CFLAGS := $(patsubst -I%,,$(CFLAGS)) -+USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS)) -+USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \ -+ $(MODE_INCLUDE) -+ -+# To get a definition of F_SETSIG -+USER_CFLAGS += -D_GNU_SOURCE -+ -+CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/link.ld $(ARCH_DIR)/dyn_link.ld \ -+ $(GEN_HEADERS) $(ARCH_DIR)/include/uml-config.h -+ -+$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c -+ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+archmrproper: -+ rm -f $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) include/asm \ -+ $(LD_SCRIPT) $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS)) -+ -+archclean: sysclean -+ find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \ -+ -o -name '*.gcov' \) -type f -print | xargs rm -f -+ cd $(ARCH_DIR) ; \ -+ for dir in $(subdir-y) util ; do $(MAKE) -C $$dir clean; done -+ -+archdep: -+ -+$(SYMLINK_HEADERS): -+ cd $(TOPDIR)/$(dir $@) ; \ -+ ln -sf $(basename $(notdir $@))-$(SUBARCH)$(suffix $@) $(notdir $@) -+ -+include/asm-um/arch: -+ cd $(TOPDIR)/include/asm-um && ln -sf ../asm-$(SUBARCH) arch -+ -+arch/um/include/sysdep: -+ cd $(TOPDIR)/arch/um/include && ln -sf sysdep-$(SUBARCH) sysdep -+ -+arch/um/os: -+ cd $(ARCH_DIR) && ln -sf os-$(OS) os -+ -+$(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task -+ $< > $@ -+ -+$(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants -+ $< > $@ -+ -+$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h -+ sed 's/ CONFIG/ UML_CONFIG/' $(TOPDIR)/include/linux/autoconf.h > $@ -+ -+$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/util/mk_task_user.c \ -+ $(ARCH_DIR)/util/mk_task_kern.c $(SYS_HEADERS) -+ $(MAKE) $(MFLAGS) -C $(ARCH_DIR)/util mk_task -+ -+$(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util/mk_constants_user.c \ -+ $(ARCH_DIR)/util/mk_constants_kern.c -+ $(MAKE) $(MFLAGS) -C $(ARCH_DIR)/util 
mk_constants -+ -+export SUBARCH USER_CFLAGS OS -diff -Naur -X ../exclude-files orig/arch/um/Makefile-i386 um/arch/um/Makefile-i386 ---- orig/arch/um/Makefile-i386 Wed Dec 31 19:00:00 1969 -+++ um/arch/um/Makefile-i386 Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,35 @@ -+ifeq ($(CONFIG_HOST_2G_2G), y) -+TOP_ADDR = 0x80000000 -+else -+TOP_ADDR = 0xc0000000 -+endif -+ -+ARCH_CFLAGS = -U__$(SUBARCH)__ -U$(SUBARCH) -DUM_FASTCALL -+ELF_ARCH = $(SUBARCH) -+ELF_FORMAT = elf32-$(SUBARCH) -+ -+I386_H = $(ARCH_DIR)/include/sysdep-i386 -+SYS = $(ARCH_DIR)/sys-i386 -+UTIL = $(SYS)/util -+SUBDIRS += $(UTIL) -+ -+SYS_HEADERS = $(I386_H)/sc.h $(I386_H)/thread.h -+ -+$(I386_H)/sc.h : $(UTIL)/mk_sc -+ $(UTIL)/mk_sc > $@ -+ -+$(I386_H)/thread.h : $(UTIL)/mk_thread -+ $(UTIL)/mk_thread > $@ -+ -+$(UTIL)/mk_sc : $(UTIL)/mk_sc.c -+ $(MAKE) -C $(UTIL) mk_sc -+ -+$(UTIL)/mk_thread : $(UTIL)/mk_thread_user.c $(UTIL)/mk_thread_kern.c \ -+ $(I386_H)/sc.h -+ $(MAKE) -C $(UTIL) mk_thread -+ -+sysclean : -+ rm -f $(SYS_HEADERS) -+ $(MAKE) -C $(UTIL) clean -+ $(MAKE) -C $(SYS) clean -+ -diff -Naur -X ../exclude-files orig/arch/um/Makefile-ia64 um/arch/um/Makefile-ia64 ---- orig/arch/um/Makefile-ia64 Wed Dec 31 19:00:00 1969 -+++ um/arch/um/Makefile-ia64 Wed Oct 23 21:08:04 2002 -@@ -0,0 +1 @@ -+START_ADDR = 0x1000000000000000 -diff -Naur -X ../exclude-files orig/arch/um/Makefile-os-Linux um/arch/um/Makefile-os-Linux ---- orig/arch/um/Makefile-os-Linux Wed Dec 31 19:00:00 1969 -+++ um/arch/um/Makefile-os-Linux Mon Dec 9 14:21:51 2002 -@@ -0,0 +1,7 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+SUBDIRS += $(ARCH_DIR)/os-$(OS)/drivers -+DRIVERS += $(ARCH_DIR)/os-$(OS)/drivers/drivers.o -diff -Naur -X ../exclude-files orig/arch/um/Makefile-ppc um/arch/um/Makefile-ppc ---- orig/arch/um/Makefile-ppc Wed Dec 31 19:00:00 1969 -+++ um/arch/um/Makefile-ppc Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,9 @@ -+ifeq ($(CONFIG_HOST_2G_2G), y) -+START_ADDR = 0x80000000 -+else 
-+START_ADDR = 0xc0000000 -+endif -+ARCH_CFLAGS = -U__powerpc__ -D__UM_PPC__ -+ -+# The arch is ppc, but the elf32 name is powerpc -+ELF_SUBARCH = powerpc -diff -Naur -X ../exclude-files orig/arch/um/Makefile-skas um/arch/um/Makefile-skas ---- orig/arch/um/Makefile-skas Wed Dec 31 19:00:00 1969 -+++ um/arch/um/Makefile-skas Sun Dec 15 22:02:57 2002 -@@ -0,0 +1,20 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+PROFILE += -pg -+ -+CFLAGS-$(CONFIG_GCOV) += -fprofile-arcs -ftest-coverage -+CFLAGS-$(CONFIG_GPROF) += $(PROFILE) -+LINK-$(CONFIG_GPROF) += $(PROFILE) -+ -+MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/kernel/skas/include -+ -+LINK_SKAS = -Wl,-rpath,/lib -+LD_SCRIPT_SKAS = dyn_link.ld -+ -+GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h -+ -+$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h : -+ $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h -diff -Naur -X ../exclude-files orig/arch/um/Makefile-tt um/arch/um/Makefile-tt ---- orig/arch/um/Makefile-tt Wed Dec 31 19:00:00 1969 -+++ um/arch/um/Makefile-tt Mon Dec 16 20:22:23 2002 -@@ -0,0 +1,7 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+MODE_INCLUDE += -I$(TOPDIR)/$(ARCH_DIR)/kernel/tt/include -+ -diff -Naur -X ../exclude-files orig/arch/um/common.ld.in um/arch/um/common.ld.in ---- orig/arch/um/common.ld.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/common.ld.in Tue Feb 4 19:35:13 2003 -@@ -0,0 +1,53 @@ -+ .kstrtab : { *(.kstrtab) } -+ -+ . = ALIGN(16); /* Exception table */ -+ __start___ex_table = .; -+ __ex_table : { *(__ex_table) } -+ __stop___ex_table = .; -+ -+ __start___ksymtab = .; /* Kernel symbol table */ -+ __ksymtab : { *(__ksymtab) } -+ __stop___ksymtab = .; -+ -+ .unprotected : { *(.unprotected) } -+ . = ALIGN(4096); -+ PROVIDE (_unprotected_end = .); -+ -+ . 
= ALIGN(4096); -+ __uml_setup_start = .; -+ .uml.setup.init : { *(.uml.setup.init) } -+ __uml_setup_end = .; -+ __uml_help_start = .; -+ .uml.help.init : { *(.uml.help.init) } -+ __uml_help_end = .; -+ __uml_postsetup_start = .; -+ .uml.postsetup.init : { *(.uml.postsetup.init) } -+ __uml_postsetup_end = .; -+ __setup_start = .; -+ .setup.init : { *(.setup.init) } -+ __setup_end = .; -+ __initcall_start = .; -+ .initcall.init : { *(.initcall.init) } -+ __initcall_end = .; -+ __uml_initcall_start = .; -+ .uml.initcall.init : { *(.uml.initcall.init) } -+ __uml_initcall_end = .; -+ __init_end = .; -+ __exitcall_begin = .; -+ .exitcall : { *(.exitcall.exit) } -+ __exitcall_end = .; -+ __uml_exitcall_begin = .; -+ .uml.exitcall : { *(.uml.exitcall.exit) } -+ __uml_exitcall_end = .; -+ -+ __preinit_array_start = .; -+ .preinit_array : { *(.preinit_array) } -+ __preinit_array_end = .; -+ __init_array_start = .; -+ .init_array : { *(.init_array) } -+ __init_array_end = .; -+ __fini_array_start = .; -+ .fini_array : { *(.fini_array) } -+ __fini_array_end = .; -+ -+ .data.init : { *(.data.init) } -diff -Naur -X ../exclude-files orig/arch/um/config.in um/arch/um/config.in ---- orig/arch/um/config.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/config.in Thu Feb 27 13:12:39 2003 -@@ -0,0 +1,104 @@ -+define_bool CONFIG_USERMODE y -+ -+mainmenu_name "Linux/Usermode Kernel Configuration" -+ -+define_bool CONFIG_ISA n -+define_bool CONFIG_SBUS n -+define_bool CONFIG_PCI n -+ -+define_bool CONFIG_UID16 y -+ -+define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y -+ -+mainmenu_option next_comment -+comment 'Code maturity level options' -+bool 'Prompt for development and/or incomplete code/drivers' CONFIG_EXPERIMENTAL -+endmenu -+ -+mainmenu_option next_comment -+comment 'General Setup' -+ -+bool 'Separate kernel address space support' CONFIG_MODE_SKAS -+ -+# This is to ensure that at least one of the modes is enabled. When neither -+# is present in defconfig, they default to N, which is bad. 
-+if [ "$CONFIG_MODE_SKAS" != "y" ]; then -+ define_bool CONFIG_MODE_TT y -+fi -+ -+bool 'Tracing thread support' CONFIG_MODE_TT -+if [ "$CONFIG_MODE_TT" != "y" ]; then -+ bool 'Statically linked binary when CONFIG_MODE_TT is disabled' CONFIG_STATIC_LINK -+fi -+bool 'Networking support' CONFIG_NET -+bool 'System V IPC' CONFIG_SYSVIPC -+bool 'BSD Process Accounting' CONFIG_BSD_PROCESS_ACCT -+bool 'Sysctl support' CONFIG_SYSCTL -+tristate 'Kernel support for a.out binaries' CONFIG_BINFMT_AOUT -+tristate 'Kernel support for ELF binaries' CONFIG_BINFMT_ELF -+tristate 'Kernel support for MISC binaries' CONFIG_BINFMT_MISC -+tristate 'Host filesystem' CONFIG_HOSTFS -+tristate 'Honeypot proc filesystem' CONFIG_HPPFS -+bool 'Management console' CONFIG_MCONSOLE -+dep_bool 'Magic SysRq key' CONFIG_MAGIC_SYSRQ $CONFIG_MCONSOLE -+bool '2G/2G host address space split' CONFIG_HOST_2G_2G -+bool 'Symmetric multi-processing support' CONFIG_UML_SMP -+define_bool CONFIG_SMP $CONFIG_UML_SMP -+int 'Nesting level' CONFIG_NEST_LEVEL 0 -+int 'Kernel address space size (in .5G units)' CONFIG_KERNEL_HALF_GIGS 1 -+bool 'Highmem support' CONFIG_HIGHMEM -+bool '/proc/mm' CONFIG_PROC_MM -+int 'Kernel stack size order' CONFIG_KERNEL_STACK_ORDER 2 -+endmenu -+ -+mainmenu_option next_comment -+comment 'Loadable module support' -+bool 'Enable loadable module support' CONFIG_MODULES -+if [ "$CONFIG_MODULES" = "y" ]; then -+# MODVERSIONS does not yet work in this architecture -+# bool ' Set version information on all module symbols' CONFIG_MODVERSIONS -+ bool ' Kernel module loader' CONFIG_KMOD -+fi -+endmenu -+ -+source arch/um/config_char.in -+ -+source arch/um/config_block.in -+ -+define_bool CONFIG_NETDEVICES $CONFIG_NET -+ -+if [ "$CONFIG_NET" = "y" ]; then -+ source arch/um/config_net.in -+ source net/Config.in -+fi -+ -+source fs/Config.in -+ -+mainmenu_option next_comment -+comment 'SCSI support' -+ -+tristate 'SCSI support' CONFIG_SCSI -+ -+if [ "$CONFIG_SCSI" != "n" ]; then -+ source 
arch/um/config_scsi.in -+fi -+endmenu -+ -+source drivers/md/Config.in -+ -+source drivers/mtd/Config.in -+ -+source lib/Config.in -+ -+mainmenu_option next_comment -+comment 'Kernel hacking' -+bool 'Debug memory allocations' CONFIG_DEBUG_SLAB -+bool 'Enable kernel debugging symbols' CONFIG_DEBUGSYM -+if [ "$CONFIG_XTERM_CHAN" = "y" ]; then -+ dep_bool 'Enable ptrace proxy' CONFIG_PT_PROXY $CONFIG_DEBUGSYM -+else -+ define_bool CONFIG_PT_PROXY n -+fi -+dep_bool 'Enable gprof support' CONFIG_GPROF $CONFIG_DEBUGSYM -+dep_bool 'Enable gcov support' CONFIG_GCOV $CONFIG_DEBUGSYM -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config.release um/arch/um/config.release ---- orig/arch/um/config.release Wed Dec 31 19:00:00 1969 -+++ um/arch/um/config.release Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,302 @@ -+# -+# Automatically generated make config: don't edit -+# -+CONFIG_USERMODE=y -+# CONFIG_ISA is not set -+# CONFIG_SBUS is not set -+# CONFIG_PCI is not set -+CONFIG_UID16=y -+CONFIG_RWSEM_XCHGADD_ALGORITHM=y -+ -+# -+# Code maturity level options -+# -+CONFIG_EXPERIMENTAL=y -+ -+# -+# General Setup -+# -+CONFIG_NET=y -+CONFIG_SYSVIPC=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_SYSCTL=y -+CONFIG_BINFMT_AOUT=y -+CONFIG_BINFMT_ELF=y -+CONFIG_BINFMT_MISC=y -+CONFIG_HOSTFS=y -+# CONFIG_HPPFS is not set -+CONFIG_MCONSOLE=y -+CONFIG_MAGIC_SYSRQ=y -+# CONFIG_HOST_2G_2G is not set -+# CONFIG_UML_SMP is not set -+# CONFIG_SMP is not set -+CONFIG_NEST_LEVEL=0 -+CONFIG_KERNEL_HALF_GIGS=1 -+ -+# -+# Loadable module support -+# -+CONFIG_MODULES=y -+CONFIG_KMOD=y -+ -+# -+# Character Devices -+# -+CONFIG_STDIO_CONSOLE=y -+CONFIG_SSL=y -+CONFIG_FD_CHAN=y -+# CONFIG_NULL_CHAN is not set -+CONFIG_PORT_CHAN=y -+CONFIG_PTY_CHAN=y -+CONFIG_TTY_CHAN=y -+CONFIG_XTERM_CHAN=y -+CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -+CONFIG_CON_CHAN="xterm" -+CONFIG_SSL_CHAN="pty" -+CONFIG_UNIX98_PTYS=y -+CONFIG_UNIX98_PTY_COUNT=256 -+# CONFIG_WATCHDOG is not set -+CONFIG_UML_SOUND=y -+CONFIG_SOUND=y 
-+CONFIG_HOSTAUDIO=y -+# CONFIG_TTY_LOG is not set -+ -+# -+# Block Devices -+# -+CONFIG_BLK_DEV_UBD=y -+# CONFIG_BLK_DEV_UBD_SYNC is not set -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_NBD=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=4096 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_MMAPPER is not set -+CONFIG_NETDEVICES=y -+ -+# -+# Network Devices -+# -+CONFIG_UML_NET=y -+CONFIG_UML_NET_ETHERTAP=y -+CONFIG_UML_NET_TUNTAP=y -+CONFIG_UML_NET_SLIP=y -+CONFIG_UML_NET_DAEMON=y -+CONFIG_UML_NET_MCAST=y -+CONFIG_DUMMY=y -+CONFIG_BONDING=m -+CONFIG_EQUALIZER=m -+CONFIG_TUN=y -+CONFIG_PPP=m -+CONFIG_PPP_MULTILINK=y -+# CONFIG_PPP_ASYNC is not set -+CONFIG_PPP_SYNC_TTY=m -+CONFIG_PPP_DEFLATE=m -+CONFIG_PPP_BSDCOMP=m -+CONFIG_PPPOE=m -+CONFIG_SLIP=m -+ -+# -+# Networking options -+# -+CONFIG_PACKET=y -+CONFIG_PACKET_MMAP=y -+# CONFIG_NETLINK_DEV is not set -+# CONFIG_NETFILTER is not set -+# CONFIG_FILTER is not set -+CONFIG_UNIX=y -+CONFIG_INET=y -+# CONFIG_IP_MULTICAST is not set -+# CONFIG_IP_ADVANCED_ROUTER is not set -+# CONFIG_IP_PNP is not set -+# CONFIG_NET_IPIP is not set -+# CONFIG_NET_IPGRE is not set -+# CONFIG_ARPD is not set -+# CONFIG_INET_ECN is not set -+# CONFIG_SYN_COOKIES is not set -+# CONFIG_IPV6 is not set -+# CONFIG_KHTTPD is not set -+# CONFIG_ATM is not set -+# CONFIG_VLAN_8021Q is not set -+ -+# -+# -+# -+# CONFIG_IPX is not set -+# CONFIG_ATALK is not set -+ -+# -+# Appletalk devices -+# -+# CONFIG_DECNET is not set -+# CONFIG_BRIDGE is not set -+# CONFIG_X25 is not set -+# CONFIG_LAPB is not set -+# CONFIG_LLC is not set -+# CONFIG_NET_DIVERT is not set -+# CONFIG_ECONET is not set -+# CONFIG_WAN_ROUTER is not set -+# CONFIG_NET_FASTROUTE is not set -+# CONFIG_NET_HW_FLOWCONTROL is not set -+ -+# -+# QoS and/or fair queueing -+# -+# CONFIG_NET_SCHED is not set -+ -+# -+# Network testing -+# -+# CONFIG_NET_PKTGEN is not set -+ -+# -+# File systems -+# -+CONFIG_QUOTA=y -+CONFIG_AUTOFS_FS=m -+CONFIG_AUTOFS4_FS=m -+CONFIG_REISERFS_FS=m -+# 
CONFIG_REISERFS_CHECK is not set -+# CONFIG_REISERFS_PROC_INFO is not set -+CONFIG_ADFS_FS=m -+# CONFIG_ADFS_FS_RW is not set -+CONFIG_AFFS_FS=m -+CONFIG_HFS_FS=m -+CONFIG_BFS_FS=m -+CONFIG_EXT3_FS=y -+CONFIG_JBD=y -+# CONFIG_JBD_DEBUG is not set -+CONFIG_FAT_FS=y -+CONFIG_MSDOS_FS=y -+CONFIG_UMSDOS_FS=y -+CONFIG_VFAT_FS=y -+CONFIG_EFS_FS=m -+CONFIG_CRAMFS=m -+CONFIG_TMPFS=y -+CONFIG_RAMFS=y -+CONFIG_ISO9660_FS=y -+# CONFIG_JOLIET is not set -+# CONFIG_ZISOFS is not set -+CONFIG_MINIX_FS=m -+CONFIG_VXFS_FS=m -+# CONFIG_NTFS_FS is not set -+CONFIG_HPFS_FS=m -+CONFIG_PROC_FS=y -+CONFIG_DEVFS_FS=y -+CONFIG_DEVFS_MOUNT=y -+# CONFIG_DEVFS_DEBUG is not set -+CONFIG_DEVPTS_FS=y -+CONFIG_QNX4FS_FS=m -+# CONFIG_QNX4FS_RW is not set -+CONFIG_ROMFS_FS=m -+CONFIG_EXT2_FS=y -+CONFIG_SYSV_FS=m -+CONFIG_UDF_FS=m -+# CONFIG_UDF_RW is not set -+CONFIG_UFS_FS=m -+# CONFIG_UFS_FS_WRITE is not set -+ -+# -+# Network File Systems -+# -+# CONFIG_CODA_FS is not set -+# CONFIG_INTERMEZZO_FS is not set -+CONFIG_NFS_FS=y -+CONFIG_NFS_V3=y -+CONFIG_NFSD=y -+CONFIG_NFSD_V3=y -+CONFIG_SUNRPC=y -+CONFIG_LOCKD=y -+CONFIG_LOCKD_V4=y -+# CONFIG_SMB_FS is not set -+# CONFIG_NCP_FS is not set -+# CONFIG_ZISOFS_FS is not set -+CONFIG_ZLIB_FS_INFLATE=m -+ -+# -+# Partition Types -+# -+# CONFIG_PARTITION_ADVANCED is not set -+CONFIG_MSDOS_PARTITION=y -+# CONFIG_SMB_NLS is not set -+CONFIG_NLS=y -+ -+# -+# Native Language Support -+# -+CONFIG_NLS_DEFAULT="iso8859-1" -+# CONFIG_NLS_CODEPAGE_437 is not set -+# CONFIG_NLS_CODEPAGE_737 is not set -+# CONFIG_NLS_CODEPAGE_775 is not set -+# CONFIG_NLS_CODEPAGE_850 is not set -+# CONFIG_NLS_CODEPAGE_852 is not set -+# CONFIG_NLS_CODEPAGE_855 is not set -+# CONFIG_NLS_CODEPAGE_857 is not set -+# CONFIG_NLS_CODEPAGE_860 is not set -+# CONFIG_NLS_CODEPAGE_861 is not set -+# CONFIG_NLS_CODEPAGE_862 is not set -+# CONFIG_NLS_CODEPAGE_863 is not set -+# CONFIG_NLS_CODEPAGE_864 is not set -+# CONFIG_NLS_CODEPAGE_865 is not set -+# CONFIG_NLS_CODEPAGE_866 is not set 
-+# CONFIG_NLS_CODEPAGE_869 is not set -+# CONFIG_NLS_CODEPAGE_936 is not set -+# CONFIG_NLS_CODEPAGE_950 is not set -+# CONFIG_NLS_CODEPAGE_932 is not set -+# CONFIG_NLS_CODEPAGE_949 is not set -+# CONFIG_NLS_CODEPAGE_874 is not set -+# CONFIG_NLS_ISO8859_8 is not set -+# CONFIG_NLS_CODEPAGE_1250 is not set -+# CONFIG_NLS_CODEPAGE_1251 is not set -+# CONFIG_NLS_ISO8859_1 is not set -+# CONFIG_NLS_ISO8859_2 is not set -+# CONFIG_NLS_ISO8859_3 is not set -+# CONFIG_NLS_ISO8859_4 is not set -+# CONFIG_NLS_ISO8859_5 is not set -+# CONFIG_NLS_ISO8859_6 is not set -+# CONFIG_NLS_ISO8859_7 is not set -+# CONFIG_NLS_ISO8859_9 is not set -+# CONFIG_NLS_ISO8859_13 is not set -+# CONFIG_NLS_ISO8859_14 is not set -+# CONFIG_NLS_ISO8859_15 is not set -+# CONFIG_NLS_KOI8_R is not set -+# CONFIG_NLS_KOI8_U is not set -+# CONFIG_NLS_UTF8 is not set -+ -+# -+# SCSI support -+# -+CONFIG_SCSI=y -+ -+# -+# SCSI support type (disk, tape, CD-ROM) -+# -+# CONFIG_BLK_DEV_SD is not set -+# CONFIG_CHR_DEV_ST is not set -+# CONFIG_BLK_DEV_SR is not set -+# CONFIG_CHR_DEV_SG is not set -+ -+# -+# Some SCSI devices (e.g. 
CD jukebox) support multiple LUNs -+# -+# CONFIG_SCSI_DEBUG_QUEUES is not set -+# CONFIG_SCSI_MULTI_LUN is not set -+# CONFIG_SCSI_CONSTANTS is not set -+# CONFIG_SCSI_LOGGING is not set -+CONFIG_SCSI_DEBUG=m -+ -+# -+# Multi-device support (RAID and LVM) -+# -+# CONFIG_MD is not set -+ -+# -+# Memory Technology Devices (MTD) -+# -+# CONFIG_MTD is not set -+ -+# -+# Kernel hacking -+# -+# CONFIG_DEBUG_SLAB is not set -+# CONFIG_DEBUGSYM is not set -diff -Naur -X ../exclude-files orig/arch/um/config_block.in um/arch/um/config_block.in ---- orig/arch/um/config_block.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/config_block.in Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,16 @@ -+mainmenu_option next_comment -+comment 'Block Devices' -+ -+bool 'Virtual block device' CONFIG_BLK_DEV_UBD -+dep_bool ' Always do synchronous disk IO for UBD' CONFIG_BLK_DEV_UBD_SYNC $CONFIG_BLK_DEV_UBD -+tristate 'Loopback device support' CONFIG_BLK_DEV_LOOP -+dep_tristate 'Network block device support' CONFIG_BLK_DEV_NBD $CONFIG_NET -+tristate 'RAM disk support' CONFIG_BLK_DEV_RAM -+if [ "$CONFIG_BLK_DEV_RAM" = "y" -o "$CONFIG_BLK_DEV_RAM" = "m" ]; then -+ int ' Default RAM disk size' CONFIG_BLK_DEV_RAM_SIZE 4096 -+fi -+dep_bool ' Initial RAM disk (initrd) support' CONFIG_BLK_DEV_INITRD $CONFIG_BLK_DEV_RAM -+ -+tristate 'Example IO memory driver' CONFIG_MMAPPER -+ -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config_char.in um/arch/um/config_char.in ---- orig/arch/um/config_char.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/config_char.in Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,37 @@ -+mainmenu_option next_comment -+comment 'Character Devices' -+ -+define_bool CONFIG_STDIO_CONSOLE y -+ -+bool 'Virtual serial line' CONFIG_SSL -+ -+bool 'file descriptor channel support' CONFIG_FD_CHAN -+bool 'null channel support' CONFIG_NULL_CHAN -+bool 'port channel support' CONFIG_PORT_CHAN -+bool 'pty channel support' CONFIG_PTY_CHAN -+bool 'tty channel support' CONFIG_TTY_CHAN -+bool 'xterm channel support' 
CONFIG_XTERM_CHAN -+string 'Default main console channel initialization' CONFIG_CON_ZERO_CHAN \ -+ "fd:0,fd:1" -+string 'Default console channel initialization' CONFIG_CON_CHAN "xterm" -+string 'Default serial line channel initialization' CONFIG_SSL_CHAN "pty" -+ -+ -+bool 'Unix98 PTY support' CONFIG_UNIX98_PTYS -+if [ "$CONFIG_UNIX98_PTYS" = "y" ]; then -+ int 'Maximum number of Unix98 PTYs in use (0-2048)' CONFIG_UNIX98_PTY_COUNT 256 -+fi -+ -+bool 'Watchdog Timer Support' CONFIG_WATCHDOG -+dep_bool ' Disable watchdog shutdown on close' CONFIG_WATCHDOG_NOWAYOUT \ -+ $CONFIG_WATCHDOG -+dep_tristate ' Software Watchdog' CONFIG_SOFT_WATCHDOG $CONFIG_WATCHDOG -+dep_tristate ' UML watchdog' CONFIG_UML_WATCHDOG $CONFIG_WATCHDOG -+ -+tristate 'Sound support' CONFIG_UML_SOUND -+define_tristate CONFIG_SOUND $CONFIG_UML_SOUND -+define_tristate CONFIG_HOSTAUDIO $CONFIG_UML_SOUND -+ -+bool 'Enable tty logging' CONFIG_TTY_LOG -+ -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config_net.in um/arch/um/config_net.in ---- orig/arch/um/config_net.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/config_net.in Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,47 @@ -+mainmenu_option next_comment -+comment 'Network Devices' -+ -+# UML virtual driver -+bool 'Virtual network device' CONFIG_UML_NET -+ -+dep_bool ' Ethertap transport' CONFIG_UML_NET_ETHERTAP $CONFIG_UML_NET -+dep_bool ' TUN/TAP transport' CONFIG_UML_NET_TUNTAP $CONFIG_UML_NET -+dep_bool ' SLIP transport' CONFIG_UML_NET_SLIP $CONFIG_UML_NET -+dep_bool ' SLiRP transport' CONFIG_UML_NET_SLIRP $CONFIG_UML_NET -+dep_bool ' Daemon transport' CONFIG_UML_NET_DAEMON $CONFIG_UML_NET -+dep_bool ' Multicast transport' CONFIG_UML_NET_MCAST $CONFIG_UML_NET -+dep_bool ' pcap transport' CONFIG_UML_NET_PCAP $CONFIG_UML_NET -+ -+# Below are hardware-independent drivers mirrored from -+# drivers/net/Config.in. It would be nice if Linux -+# had HW independent drivers separated from the other -+# but it does not. 
Until then each non-ISA/PCI arch -+# needs to provide it's own menu of network drivers -+ -+tristate 'Dummy net driver support' CONFIG_DUMMY -+tristate 'Bonding driver support' CONFIG_BONDING -+tristate 'EQL (serial line load balancing) support' CONFIG_EQUALIZER -+tristate 'Universal TUN/TAP device driver support' CONFIG_TUN -+if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ if [ "$CONFIG_NETLINK" = "y" ]; then -+ tristate 'Ethertap network tap (OBSOLETE)' CONFIG_ETHERTAP -+ fi -+fi -+ -+tristate 'PPP (point-to-point protocol) support' CONFIG_PPP -+if [ ! "$CONFIG_PPP" = "n" ]; then -+ dep_bool ' PPP multilink support (EXPERIMENTAL)' CONFIG_PPP_MULTILINK $CONFIG_EXPERIMENTAL -+ dep_bool ' PPP filtering' CONFIG_PPP_FILTER $CONFIG_FILTER -+ dep_tristate ' PPP support for async serial ports' CONFIG_PPP_ASYNC $CONFIG_PPP -+ dep_tristate ' PPP support for sync tty ports' CONFIG_PPP_SYNC_TTY $CONFIG_PPP -+ dep_tristate ' PPP Deflate compression' CONFIG_PPP_DEFLATE $CONFIG_PPP -+ dep_tristate ' PPP BSD-Compress compression' CONFIG_PPP_BSDCOMP $CONFIG_PPP -+ dep_tristate ' PPP over Ethernet (EXPERIMENTAL)' CONFIG_PPPOE $CONFIG_PPP $CONFIG_EXPERIMENTAL -+fi -+ -+tristate 'SLIP (serial line) support' CONFIG_SLIP -+dep_bool ' CSLIP compressed headers' CONFIG_SLIP_COMPRESSED $CONFIG_SLIP -+dep_bool ' Keepalive and linefill' CONFIG_SLIP_SMART $CONFIG_SLIP -+dep_bool ' Six bit SLIP encapsulation' CONFIG_SLIP_MODE_SLIP6 $CONFIG_SLIP -+ -+endmenu -diff -Naur -X ../exclude-files orig/arch/um/config_scsi.in um/arch/um/config_scsi.in ---- orig/arch/um/config_scsi.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/config_scsi.in Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+comment 'SCSI support type (disk, tape, CD-ROM)' -+ -+dep_tristate ' SCSI disk support' CONFIG_BLK_DEV_SD $CONFIG_SCSI -+ -+if [ "$CONFIG_BLK_DEV_SD" != "n" ]; then -+ int 'Maximum number of SCSI disks that can be loaded as modules' CONFIG_SD_EXTRA_DEVS 40 -+fi -+ -+dep_tristate ' SCSI tape support' CONFIG_CHR_DEV_ST 
$CONFIG_SCSI -+ -+dep_tristate ' SCSI CD-ROM support' CONFIG_BLK_DEV_SR $CONFIG_SCSI -+ -+if [ "$CONFIG_BLK_DEV_SR" != "n" ]; then -+ bool ' Enable vendor-specific extensions (for SCSI CDROM)' CONFIG_BLK_DEV_SR_VENDOR -+ int 'Maximum number of CDROM devices that can be loaded as modules' CONFIG_SR_EXTRA_DEVS 2 -+fi -+dep_tristate ' SCSI generic support' CONFIG_CHR_DEV_SG $CONFIG_SCSI -+ -+comment 'Some SCSI devices (e.g. CD jukebox) support multiple LUNs' -+ -+#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then -+ bool ' Enable extra checks in new queueing code' CONFIG_SCSI_DEBUG_QUEUES -+#fi -+ -+bool ' Probe all LUNs on each SCSI device' CONFIG_SCSI_MULTI_LUN -+ -+bool ' Verbose SCSI error reporting (kernel size +=12K)' CONFIG_SCSI_CONSTANTS -+bool ' SCSI logging facility' CONFIG_SCSI_LOGGING -+ -+dep_tristate 'SCSI debugging host simulator (EXPERIMENTAL)' CONFIG_SCSI_DEBUG $CONFIG_SCSI -diff -Naur -X ../exclude-files orig/arch/um/defconfig um/arch/um/defconfig ---- orig/arch/um/defconfig Wed Dec 31 19:00:00 1969 -+++ um/arch/um/defconfig Mon Jan 20 11:26:54 2003 -@@ -0,0 +1,396 @@ -+# -+# Automatically generated make config: don't edit -+# -+CONFIG_USERMODE=y -+# CONFIG_ISA is not set -+# CONFIG_SBUS is not set -+# CONFIG_PCI is not set -+CONFIG_UID16=y -+CONFIG_RWSEM_XCHGADD_ALGORITHM=y -+ -+# -+# Code maturity level options -+# -+CONFIG_EXPERIMENTAL=y -+ -+# -+# General Setup -+# -+CONFIG_MODE_TT=y -+CONFIG_MODE_SKAS=y -+CONFIG_NET=y -+CONFIG_SYSVIPC=y -+CONFIG_BSD_PROCESS_ACCT=y -+CONFIG_SYSCTL=y -+CONFIG_BINFMT_AOUT=y -+CONFIG_BINFMT_ELF=y -+CONFIG_BINFMT_MISC=y -+CONFIG_HOSTFS=y -+CONFIG_HPPFS=y -+CONFIG_MCONSOLE=y -+CONFIG_MAGIC_SYSRQ=y -+# CONFIG_HOST_2G_2G is not set -+# CONFIG_UML_SMP is not set -+# CONFIG_SMP is not set -+CONFIG_NEST_LEVEL=0 -+CONFIG_KERNEL_HALF_GIGS=1 -+# CONFIG_HIGHMEM is not set -+CONFIG_PROC_MM=y -+CONFIG_KERNEL_STACK_ORDER=2 -+ -+# -+# Loadable module support -+# -+CONFIG_MODULES=y -+# CONFIG_KMOD is not set -+ -+# -+# Character Devices 
-+# -+CONFIG_STDIO_CONSOLE=y -+CONFIG_SSL=y -+CONFIG_FD_CHAN=y -+CONFIG_NULL_CHAN=y -+CONFIG_PORT_CHAN=y -+CONFIG_PTY_CHAN=y -+CONFIG_TTY_CHAN=y -+CONFIG_XTERM_CHAN=y -+CONFIG_CON_ZERO_CHAN="fd:0,fd:1" -+CONFIG_CON_CHAN="xterm" -+CONFIG_SSL_CHAN="pty" -+CONFIG_UNIX98_PTYS=y -+CONFIG_UNIX98_PTY_COUNT=256 -+# CONFIG_WATCHDOG is not set -+# CONFIG_WATCHDOG_NOWAYOUT is not set -+# CONFIG_SOFT_WATCHDOG is not set -+# CONFIG_UML_WATCHDOG is not set -+CONFIG_UML_SOUND=y -+CONFIG_SOUND=y -+CONFIG_HOSTAUDIO=y -+# CONFIG_TTY_LOG is not set -+ -+# -+# Block Devices -+# -+CONFIG_BLK_DEV_UBD=y -+# CONFIG_BLK_DEV_UBD_SYNC is not set -+CONFIG_BLK_DEV_LOOP=y -+CONFIG_BLK_DEV_NBD=y -+CONFIG_BLK_DEV_RAM=y -+CONFIG_BLK_DEV_RAM_SIZE=4096 -+CONFIG_BLK_DEV_INITRD=y -+# CONFIG_MMAPPER is not set -+CONFIG_NETDEVICES=y -+ -+# -+# Network Devices -+# -+CONFIG_UML_NET=y -+CONFIG_UML_NET_ETHERTAP=y -+CONFIG_UML_NET_TUNTAP=y -+CONFIG_UML_NET_SLIP=y -+CONFIG_UML_NET_SLIRP=y -+CONFIG_UML_NET_DAEMON=y -+CONFIG_UML_NET_MCAST=y -+# CONFIG_UML_NET_PCAP is not set -+CONFIG_DUMMY=y -+# CONFIG_BONDING is not set -+# CONFIG_EQUALIZER is not set -+CONFIG_TUN=y -+CONFIG_PPP=y -+# CONFIG_PPP_MULTILINK is not set -+# CONFIG_PPP_FILTER is not set -+# CONFIG_PPP_ASYNC is not set -+# CONFIG_PPP_SYNC_TTY is not set -+# CONFIG_PPP_DEFLATE is not set -+# CONFIG_PPP_BSDCOMP is not set -+# CONFIG_PPPOE is not set -+CONFIG_SLIP=y -+# CONFIG_SLIP_COMPRESSED is not set -+# CONFIG_SLIP_SMART is not set -+# CONFIG_SLIP_MODE_SLIP6 is not set -+ -+# -+# Networking options -+# -+CONFIG_PACKET=y -+CONFIG_PACKET_MMAP=y -+# CONFIG_NETLINK_DEV is not set -+# CONFIG_NETFILTER is not set -+# CONFIG_FILTER is not set -+CONFIG_UNIX=y -+CONFIG_INET=y -+# CONFIG_IP_MULTICAST is not set -+# CONFIG_IP_ADVANCED_ROUTER is not set -+# CONFIG_IP_PNP is not set -+# CONFIG_NET_IPIP is not set -+# CONFIG_NET_IPGRE is not set -+# CONFIG_ARPD is not set -+# CONFIG_INET_ECN is not set -+# CONFIG_SYN_COOKIES is not set -+# CONFIG_IPV6 is not set 
-+# CONFIG_KHTTPD is not set -+# CONFIG_ATM is not set -+# CONFIG_VLAN_8021Q is not set -+ -+# -+# -+# -+# CONFIG_IPX is not set -+# CONFIG_ATALK is not set -+ -+# -+# Appletalk devices -+# -+# CONFIG_DEV_APPLETALK is not set -+# CONFIG_DECNET is not set -+# CONFIG_BRIDGE is not set -+# CONFIG_X25 is not set -+# CONFIG_LAPB is not set -+# CONFIG_LLC is not set -+# CONFIG_NET_DIVERT is not set -+# CONFIG_ECONET is not set -+# CONFIG_WAN_ROUTER is not set -+# CONFIG_NET_FASTROUTE is not set -+# CONFIG_NET_HW_FLOWCONTROL is not set -+ -+# -+# QoS and/or fair queueing -+# -+# CONFIG_NET_SCHED is not set -+ -+# -+# Network testing -+# -+# CONFIG_NET_PKTGEN is not set -+ -+# -+# File systems -+# -+CONFIG_QUOTA=y -+CONFIG_AUTOFS_FS=y -+CONFIG_AUTOFS4_FS=y -+CONFIG_REISERFS_FS=y -+# CONFIG_REISERFS_CHECK is not set -+# CONFIG_REISERFS_PROC_INFO is not set -+# CONFIG_ADFS_FS is not set -+# CONFIG_ADFS_FS_RW is not set -+# CONFIG_AFFS_FS is not set -+# CONFIG_HFS_FS is not set -+# CONFIG_BFS_FS is not set -+# CONFIG_EXT3_FS is not set -+# CONFIG_JBD is not set -+# CONFIG_JBD_DEBUG is not set -+CONFIG_FAT_FS=y -+CONFIG_MSDOS_FS=y -+CONFIG_UMSDOS_FS=y -+CONFIG_VFAT_FS=y -+# CONFIG_EFS_FS is not set -+CONFIG_JFFS_FS=y -+CONFIG_JFFS_FS_VERBOSE=0 -+CONFIG_JFFS_PROC_FS=y -+CONFIG_JFFS2_FS=y -+CONFIG_JFFS2_FS_DEBUG=0 -+# CONFIG_CRAMFS is not set -+# CONFIG_TMPFS is not set -+CONFIG_RAMFS=y -+CONFIG_ISO9660_FS=y -+# CONFIG_JOLIET is not set -+# CONFIG_ZISOFS is not set -+CONFIG_MINIX_FS=y -+# CONFIG_VXFS_FS is not set -+# CONFIG_NTFS_FS is not set -+# CONFIG_NTFS_RW is not set -+# CONFIG_HPFS_FS is not set -+CONFIG_PROC_FS=y -+CONFIG_DEVFS_FS=y -+CONFIG_DEVFS_MOUNT=y -+# CONFIG_DEVFS_DEBUG is not set -+CONFIG_DEVPTS_FS=y -+# CONFIG_QNX4FS_FS is not set -+# CONFIG_QNX4FS_RW is not set -+# CONFIG_ROMFS_FS is not set -+CONFIG_EXT2_FS=y -+# CONFIG_SYSV_FS is not set -+# CONFIG_UDF_FS is not set -+# CONFIG_UDF_RW is not set -+# CONFIG_UFS_FS is not set -+# CONFIG_UFS_FS_WRITE is not set 
-+ -+# -+# Network File Systems -+# -+# CONFIG_CODA_FS is not set -+# CONFIG_INTERMEZZO_FS is not set -+# CONFIG_NFS_FS is not set -+# CONFIG_NFS_V3 is not set -+# CONFIG_ROOT_NFS is not set -+# CONFIG_NFSD is not set -+# CONFIG_NFSD_V3 is not set -+# CONFIG_SUNRPC is not set -+# CONFIG_LOCKD is not set -+# CONFIG_SMB_FS is not set -+# CONFIG_NCP_FS is not set -+# CONFIG_NCPFS_PACKET_SIGNING is not set -+# CONFIG_NCPFS_IOCTL_LOCKING is not set -+# CONFIG_NCPFS_STRONG is not set -+# CONFIG_NCPFS_NFS_NS is not set -+# CONFIG_NCPFS_OS2_NS is not set -+# CONFIG_NCPFS_SMALLDOS is not set -+# CONFIG_NCPFS_NLS is not set -+# CONFIG_NCPFS_EXTRAS is not set -+# CONFIG_ZISOFS_FS is not set -+# CONFIG_ZLIB_FS_INFLATE is not set -+ -+# -+# Partition Types -+# -+# CONFIG_PARTITION_ADVANCED is not set -+CONFIG_MSDOS_PARTITION=y -+# CONFIG_SMB_NLS is not set -+CONFIG_NLS=y -+ -+# -+# Native Language Support -+# -+CONFIG_NLS_DEFAULT="iso8859-1" -+# CONFIG_NLS_CODEPAGE_437 is not set -+# CONFIG_NLS_CODEPAGE_737 is not set -+# CONFIG_NLS_CODEPAGE_775 is not set -+# CONFIG_NLS_CODEPAGE_850 is not set -+# CONFIG_NLS_CODEPAGE_852 is not set -+# CONFIG_NLS_CODEPAGE_855 is not set -+# CONFIG_NLS_CODEPAGE_857 is not set -+# CONFIG_NLS_CODEPAGE_860 is not set -+# CONFIG_NLS_CODEPAGE_861 is not set -+# CONFIG_NLS_CODEPAGE_862 is not set -+# CONFIG_NLS_CODEPAGE_863 is not set -+# CONFIG_NLS_CODEPAGE_864 is not set -+# CONFIG_NLS_CODEPAGE_865 is not set -+# CONFIG_NLS_CODEPAGE_866 is not set -+# CONFIG_NLS_CODEPAGE_869 is not set -+# CONFIG_NLS_CODEPAGE_936 is not set -+# CONFIG_NLS_CODEPAGE_950 is not set -+# CONFIG_NLS_CODEPAGE_932 is not set -+# CONFIG_NLS_CODEPAGE_949 is not set -+# CONFIG_NLS_CODEPAGE_874 is not set -+# CONFIG_NLS_ISO8859_8 is not set -+# CONFIG_NLS_CODEPAGE_1250 is not set -+# CONFIG_NLS_CODEPAGE_1251 is not set -+# CONFIG_NLS_ISO8859_1 is not set -+# CONFIG_NLS_ISO8859_2 is not set -+# CONFIG_NLS_ISO8859_3 is not set -+# CONFIG_NLS_ISO8859_4 is not set -+# 
CONFIG_NLS_ISO8859_5 is not set -+# CONFIG_NLS_ISO8859_6 is not set -+# CONFIG_NLS_ISO8859_7 is not set -+# CONFIG_NLS_ISO8859_9 is not set -+# CONFIG_NLS_ISO8859_13 is not set -+# CONFIG_NLS_ISO8859_14 is not set -+# CONFIG_NLS_ISO8859_15 is not set -+# CONFIG_NLS_KOI8_R is not set -+# CONFIG_NLS_KOI8_U is not set -+# CONFIG_NLS_UTF8 is not set -+ -+# -+# SCSI support -+# -+CONFIG_SCSI=y -+ -+# -+# SCSI support type (disk, tape, CD-ROM) -+# -+# CONFIG_BLK_DEV_SD is not set -+# CONFIG_CHR_DEV_ST is not set -+# CONFIG_BLK_DEV_SR is not set -+# CONFIG_CHR_DEV_SG is not set -+ -+# -+# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -+# -+# CONFIG_SCSI_DEBUG_QUEUES is not set -+# CONFIG_SCSI_MULTI_LUN is not set -+# CONFIG_SCSI_CONSTANTS is not set -+# CONFIG_SCSI_LOGGING is not set -+CONFIG_SCSI_DEBUG=y -+ -+# -+# Multi-device support (RAID and LVM) -+# -+# CONFIG_MD is not set -+# CONFIG_BLK_DEV_MD is not set -+# CONFIG_MD_LINEAR is not set -+# CONFIG_MD_RAID0 is not set -+# CONFIG_MD_RAID1 is not set -+# CONFIG_MD_RAID5 is not set -+# CONFIG_MD_MULTIPATH is not set -+# CONFIG_BLK_DEV_LVM is not set -+ -+# -+# Memory Technology Devices (MTD) -+# -+CONFIG_MTD=y -+# CONFIG_MTD_DEBUG is not set -+# CONFIG_MTD_PARTITIONS is not set -+# CONFIG_MTD_CONCAT is not set -+# CONFIG_MTD_REDBOOT_PARTS is not set -+ -+# -+# User Modules And Translation Layers -+# -+CONFIG_MTD_CHAR=y -+CONFIG_MTD_BLOCK=y -+# CONFIG_FTL is not set -+# CONFIG_NFTL is not set -+ -+# -+# RAM/ROM/Flash chip drivers -+# -+# CONFIG_MTD_CFI is not set -+# CONFIG_MTD_JEDECPROBE is not set -+# CONFIG_MTD_GEN_PROBE is not set -+# CONFIG_MTD_CFI_INTELEXT is not set -+# CONFIG_MTD_CFI_AMDSTD is not set -+# CONFIG_MTD_RAM is not set -+# CONFIG_MTD_ROM is not set -+# CONFIG_MTD_ABSENT is not set -+# CONFIG_MTD_OBSOLETE_CHIPS is not set -+# CONFIG_MTD_AMDSTD is not set -+# CONFIG_MTD_SHARP is not set -+# CONFIG_MTD_JEDEC is not set -+ -+# -+# Mapping drivers for chip access -+# -+# CONFIG_MTD_PHYSMAP is 
not set -+# CONFIG_MTD_PCI is not set -+ -+# -+# Self-contained MTD device drivers -+# -+# CONFIG_MTD_PMC551 is not set -+# CONFIG_MTD_SLRAM is not set -+# CONFIG_MTD_MTDRAM is not set -+CONFIG_MTD_BLKMTD=y -+ -+# -+# Disk-On-Chip Device Drivers -+# -+# CONFIG_MTD_DOC1000 is not set -+# CONFIG_MTD_DOC2000 is not set -+# CONFIG_MTD_DOC2001 is not set -+# CONFIG_MTD_DOCPROBE is not set -+ -+# -+# NAND Flash Device Drivers -+# -+# CONFIG_MTD_NAND is not set -+ -+# -+# Kernel hacking -+# -+# CONFIG_DEBUG_SLAB is not set -+CONFIG_DEBUGSYM=y -+CONFIG_PT_PROXY=y -+# CONFIG_GPROF is not set -+# CONFIG_GCOV is not set -diff -Naur -X ../exclude-files orig/arch/um/drivers/Makefile um/arch/um/drivers/Makefile ---- orig/arch/um/drivers/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,94 @@ -+# -+# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := built-in.o -+ -+CHAN_OBJS := chan_kern.o chan_user.o line.o -+ -+list-multi := slip.o slirp.o daemon.o mcast.o mconsole.o net.o ubd.o \ -+ hostaudio.o pcap.o port.o harddog.o -+ -+slip-objs := slip_kern.o slip_user.o -+slirp-objs := slirp_kern.o slirp_user.o -+daemon-objs := daemon_kern.o daemon_user.o -+mcast-objs := mcast_kern.o mcast_user.o -+pcap-objs := pcap_kern.o pcap_user.o -lpcap -L/usr/lib -+net-objs := net_kern.o net_user.o -+mconsole-objs := mconsole_kern.o mconsole_user.o -+hostaudio-objs := hostaudio_kern.o hostaudio_user.o -+ubd-objs := ubd_kern.o ubd_user.o -+port-objs := port_kern.o port_user.o -+harddog-objs := harddog_kern.o harddog_user.o -+ -+export-objs := mconsole_kern.o -+ -+obj-y = -+obj-$(CONFIG_SSL) += ssl.o -+obj-$(CONFIG_UML_NET_SLIP) += slip.o -+obj-$(CONFIG_UML_NET_SLIRP) += slirp.o -+obj-$(CONFIG_UML_NET_DAEMON) += daemon.o -+obj-$(CONFIG_UML_NET_MCAST) += mcast.o -+obj-$(CONFIG_UML_NET_PCAP) += pcap.o -+obj-$(CONFIG_UML_NET) += net.o -+obj-$(CONFIG_MCONSOLE) += mconsole.o -+obj-$(CONFIG_MMAPPER) += 
mmapper_kern.o -+obj-$(CONFIG_BLK_DEV_UBD) += ubd.o -+obj-$(CONFIG_HOSTAUDIO) += hostaudio.o -+obj-$(CONFIG_FD_CHAN) += fd.o -+obj-$(CONFIG_NULL_CHAN) += null.o -+obj-$(CONFIG_PORT_CHAN) += port.o -+obj-$(CONFIG_PTY_CHAN) += pty.o -+obj-$(CONFIG_TTY_CHAN) += tty.o -+obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o -+obj-$(CONFIG_UML_WATCHDOG) += harddog.o -+ -+CFLAGS_pcap_user.o = -I/usr/include/pcap -+ -+obj-y += stdio_console.o $(CHAN_OBJS) -+ -+USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \ -+ null.o pty.o tty.o xterm.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean: -+ -+modules: -+ -+fastdep: -+ -+dep: -+ -+archmrproper: -+ -+daemon.o : $(daemon-objs) -+ -+slip.o : $(slip-objs) -+ -+slirp.o : $(slirp-objs) -+ -+mcast.o : $(mcast-objs) -+ -+pcap.o : $(pcap-objs) -+ -+mconsole.o : $(mconsole-objs) -+ -+net.o : $(net-objs) -+ -+hostaudio.o : $(hostaudio-objs) -+ -+ubd.o : $(ubd-objs) -+ -+port.o : $(port-objs) -+ -+harddog.o : $(harddog-objs) -+ -+$(list-multi) : # This doesn't work, but should : '%.o : $(%-objs)' -+ $(LD) $(LD_RFLAG) -r -o $@ $($(patsubst %.o,%,$@)-objs) -diff -Naur -X ../exclude-files orig/arch/um/drivers/chan_kern.c um/arch/um/drivers/chan_kern.c ---- orig/arch/um/drivers/chan_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/chan_kern.c Thu Mar 6 19:25:16 2003 -@@ -0,0 +1,510 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "sigio.h" -+#include "line.h" -+ -+static void *not_configged_init(char *str, int device, struct chan_opts *opts) -+{ -+ printk(KERN_ERR "Using a channel type which is configured 
out of " -+ "UML\n"); -+ return(NULL); -+} -+ -+static int not_configged_open(int input, int output, int primary, void *data, -+ char **dev_out) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-ENODEV); -+} -+ -+static void not_configged_close(int fd, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+} -+ -+static int not_configged_read(int fd, char *c_out, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_write(int fd, const char *buf, int len, void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_console_write(int fd, const char *buf, int len, -+ void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-EIO); -+} -+ -+static int not_configged_window_size(int fd, void *data, unsigned short *rows, -+ unsigned short *cols) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+ return(-ENODEV); -+} -+ -+static void not_configged_free(void *data) -+{ -+ printk(KERN_ERR "Using a channel type which is configured out of " -+ "UML\n"); -+} -+ -+static struct chan_ops not_configged_ops = { -+ .init = not_configged_init, -+ .open = not_configged_open, -+ .close = not_configged_close, -+ .read = not_configged_read, -+ .write = not_configged_write, -+ .console_write = not_configged_console_write, -+ .window_size = not_configged_window_size, -+ .free = not_configged_free, -+ .winch = 0, -+}; -+ -+static void tty_receive_char(struct tty_struct *tty, char ch) -+{ -+ if(tty == NULL) return; -+ -+ if(I_IXON(tty) && !I_IXOFF(tty) && !tty->raw) { -+ if(ch == STOP_CHAR(tty)){ -+ stop_tty(tty); -+ return; -+ } -+ else if(ch == START_CHAR(tty)){ -+ start_tty(tty); -+ return; -+ } -+ } -+ -+ 
if((tty->flip.flag_buf_ptr == NULL) || -+ (tty->flip.char_buf_ptr == NULL)) -+ return; -+ tty_insert_flip_char(tty, ch, TTY_NORMAL); -+} -+ -+static int open_one_chan(struct chan *chan, int input, int output, int primary) -+{ -+ int fd; -+ -+ if(chan->opened) return(0); -+ if(chan->ops->open == NULL) fd = 0; -+ else fd = (*chan->ops->open)(input, output, primary, chan->data, -+ &chan->dev); -+ if(fd < 0) return(fd); -+ chan->fd = fd; -+ -+ chan->opened = 1; -+ return(0); -+} -+ -+int open_chan(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int ret, err = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ ret = open_one_chan(chan, chan->input, chan->output, -+ chan->primary); -+ if(chan->primary) err = ret; -+ } -+ return(err); -+} -+ -+void chan_enable_winch(struct list_head *chans, void *line) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary && chan->output && chan->ops->winch){ -+ register_winch(chan->fd, line); -+ return; -+ } -+ } -+} -+ -+void enable_chan(struct list_head *chans, void *data) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->opened) continue; -+ -+ line_setup_irq(chan->fd, chan->input, chan->output, data); -+ } -+} -+ -+void close_chan(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ /* Close in reverse order as open in case more than one of them -+ * refers to the same device and they save and restore that device's -+ * state. Then, the first one opened will have the original state, -+ * so it must be the last closed. 
-+ */ -+ for(ele = chans->prev; ele != chans; ele = ele->prev){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->opened) continue; -+ if(chan->ops->close != NULL) -+ (*chan->ops->close)(chan->fd, chan->data); -+ chan->opened = 0; -+ chan->fd = -1; -+ } -+} -+ -+int write_chan(struct list_head *chans, const char *buf, int len, -+ int write_irq) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int n, ret = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->output || (chan->ops->write == NULL)) continue; -+ n = chan->ops->write(chan->fd, buf, len, chan->data); -+ if(chan->primary){ -+ ret = n; -+ if((ret == -EAGAIN) || ((ret >= 0) && (ret < len))){ -+ reactivate_fd(chan->fd, write_irq); -+ if(ret == -EAGAIN) ret = 0; -+ } -+ } -+ } -+ return(ret); -+} -+ -+int console_write_chan(struct list_head *chans, const char *buf, int len) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ int n, ret = 0; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->output || (chan->ops->console_write == NULL)) -+ continue; -+ n = chan->ops->console_write(chan->fd, buf, len, chan->data); -+ if(chan->primary) ret = n; -+ } -+ return(ret); -+} -+ -+int chan_window_size(struct list_head *chans, unsigned short *rows_out, -+ unsigned short *cols_out) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary){ -+ if(chan->ops->window_size == NULL) return(0); -+ return(chan->ops->window_size(chan->fd, chan->data, -+ rows_out, cols_out)); -+ } -+ } -+ return(0); -+} -+ -+void free_one_chan(struct chan *chan) -+{ -+ list_del(&chan->list); -+ if(chan->ops->free != NULL) -+ (*chan->ops->free)(chan->data); -+ free_irq_by_fd(chan->fd); -+ if(chan->primary && chan->output) ignore_sigio_fd(chan->fd); -+ kfree(chan); -+} -+ -+void free_chan(struct list_head *chans) -+{ -+ struct list_head *ele, *next; -+ struct 
chan *chan; -+ -+ list_for_each_safe(ele, next, chans){ -+ chan = list_entry(ele, struct chan, list); -+ free_one_chan(chan); -+ } -+} -+ -+static int one_chan_config_string(struct chan *chan, char *str, int size, -+ char **error_out) -+{ -+ int n = 0; -+ -+ CONFIG_CHUNK(str, size, n, chan->ops->type, 0); -+ -+ if(chan->dev == NULL){ -+ CONFIG_CHUNK(str, size, n, "", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, ":", 0); -+ CONFIG_CHUNK(str, size, n, chan->dev, 0); -+ -+ return(n); -+} -+ -+static int chan_pair_config_string(struct chan *in, struct chan *out, -+ char *str, int size, char **error_out) -+{ -+ int n; -+ -+ n = one_chan_config_string(in, str, size, error_out); -+ str += n; -+ size -= n; -+ -+ if(in == out){ -+ CONFIG_CHUNK(str, size, n, "", 1); -+ return(n); -+ } -+ -+ CONFIG_CHUNK(str, size, n, ",", 1); -+ n = one_chan_config_string(out, str, size, error_out); -+ str += n; -+ size -= n; -+ CONFIG_CHUNK(str, size, n, "", 1); -+ -+ return(n); -+} -+ -+int chan_config_string(struct list_head *chans, char *str, int size, -+ char **error_out) -+{ -+ struct list_head *ele; -+ struct chan *chan, *in = NULL, *out = NULL; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->primary) -+ continue; -+ if(chan->input) -+ in = chan; -+ if(chan->output) -+ out = chan; -+ } -+ -+ return(chan_pair_config_string(in, out, str, size, error_out)); -+} -+ -+struct chan_type { -+ char *key; -+ struct chan_ops *ops; -+}; -+ -+struct chan_type chan_table[] = { -+#ifdef CONFIG_FD_CHAN -+ { "fd", &fd_ops }, -+#else -+ { "fd", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_NULL_CHAN -+ { "null", &null_ops }, -+#else -+ { "null", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_PORT_CHAN -+ { "port", &port_ops }, -+#else -+ { "port", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_PTY_CHAN -+ { "pty", &pty_ops }, -+ { "pts", &pts_ops }, -+#else -+ { "pty", ¬_configged_ops }, -+ { "pts", ¬_configged_ops }, -+#endif -+ -+#ifdef 
CONFIG_TTY_CHAN -+ { "tty", &tty_ops }, -+#else -+ { "tty", ¬_configged_ops }, -+#endif -+ -+#ifdef CONFIG_XTERM_CHAN -+ { "xterm", &xterm_ops }, -+#else -+ { "xterm", ¬_configged_ops }, -+#endif -+}; -+ -+static struct chan *parse_chan(char *str, int pri, int device, -+ struct chan_opts *opts) -+{ -+ struct chan_type *entry; -+ struct chan_ops *ops; -+ struct chan *chan; -+ void *data; -+ int i; -+ -+ ops = NULL; -+ data = NULL; -+ for(i = 0; i < sizeof(chan_table)/sizeof(chan_table[0]); i++){ -+ entry = &chan_table[i]; -+ if(!strncmp(str, entry->key, strlen(entry->key))){ -+ ops = entry->ops; -+ str += strlen(entry->key); -+ break; -+ } -+ } -+ if(ops == NULL){ -+ printk(KERN_ERR "parse_chan couldn't parse \"%s\"\n", -+ str); -+ return(NULL); -+ } -+ if(ops->init == NULL) return(NULL); -+ data = (*ops->init)(str, device, opts); -+ if(data == NULL) return(NULL); -+ -+ chan = kmalloc(sizeof(*chan), GFP_KERNEL); -+ if(chan == NULL) return(NULL); -+ *chan = ((struct chan) { .list = LIST_HEAD_INIT(chan->list), -+ .primary = 1, -+ .input = 0, -+ .output = 0, -+ .opened = 0, -+ .fd = -1, -+ .pri = pri, -+ .ops = ops, -+ .data = data }); -+ return(chan); -+} -+ -+int parse_chan_pair(char *str, struct list_head *chans, int pri, int device, -+ struct chan_opts *opts) -+{ -+ struct chan *new, *chan; -+ char *in, *out; -+ -+ if(!list_empty(chans)){ -+ chan = list_entry(chans->next, struct chan, list); -+ if(chan->pri >= pri) return(0); -+ free_chan(chans); -+ INIT_LIST_HEAD(chans); -+ } -+ -+ if((out = strchr(str, ',')) != NULL){ -+ in = str; -+ *out = '\0'; -+ out++; -+ new = parse_chan(in, pri, device, opts); -+ if(new == NULL) return(-1); -+ new->input = 1; -+ list_add(&new->list, chans); -+ -+ new = parse_chan(out, pri, device, opts); -+ if(new == NULL) return(-1); -+ list_add(&new->list, chans); -+ new->output = 1; -+ } -+ else { -+ new = parse_chan(str, pri, device, opts); -+ if(new == NULL) return(-1); -+ list_add(&new->list, chans); -+ new->input = 1; -+ new->output 
= 1; -+ } -+ return(0); -+} -+ -+int chan_out_fd(struct list_head *chans) -+{ -+ struct list_head *ele; -+ struct chan *chan; -+ -+ list_for_each(ele, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(chan->primary && chan->output) -+ return(chan->fd); -+ } -+ return(-1); -+} -+ -+void chan_interrupt(struct list_head *chans, struct tq_struct *task, -+ struct tty_struct *tty, int irq, void *dev) -+{ -+ struct list_head *ele, *next; -+ struct chan *chan; -+ int err; -+ char c; -+ -+ list_for_each_safe(ele, next, chans){ -+ chan = list_entry(ele, struct chan, list); -+ if(!chan->input || (chan->ops->read == NULL)) continue; -+ do { -+ if((tty != NULL) && -+ (tty->flip.count >= TTY_FLIPBUF_SIZE)){ -+ queue_task(task, &tq_timer); -+ goto out; -+ } -+ err = chan->ops->read(chan->fd, &c, chan->data); -+ if(err > 0) tty_receive_char(tty, c); -+ } while(err > 0); -+ if(err == 0) reactivate_fd(chan->fd, irq); -+ if(err == -EIO){ -+ if(chan->primary){ -+ if(tty != NULL) tty_hangup(tty); -+ line_disable(dev, irq); -+ close_chan(chans); -+ free_chan(chans); -+ return; -+ } -+ else { -+ if(chan->ops->close != NULL) -+ chan->ops->close(chan->fd, chan->data); -+ free_one_chan(chan); -+ } -+ } -+ } -+ out: -+ if(tty) tty_flip_buffer_push(tty); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/chan_user.c um/arch/um/drivers/chan_user.c ---- orig/arch/um/drivers/chan_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/chan_user.c Wed Mar 26 13:23:48 2003 -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "kern_util.h" -+#include "user_util.h" -+#include "chan_user.h" -+#include "user.h" -+#include "helper.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+void generic_close(int fd, void *unused) -+{ -+ close(fd); -+} -+ -+int generic_read(int fd, char *c_out, void *unused) -+{ -+ int n; -+ -+ n = read(fd, c_out, sizeof(*c_out)); -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-EIO); -+ return(1); -+} -+ -+int generic_write(int fd, const char *buf, int n, void *unused) -+{ -+ int count; -+ -+ count = write(fd, buf, n); -+ if(count < 0) return(-errno); -+ return(count); -+} -+ -+int generic_console_write(int fd, const char *buf, int n, void *unused) -+{ -+ struct termios save, new; -+ int err; -+ -+ if(isatty(fd)){ -+ tcgetattr(fd, &save); -+ new = save; -+ new.c_oflag |= OPOST; -+ tcsetattr(fd, TCSAFLUSH, &new); -+ } -+ err = generic_write(fd, buf, n, NULL); -+ if(isatty(fd)) tcsetattr(fd, TCSAFLUSH, &save); -+ return(err); -+} -+ -+int generic_window_size(int fd, void *unused, unsigned short *rows_out, -+ unsigned short *cols_out) -+{ -+ struct winsize size; -+ int ret = 0; -+ -+ if(ioctl(fd, TIOCGWINSZ, &size) == 0){ -+ ret = ((*rows_out != size.ws_row) || -+ (*cols_out != size.ws_col)); -+ *rows_out = size.ws_row; -+ *cols_out = size.ws_col; -+ } -+ return(ret); -+} -+ -+void generic_free(void 
*data) -+{ -+ kfree(data); -+} -+ -+static void winch_handler(int sig) -+{ -+} -+ -+struct winch_data { -+ int pty_fd; -+ int pipe_fd; -+ int close_me; -+}; -+ -+static int winch_thread(void *arg) -+{ -+ struct winch_data *data = arg; -+ sigset_t sigs; -+ int pty_fd, pipe_fd; -+ char c = 1; -+ -+ close(data->close_me); -+ pty_fd = data->pty_fd; -+ pipe_fd = data->pipe_fd; -+ if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)) -+ printk("winch_thread : failed to write synchronization " -+ "byte, errno = %d\n", errno); -+ -+ signal(SIGWINCH, winch_handler); -+ sigfillset(&sigs); -+ sigdelset(&sigs, SIGWINCH); -+ if(sigprocmask(SIG_SETMASK, &sigs, NULL) < 0){ -+ printk("winch_thread : sigprocmask failed, errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ if(setsid() < 0){ -+ printk("winch_thread : setsid failed, errno = %d\n", errno); -+ exit(1); -+ } -+ -+ if(ioctl(pty_fd, TIOCSCTTY, 0) < 0){ -+ printk("winch_thread : TIOCSCTTY failed, errno = %d\n", errno); -+ exit(1); -+ } -+ if(tcsetpgrp(pty_fd, os_getpid()) < 0){ -+ printk("winch_thread : tcsetpgrp failed, errno = %d\n", errno); -+ exit(1); -+ } -+ -+ if(read(pipe_fd, &c, sizeof(c)) != sizeof(c)) -+ printk("winch_thread : failed to read synchronization byte, " -+ "errno = %d\n", errno); -+ -+ while(1){ -+ pause(); -+ -+ if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)){ -+ printk("winch_thread : write failed, errno = %d\n", -+ errno); -+ } -+ } -+} -+ -+static int winch_tramp(int fd, void *device_data, int *fd_out) -+{ -+ struct winch_data data; -+ unsigned long stack; -+ int fds[2], pid, n, err; -+ char c; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err){ -+ printk("winch_tramp : os_pipe failed, errno = %d\n", -err); -+ return(err); -+ } -+ -+ data = ((struct winch_data) { .pty_fd = fd, -+ .pipe_fd = fds[1], -+ .close_me = fds[0] } ); -+ pid = run_helper_thread(winch_thread, &data, 0, &stack, 0); -+ if(pid < 0){ -+ printk("fork of winch_thread failed - errno = %d\n", errno); -+ return(pid); -+ } -+ -+ close(fds[1]); -+ *fd_out 
= fds[0]; -+ n = read(fds[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("winch_tramp : failed to read synchronization byte\n"); -+ printk("read returned %d, errno = %d\n", n, errno); -+ printk("fd %d will not support SIGWINCH\n", fd); -+ *fd_out = -1; -+ } -+ return(pid); -+} -+ -+void register_winch(int fd, void *device_data) -+{ -+ int pid, thread, thread_fd; -+ char c = 1; -+ -+ if(!isatty(fd)) return; -+ -+ pid = tcgetpgrp(fd); -+ if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, -+ device_data) && (pid == -1)){ -+ thread = winch_tramp(fd, device_data, &thread_fd); -+ if(fd != -1){ -+ register_winch_irq(thread_fd, fd, thread, device_data); -+ -+ if(write(thread_fd, &c, sizeof(c)) != sizeof(c)) -+ printk("register_winch : failed to write " -+ "synchronization byte\n"); -+ } -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/daemon.h um/arch/um/drivers/daemon.h ---- orig/arch/um/drivers/daemon.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/daemon.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+#define SWITCH_VERSION 3 -+ -+struct daemon_data { -+ char *sock_type; -+ char *ctl_sock; -+ void *ctl_addr; -+ void *data_addr; -+ void *local_addr; -+ int fd; -+ int control; -+ void *dev; -+}; -+ -+extern struct net_user_info daemon_user_info; -+ -+extern int daemon_user_write(int fd, void *buf, int len, -+ struct daemon_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/daemon_kern.c um/arch/um/drivers/daemon_kern.c ---- orig/arch/um/drivers/daemon_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/daemon_kern.c Sun Dec 15 21:19:17 2002 -@@ -0,0 +1,113 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "daemon.h" -+ -+struct daemon_init { -+ char *sock_type; -+ char *ctl_sock; -+}; -+ -+void daemon_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct daemon_data *dpri; -+ struct daemon_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ dpri = (struct daemon_data *) pri->user; -+ *dpri = ((struct daemon_data) -+ { .sock_type = init->sock_type, -+ .ctl_sock = init->ctl_sock, -+ .ctl_addr = NULL, -+ .data_addr = NULL, -+ .local_addr = NULL, -+ .fd = -1, -+ .control = -1, -+ .dev = dev }); -+ -+ printk("daemon backend (uml_switch version %d) - %s:%s", -+ SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock); -+ printk("\n"); -+} -+ -+static int daemon_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int daemon_write(int fd, struct sk_buff **skb, -+ struct 
uml_net_private *lp) -+{ -+ return(daemon_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct daemon_data *) &lp->user)); -+} -+ -+static struct net_kern_info daemon_kern_info = { -+ .init = daemon_init, -+ .protocol = eth_protocol, -+ .read = daemon_read, -+ .write = daemon_write, -+}; -+ -+int daemon_setup(char *str, char **mac_out, void *data) -+{ -+ struct daemon_init *init = data; -+ char *remain; -+ -+ *init = ((struct daemon_init) -+ { .sock_type = "unix", -+ .ctl_sock = "/tmp/uml.ctl" }); -+ -+ remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock, -+ NULL); -+ if(remain != NULL) -+ printk(KERN_WARNING "daemon_setup : Ignoring data socket " -+ "specification\n"); -+ -+ return(1); -+} -+ -+static struct transport daemon_transport = { -+ .list = LIST_HEAD_INIT(daemon_transport.list), -+ .name = "daemon", -+ .setup = daemon_setup, -+ .user = &daemon_user_info, -+ .kern = &daemon_kern_info, -+ .private_size = sizeof(struct daemon_data), -+ .setup_size = sizeof(struct daemon_init), -+}; -+ -+static int register_daemon(void) -+{ -+ register_transport(&daemon_transport); -+ return(1); -+} -+ -+__initcall(register_daemon); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/daemon_user.c um/arch/um/drivers/daemon_user.c ---- orig/arch/um/drivers/daemon_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/daemon_user.c Fri Jan 17 13:48:59 2003 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. 
-+ * Licensed under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "net_user.h" -+#include "daemon.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+enum request_type { REQ_NEW_CONTROL }; -+ -+#define SWITCH_MAGIC 0xfeedface -+ -+struct request_v3 { -+ uint32_t magic; -+ uint32_t version; -+ enum request_type type; -+ struct sockaddr_un sock; -+}; -+ -+static struct sockaddr_un *new_addr(void *name, int len) -+{ -+ struct sockaddr_un *sun; -+ -+ sun = um_kmalloc(sizeof(struct sockaddr_un)); -+ if(sun == NULL){ -+ printk("new_addr: allocation of sockaddr_un failed\n"); -+ return(NULL); -+ } -+ sun->sun_family = AF_UNIX; -+ memcpy(sun->sun_path, name, len); -+ return(sun); -+} -+ -+static int connect_to_switch(struct daemon_data *pri) -+{ -+ struct sockaddr_un *ctl_addr = pri->ctl_addr; -+ struct sockaddr_un *local_addr = pri->local_addr; -+ struct sockaddr_un *sun; -+ struct request_v3 req; -+ int fd, n, err; -+ -+ if((pri->control = socket(AF_UNIX, SOCK_STREAM, 0)) < 0){ -+ printk("daemon_open : control socket failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ if(connect(pri->control, (struct sockaddr *) ctl_addr, -+ sizeof(*ctl_addr)) < 0){ -+ printk("daemon_open : control connect failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out; -+ } -+ -+ if((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0){ -+ printk("daemon_open : data socket failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out; -+ } -+ if(bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0){ -+ printk("daemon_open : data bind failed, errno = %d\n", -+ errno); -+ err = -errno; -+ goto out_close; -+ } -+ -+ sun = um_kmalloc(sizeof(struct sockaddr_un)); -+ if(sun == NULL){ -+ printk("new_addr: allocation of sockaddr_un failed\n"); -+ err = -ENOMEM; -+ goto out_close; -+ } -+ -+ req.magic = SWITCH_MAGIC; -+ req.version 
= SWITCH_VERSION; -+ req.type = REQ_NEW_CONTROL; -+ req.sock = *local_addr; -+ n = write(pri->control, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ printk("daemon_open : control setup request returned %d, " -+ "errno = %d\n", n, errno); -+ err = -ENOTCONN; -+ goto out; -+ } -+ -+ n = read(pri->control, sun, sizeof(*sun)); -+ if(n != sizeof(*sun)){ -+ printk("daemon_open : read of data socket returned %d, " -+ "errno = %d\n", n, errno); -+ err = -ENOTCONN; -+ goto out_close; -+ } -+ -+ pri->data_addr = sun; -+ return(fd); -+ -+ out_close: -+ close(fd); -+ out: -+ close(pri->control); -+ return(err); -+} -+ -+static void daemon_user_init(void *data, void *dev) -+{ -+ struct daemon_data *pri = data; -+ struct timeval tv; -+ struct { -+ char zero; -+ int pid; -+ int usecs; -+ } name; -+ -+ if(!strcmp(pri->sock_type, "unix")) -+ pri->ctl_addr = new_addr(pri->ctl_sock, -+ strlen(pri->ctl_sock) + 1); -+ name.zero = 0; -+ name.pid = os_getpid(); -+ gettimeofday(&tv, NULL); -+ name.usecs = tv.tv_usec; -+ pri->local_addr = new_addr(&name, sizeof(name)); -+ pri->dev = dev; -+ pri->fd = connect_to_switch(pri); -+ if(pri->fd < 0){ -+ kfree(pri->local_addr); -+ pri->local_addr = NULL; -+ } -+} -+ -+static int daemon_open(void *data) -+{ -+ struct daemon_data *pri = data; -+ return(pri->fd); -+} -+ -+static void daemon_remove(void *data) -+{ -+ struct daemon_data *pri = data; -+ -+ close(pri->fd); -+ close(pri->control); -+ if(pri->data_addr != NULL) kfree(pri->data_addr); -+ if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); -+ if(pri->local_addr != NULL) kfree(pri->local_addr); -+} -+ -+int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri) -+{ -+ struct sockaddr_un *data_addr = pri->data_addr; -+ -+ return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr))); -+} -+ -+static int daemon_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info daemon_user_info = { -+ .init = daemon_user_init, -+ .open = daemon_open, -+ .close = NULL, 
-+ .remove = daemon_remove, -+ .set_mtu = daemon_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/fd.c um/arch/um/drivers/fd.c ---- orig/arch/um/drivers/fd.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/fd.c Sun Dec 15 20:57:25 2002 -@@ -0,0 +1,96 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include "user.h" -+#include "user_util.h" -+#include "chan_user.h" -+ -+struct fd_chan { -+ int fd; -+ int raw; -+ struct termios tt; -+ char str[sizeof("1234567890\0")]; -+}; -+ -+void *fd_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct fd_chan *data; -+ char *end; -+ int n; -+ -+ if(*str != ':'){ -+ printk("fd_init : channel type 'fd' must specify a file " -+ "descriptor\n"); -+ return(NULL); -+ } -+ str++; -+ n = strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk("fd_init : couldn't parse file descriptor '%s'\n", str); -+ return(NULL); -+ } -+ if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); -+ *data = ((struct fd_chan) { .fd = n, -+ .raw = opts->raw }); -+ return(data); -+} -+ -+int fd_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct fd_chan *data = d; -+ -+ if(data->raw && isatty(data->fd)){ -+ tcgetattr(data->fd, &data->tt); -+ raw(data->fd, 0); -+ } -+ sprintf(data->str, "%d", data->fd); -+ *dev_out = data->str; -+ return(data->fd); -+} -+ -+void fd_close(int fd, void *d) -+{ -+ struct fd_chan 
*data = d; -+ -+ if(data->raw && isatty(fd)){ -+ tcsetattr(fd, TCSAFLUSH, &data->tt); -+ data->raw = 0; -+ } -+} -+ -+int fd_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct fd_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops fd_ops = { -+ .type = "fd", -+ .init = fd_init, -+ .open = fd_open, -+ .close = fd_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = fd_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 1, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/harddog_kern.c um/arch/um/drivers/harddog_kern.c ---- orig/arch/um/drivers/harddog_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/harddog_kern.c Sun Dec 15 20:57:42 2002 -@@ -0,0 +1,194 @@ -+/* UML hardware watchdog, shamelessly stolen from: -+ * -+ * SoftDog 0.05: A Software Watchdog Device -+ * -+ * (c) Copyright 1996 Alan Cox , All Rights Reserved. -+ * http://www.redhat.com -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ * -+ * Neither Alan Cox nor CymruNet Ltd. admit liability nor provide -+ * warranty for any of this software. This material is provided -+ * "AS-IS" and at no charge. -+ * -+ * (c) Copyright 1995 Alan Cox -+ * -+ * Software only watchdog driver. Unlike its big brother the WDT501P -+ * driver this won't always recover a failed machine. 
-+ * -+ * 03/96: Angelo Haritsis : -+ * Modularised. -+ * Added soft_margin; use upon insmod to change the timer delay. -+ * NB: uses same minor as wdt (WATCHDOG_MINOR); we could use separate -+ * minors. -+ * -+ * 19980911 Alan Cox -+ * Made SMP safe for 2.3.x -+ * -+ * 20011127 Joel Becker (jlbec@evilplan.org> -+ * Added soft_noboot; Allows testing the softdog trigger without -+ * requiring a recompile. -+ * Added WDIOC_GETTIMEOUT and WDIOC_SETTIMOUT. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "helper.h" -+#include "mconsole.h" -+ -+MODULE_LICENSE("GPL"); -+ -+/* Locked by the BKL in harddog_open and harddog_release */ -+static int timer_alive; -+static int harddog_in_fd = -1; -+static int harddog_out_fd = -1; -+ -+/* -+ * Allow only one person to hold it open -+ */ -+ -+extern int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock); -+ -+static int harddog_open(struct inode *inode, struct file *file) -+{ -+ int err; -+ char *sock = NULL; -+ -+ lock_kernel(); -+ if(timer_alive) -+ return -EBUSY; -+#ifdef CONFIG_HARDDOG_NOWAYOUT -+ MOD_INC_USE_COUNT; -+#endif -+ -+#ifdef CONFIG_MCONSOLE -+ sock = mconsole_notify_socket(); -+#endif -+ err = start_watchdog(&harddog_in_fd, &harddog_out_fd, sock); -+ if(err) return(err); -+ -+ timer_alive = 1; -+ unlock_kernel(); -+ return 0; -+} -+ -+extern void stop_watchdog(int in_fd, int out_fd); -+ -+static int harddog_release(struct inode *inode, struct file *file) -+{ -+ /* -+ * Shut off the timer. 
-+ */ -+ lock_kernel(); -+ -+ stop_watchdog(harddog_in_fd, harddog_out_fd); -+ harddog_in_fd = -1; -+ harddog_out_fd = -1; -+ -+ timer_alive=0; -+ unlock_kernel(); -+ return 0; -+} -+ -+extern int ping_watchdog(int fd); -+ -+static ssize_t harddog_write(struct file *file, const char *data, size_t len, -+ loff_t *ppos) -+{ -+ /* Can't seek (pwrite) on this device */ -+ if (ppos != &file->f_pos) -+ return -ESPIPE; -+ -+ /* -+ * Refresh the timer. -+ */ -+ if(len) -+ return(ping_watchdog(harddog_out_fd)); -+ return 0; -+} -+ -+static int harddog_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ static struct watchdog_info ident = { -+ WDIOF_SETTIMEOUT, -+ 0, -+ "UML Hardware Watchdog" -+ }; -+ switch (cmd) { -+ default: -+ return -ENOTTY; -+ case WDIOC_GETSUPPORT: -+ if(copy_to_user((struct harddog_info *)arg, &ident, -+ sizeof(ident))) -+ return -EFAULT; -+ return 0; -+ case WDIOC_GETSTATUS: -+ case WDIOC_GETBOOTSTATUS: -+ return put_user(0,(int *)arg); -+ case WDIOC_KEEPALIVE: -+ return(ping_watchdog(harddog_out_fd)); -+ } -+} -+ -+static struct file_operations harddog_fops = { -+ .owner = THIS_MODULE, -+ .write = harddog_write, -+ .ioctl = harddog_ioctl, -+ .open = harddog_open, -+ .release = harddog_release, -+}; -+ -+static struct miscdevice harddog_miscdev = { -+ .minor = WATCHDOG_MINOR, -+ .name = "watchdog", -+ .fops = &harddog_fops, -+}; -+ -+static char banner[] __initdata = KERN_INFO "UML Watchdog Timer\n"; -+ -+static int __init harddog_init(void) -+{ -+ int ret; -+ -+ ret = misc_register(&harddog_miscdev); -+ -+ if (ret) -+ return ret; -+ -+ printk(banner); -+ -+ return(0); -+} -+ -+static void __exit harddog_exit(void) -+{ -+ misc_deregister(&harddog_miscdev); -+} -+ -+module_init(harddog_init); -+module_exit(harddog_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/harddog_user.c um/arch/um/drivers/harddog_user.c ---- orig/arch/um/drivers/harddog_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/harddog_user.c Wed Dec 4 16:38:05 2002 -@@ -0,0 +1,137 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include "user_util.h" -+#include "user.h" -+#include "helper.h" -+#include "mconsole.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+struct dog_data { -+ int stdin; -+ int stdout; -+ int close_me[2]; -+}; -+ -+static void pre_exec(void *d) -+{ -+ struct dog_data *data = d; -+ -+ dup2(data->stdin, 0); -+ dup2(data->stdout, 1); -+ dup2(data->stdout, 2); -+ close(data->stdin); -+ close(data->stdout); -+ close(data->close_me[0]); -+ close(data->close_me[1]); -+} -+ -+int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) -+{ -+ struct dog_data data; -+ int in_fds[2], out_fds[2], pid, n, err; -+ char pid_buf[sizeof("nnnnn\0")], c; -+ char *pid_args[] = { "/usr/bin/uml_watchdog", "-pid", pid_buf, NULL }; -+ char *mconsole_args[] = { "/usr/bin/uml_watchdog", "-mconsole", NULL, -+ NULL }; -+ char **args = NULL; -+ -+ err = os_pipe(in_fds, 1, 0); -+ if(err){ -+ printk("harddog_open - os_pipe failed, errno = %d\n", -err); -+ return(err); -+ } -+ -+ err = os_pipe(out_fds, 1, 0); -+ if(err){ -+ printk("harddog_open - os_pipe failed, errno = %d\n", -err); -+ return(err); -+ } -+ -+ data.stdin = out_fds[0]; -+ data.stdout = in_fds[1]; -+ data.close_me[0] = out_fds[1]; -+ data.close_me[1] = in_fds[0]; -+ -+ if(sock != NULL){ -+ mconsole_args[2] = sock; -+ args = 
mconsole_args; -+ } -+ else { -+ /* XXX The os_getpid() is not SMP correct */ -+ sprintf(pid_buf, "%d", CHOOSE_MODE(tracing_pid, os_getpid())); -+ args = pid_args; -+ } -+ -+ pid = run_helper(pre_exec, &data, args, NULL); -+ -+ close(out_fds[0]); -+ close(in_fds[1]); -+ -+ if(pid < 0){ -+ err = -pid; -+ printk("harddog_open - run_helper failed, errno = %d\n", err); -+ goto out; -+ } -+ -+ n = read(in_fds[0], &c, sizeof(c)); -+ if(n == 0){ -+ printk("harddog_open - EOF on watchdog pipe\n"); -+ helper_wait(pid); -+ err = -EIO; -+ goto out; -+ } -+ else if(n < 0){ -+ printk("harddog_open - read of watchdog pipe failed, " -+ "errno = %d\n", errno); -+ helper_wait(pid); -+ err = -errno; -+ goto out; -+ } -+ *in_fd_ret = in_fds[0]; -+ *out_fd_ret = out_fds[1]; -+ return(0); -+ out: -+ close(out_fds[1]); -+ close(in_fds[0]); -+ return(err); -+} -+ -+void stop_watchdog(int in_fd, int out_fd) -+{ -+ close(in_fd); -+ close(out_fd); -+} -+ -+int ping_watchdog(int fd) -+{ -+ int n; -+ char c = '\n'; -+ -+ n = write(fd, &c, sizeof(c)); -+ if(n < sizeof(c)){ -+ printk("ping_watchdog - write failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ return 1; -+ -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/hostaudio_kern.c um/arch/um/drivers/hostaudio_kern.c ---- orig/arch/um/drivers/hostaudio_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/hostaudio_kern.c Fri Mar 28 21:57:16 2003 -@@ -0,0 +1,330 @@ -+/* -+ * Copyright (C) 2002 Steve Schmidtke -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/module.h" -+#include "linux/version.h" -+#include "linux/init.h" -+#include "linux/slab.h" -+#include "linux/fs.h" -+#include "linux/sound.h" -+#include "linux/soundcard.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+#include "init.h" -+#include "hostaudio.h" -+ -+/* Only changed from linux_main at boot time */ -+char *dsp = HOSTAUDIO_DEV_DSP; -+char *mixer = HOSTAUDIO_DEV_MIXER; -+ -+#ifndef MODULE -+static int set_dsp(char *name, int *add) -+{ -+ dsp = name; -+ return(0); -+} -+ -+__uml_setup("dsp=", set_dsp, -+"dsp=\n" -+" This is used to specify the host dsp device to the hostaudio driver.\n" -+" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" -+); -+ -+static int set_mixer(char *name, int *add) -+{ -+ mixer = name; -+ return(0); -+} -+ -+__uml_setup("mixer=", set_mixer, -+"mixer=\n" -+" This is used to specify the host mixer device to the hostaudio driver.\n" -+" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" -+); -+#endif -+ -+/* /dev/dsp file operations */ -+ -+static ssize_t hostaudio_read(struct file *file, char *buffer, size_t count, -+ loff_t *ppos) -+{ -+ struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: read called, count = %d\n", count); -+#endif -+ -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ err = hostaudio_read_user(state, kbuf, count, ppos); -+ if(err < 0) -+ goto out; -+ -+ if(copy_to_user(buffer, kbuf, 
err)) -+ err = -EFAULT; -+ -+ out: -+ kfree(kbuf); -+ return(err); -+} -+ -+static ssize_t hostaudio_write(struct file *file, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct hostaudio_state *state = file->private_data; -+ void *kbuf; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: write called, count = %d\n", count); -+#endif -+ -+ kbuf = kmalloc(count, GFP_KERNEL); -+ if(kbuf == NULL) -+ return(-ENOMEM); -+ -+ err = -EFAULT; -+ if(copy_from_user(kbuf, buffer, count)) -+ goto out; -+ -+ err = hostaudio_write_user(state, kbuf, count, ppos); -+ if(err < 0) -+ goto out; -+ -+ out: -+ kfree(kbuf); -+ return(err); -+} -+ -+static unsigned int hostaudio_poll(struct file *file, -+ struct poll_table_struct *wait) -+{ -+ unsigned int mask = 0; -+ -+#ifdef DEBUG -+ printk("hostaudio: poll called (unimplemented)\n"); -+#endif -+ -+ return(mask); -+} -+ -+static int hostaudio_ioctl(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hostaudio_state *state = file->private_data; -+ unsigned long data = 0; -+ int err; -+ -+#ifdef DEBUG -+ printk("hostaudio: ioctl called, cmd = %u\n", cmd); -+#endif -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(get_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } -+ -+ err = hostaudio_ioctl_user(state, cmd, (unsigned long) &data); -+ -+ switch(cmd){ -+ case SNDCTL_DSP_SPEED: -+ case SNDCTL_DSP_STEREO: -+ case SNDCTL_DSP_GETBLKSIZE: -+ case SNDCTL_DSP_CHANNELS: -+ case SNDCTL_DSP_SUBDIVIDE: -+ case SNDCTL_DSP_SETFRAGMENT: -+ if(put_user(data, (int *) arg)) -+ return(-EFAULT); -+ break; -+ default: -+ break; -+ } -+ -+ return(err); -+} -+ -+static int hostaudio_open(struct inode *inode, struct file *file) -+{ -+ struct hostaudio_state *state; -+ int r = 0, w = 0; -+ int ret; -+ -+#ifdef DEBUG -+ 
printk("hostaudio: open called (host: %s)\n", dsp); -+#endif -+ -+ state = kmalloc(sizeof(struct hostaudio_state), GFP_KERNEL); -+ if(state == NULL) return(-ENOMEM); -+ -+ if(file->f_mode & FMODE_READ) r = 1; -+ if(file->f_mode & FMODE_WRITE) w = 1; -+ -+ ret = hostaudio_open_user(state, r, w, dsp); -+ if(ret < 0){ -+ kfree(state); -+ return(ret); -+ } -+ -+ file->private_data = state; -+ return(0); -+} -+ -+static int hostaudio_release(struct inode *inode, struct file *file) -+{ -+ struct hostaudio_state *state = file->private_data; -+ int ret; -+ -+#ifdef DEBUG -+ printk("hostaudio: release called\n"); -+#endif -+ -+ ret = hostaudio_release_user(state); -+ kfree(state); -+ -+ return(ret); -+} -+ -+/* /dev/mixer file operations */ -+ -+static int hostmixer_ioctl_mixdev(struct inode *inode, struct file *file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hostmixer_state *state = file->private_data; -+ -+#ifdef DEBUG -+ printk("hostmixer: ioctl called\n"); -+#endif -+ -+ return(hostmixer_ioctl_mixdev_user(state, cmd, arg)); -+} -+ -+static int hostmixer_open_mixdev(struct inode *inode, struct file *file) -+{ -+ struct hostmixer_state *state; -+ int r = 0, w = 0; -+ int ret; -+ -+#ifdef DEBUG -+ printk("hostmixer: open called (host: %s)\n", mixer); -+#endif -+ -+ state = kmalloc(sizeof(struct hostmixer_state), GFP_KERNEL); -+ if(state == NULL) return(-ENOMEM); -+ -+ if(file->f_mode & FMODE_READ) r = 1; -+ if(file->f_mode & FMODE_WRITE) w = 1; -+ -+ ret = hostmixer_open_mixdev_user(state, r, w, mixer); -+ -+ if(ret < 0){ -+ kfree(state); -+ return(ret); -+ } -+ -+ file->private_data = state; -+ return(0); -+} -+ -+static int hostmixer_release(struct inode *inode, struct file *file) -+{ -+ struct hostmixer_state *state = file->private_data; -+ int ret; -+ -+#ifdef DEBUG -+ printk("hostmixer: release called\n"); -+#endif -+ -+ ret = hostmixer_release_mixdev_user(state); -+ kfree(state); -+ -+ return(ret); -+} -+ -+ -+/* kernel module operations */ -+ -+static 
struct file_operations hostaudio_fops = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .read = hostaudio_read, -+ .write = hostaudio_write, -+ .poll = hostaudio_poll, -+ .ioctl = hostaudio_ioctl, -+ .mmap = NULL, -+ .open = hostaudio_open, -+ .release = hostaudio_release, -+}; -+ -+static struct file_operations hostmixer_fops = { -+ .owner = THIS_MODULE, -+ .llseek = no_llseek, -+ .ioctl = hostmixer_ioctl_mixdev, -+ .open = hostmixer_open_mixdev, -+ .release = hostmixer_release, -+}; -+ -+struct { -+ int dev_audio; -+ int dev_mixer; -+} module_data; -+ -+MODULE_AUTHOR("Steve Schmidtke"); -+MODULE_DESCRIPTION("UML Audio Relay"); -+MODULE_LICENSE("GPL"); -+ -+static int __init hostaudio_init_module(void) -+{ -+ printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", -+ dsp, mixer); -+ -+ module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); -+ if(module_data.dev_audio < 0){ -+ printk(KERN_ERR "hostaudio: couldn't register DSP device!\n"); -+ return -ENODEV; -+ } -+ -+ module_data.dev_mixer = register_sound_mixer(&hostmixer_fops, -1); -+ if(module_data.dev_mixer < 0){ -+ printk(KERN_ERR "hostmixer: couldn't register mixer " -+ "device!\n"); -+ unregister_sound_dsp(module_data.dev_audio); -+ return -ENODEV; -+ } -+ -+ return 0; -+} -+ -+static void __exit hostaudio_cleanup_module (void) -+{ -+ unregister_sound_mixer(module_data.dev_mixer); -+ unregister_sound_dsp(module_data.dev_audio); -+} -+ -+module_init(hostaudio_init_module); -+module_exit(hostaudio_cleanup_module); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/hostaudio_user.c um/arch/um/drivers/hostaudio_user.c ---- orig/arch/um/drivers/hostaudio_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/hostaudio_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,149 @@ -+/* -+ * Copyright (C) 2002 Steve Schmidtke -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "hostaudio.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "os.h" -+ -+/* /dev/dsp file operations */ -+ -+ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ ssize_t ret; -+ -+#ifdef DEBUG -+ printk("hostaudio: read_user called, count = %d\n", count); -+#endif -+ -+ ret = read(state->fd, buffer, count); -+ -+ if(ret < 0) return(-errno); -+ return(ret); -+} -+ -+ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ ssize_t ret; -+ -+#ifdef DEBUG -+ printk("hostaudio: write_user called, count = %d\n", count); -+#endif -+ -+ ret = write(state->fd, buffer, count); -+ -+ if(ret < 0) return(-errno); -+ return(ret); -+} -+ -+int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd, -+ unsigned long arg) -+{ -+ int ret; -+#ifdef DEBUG -+ printk("hostaudio: ioctl_user called, cmd = %u\n", cmd); -+#endif -+ -+ ret = ioctl(state->fd, cmd, arg); -+ -+ if(ret < 0) return(-errno); -+ return(ret); -+} -+ -+int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp) -+{ -+#ifdef DEBUG -+ printk("hostaudio: open_user called\n"); -+#endif -+ -+ state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); -+ -+ if(state->fd >= 0) return(0); -+ -+ printk("hostaudio_open_user failed to open '%s', errno = %d\n", -+ dsp, errno); -+ -+ 
return(-errno); -+} -+ -+int hostaudio_release_user(struct hostaudio_state *state) -+{ -+#ifdef DEBUG -+ printk("hostaudio: release called\n"); -+#endif -+ if(state->fd >= 0){ -+ close(state->fd); -+ state->fd=-1; -+ } -+ -+ return(0); -+} -+ -+/* /dev/mixer file operations */ -+ -+int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, -+ unsigned int cmd, unsigned long arg) -+{ -+ int ret; -+#ifdef DEBUG -+ printk("hostmixer: ioctl_user called cmd = %u\n",cmd); -+#endif -+ -+ ret = ioctl(state->fd, cmd, arg); -+ if(ret < 0) -+ return(-errno); -+ return(ret); -+} -+ -+int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w, -+ char *mixer) -+{ -+#ifdef DEBUG -+ printk("hostmixer: open_user called\n"); -+#endif -+ -+ state->fd = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); -+ -+ if(state->fd >= 0) return(0); -+ -+ printk("hostaudio_open_mixdev_user failed to open '%s', errno = %d\n", -+ mixer, errno); -+ -+ return(-errno); -+} -+ -+int hostmixer_release_mixdev_user(struct hostmixer_state *state) -+{ -+#ifdef DEBUG -+ printk("hostmixer: release_user called\n"); -+#endif -+ -+ if(state->fd >= 0){ -+ close(state->fd); -+ state->fd = -1; -+ } -+ -+ return 0; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/line.c um/arch/um/drivers/line.c ---- orig/arch/um/drivers/line.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/line.c Wed Mar 26 15:09:44 2003 -@@ -0,0 +1,589 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "linux/list.h" -+#include "linux/devfs_fs_kernel.h" -+#include "asm/irq.h" -+#include "asm/uaccess.h" -+#include "chan_kern.h" -+#include "irq_user.h" -+#include "line.h" -+#include "kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "os.h" -+ -+#define LINE_BUFSIZE 4096 -+ -+void line_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct line *dev = data; -+ -+ if(dev->count > 0) -+ chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, -+ dev); -+} -+ -+void line_timer_cb(void *arg) -+{ -+ struct line *dev = arg; -+ -+ line_interrupt(dev->driver->read_irq, dev, NULL); -+} -+ -+static void buffer_data(struct line *line, const char *buf, int len) -+{ -+ int end; -+ -+ if(line->buffer == NULL){ -+ line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); -+ if(line->buffer == NULL){ -+ printk("buffer_data - atomic allocation failed\n"); -+ return; -+ } -+ line->head = line->buffer; -+ line->tail = line->buffer; -+ } -+ end = line->buffer + LINE_BUFSIZE - line->tail; -+ if(len < end){ -+ memcpy(line->tail, buf, len); -+ line->tail += len; -+ } -+ else { -+ memcpy(line->tail, buf, end); -+ buf += end; -+ len -= end; -+ memcpy(line->buffer, buf, len); -+ line->tail = line->buffer + len; -+ } -+} -+ -+static int flush_buffer(struct line *line) -+{ -+ int n, count; -+ -+ if((line->buffer == NULL) || (line->head == line->tail)) return(1); -+ -+ if(line->tail < line->head){ -+ count = line->buffer + LINE_BUFSIZE - 
line->head; -+ n = write_chan(&line->chan_list, line->head, count, -+ line->driver->write_irq); -+ if(n < 0) return(n); -+ if(n == count) line->head = line->buffer; -+ else { -+ line->head += n; -+ return(0); -+ } -+ } -+ -+ count = line->tail - line->head; -+ n = write_chan(&line->chan_list, line->head, count, -+ line->driver->write_irq); -+ if(n < 0) return(n); -+ -+ line->head += n; -+ return(line->head == line->tail); -+} -+ -+int line_write(struct line *lines, struct tty_struct *tty, int from_user, -+ const char *buf, int len) -+{ -+ struct line *line; -+ char *new; -+ unsigned long flags; -+ int n, err, i; -+ -+ if(tty->stopped) return 0; -+ -+ if(from_user){ -+ new = kmalloc(len, GFP_KERNEL); -+ if(new == NULL) -+ return(0); -+ n = copy_from_user(new, buf, len); -+ if(n == len) -+ return(-EFAULT); -+ buf = new; -+ } -+ -+ i = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[i]; -+ -+ down(&line->sem); -+ if(line->head != line->tail){ -+ local_irq_save(flags); -+ buffer_data(line, buf, len); -+ err = flush_buffer(line); -+ local_irq_restore(flags); -+ if(err <= 0) -+ goto out; -+ } -+ else { -+ n = write_chan(&line->chan_list, buf, len, -+ line->driver->write_irq); -+ if(n < 0){ -+ len = n; -+ goto out; -+ } -+ if(n < len) -+ buffer_data(line, buf + n, len - n); -+ } -+ out: -+ up(&line->sem); -+ -+ if(from_user) -+ kfree(buf); -+ return(len); -+} -+ -+void line_write_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct line *dev = data; -+ struct tty_struct *tty = dev->tty; -+ int err; -+ -+ err = flush_buffer(dev); -+ if(err == 0) return; -+ else if(err < 0){ -+ dev->head = dev->buffer; -+ dev->tail = dev->buffer; -+ } -+ -+ if(tty == NULL) return; -+ -+ if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && -+ (tty->ldisc.write_wakeup != NULL)) -+ (tty->ldisc.write_wakeup)(tty); -+ -+ /* BLOCKING mode -+ * In blocking mode, everything sleeps on tty->write_wait. 
-+ * Sleeping in the console driver would break non-blocking -+ * writes. -+ */ -+ -+ if (waitqueue_active(&tty->write_wait)) -+ wake_up_interruptible(&tty->write_wait); -+ -+} -+ -+int line_write_room(struct tty_struct *tty) -+{ -+ struct line *dev = tty->driver_data; -+ int n; -+ -+ if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); -+ -+ n = dev->head - dev->tail; -+ if(n <= 0) n = LINE_BUFSIZE + n; -+ return(n - 1); -+} -+ -+int line_setup_irq(int fd, int input, int output, void *data) -+{ -+ struct line *line = data; -+ struct line_driver *driver = line->driver; -+ int err = 0, flags = SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM; -+ -+ if(input) err = um_request_irq(driver->read_irq, fd, IRQ_READ, -+ line_interrupt, flags, -+ driver->read_irq_name, line); -+ if(err) return(err); -+ if(output) err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, -+ line_write_interrupt, flags, -+ driver->write_irq_name, line); -+ line->have_irq = 1; -+ return(err); -+} -+ -+void line_disable(struct line *line, int current_irq) -+{ -+ if(!line->have_irq) return; -+ -+ if(line->driver->read_irq == current_irq) -+ free_irq_later(line->driver->read_irq, line); -+ else -+ free_irq(line->driver->read_irq, line); -+ -+ if(line->driver->write_irq == current_irq) -+ free_irq_later(line->driver->write_irq, line); -+ else -+ free_irq(line->driver->write_irq, line); -+ -+ line->have_irq = 0; -+} -+ -+int line_open(struct line *lines, struct tty_struct *tty, -+ struct chan_opts *opts) -+{ -+ struct line *line; -+ int n, err = 0; -+ -+ if(tty == NULL) n = 0; -+ else n = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[n]; -+ -+ down(&line->sem); -+ if(line->count == 0){ -+ if(!line->valid){ -+ err = -ENODEV; -+ goto out; -+ } -+ if(list_empty(&line->chan_list)){ -+ err = parse_chan_pair(line->init_str, &line->chan_list, -+ line->init_pri, n, opts); -+ if(err) goto out; -+ err = open_chan(&line->chan_list); -+ if(err) goto out; -+ } -+ enable_chan(&line->chan_list, line); -+ 
INIT_TQUEUE(&line->task, line_timer_cb, line); -+ } -+ -+ if(!line->sigio){ -+ chan_enable_winch(&line->chan_list, line); -+ line->sigio = 1; -+ } -+ -+ /* This is outside the if because the initial console is opened -+ * with tty == NULL -+ */ -+ line->tty = tty; -+ -+ if(tty != NULL){ -+ tty->driver_data = line; -+ chan_window_size(&line->chan_list, &tty->winsize.ws_row, -+ &tty->winsize.ws_col); -+ } -+ -+ line->count++; -+ out: -+ up(&line->sem); -+ return(err); -+} -+ -+void line_close(struct line *lines, struct tty_struct *tty) -+{ -+ struct line *line; -+ int n; -+ -+ if(tty == NULL) n = 0; -+ else n = minor(tty->device) - tty->driver.minor_start; -+ line = &lines[n]; -+ -+ down(&line->sem); -+ line->count--; -+ -+ /* I don't like this, but I can't think of anything better. What's -+ * going on is that the tty is in the process of being closed for -+ * the last time. Its count hasn't been dropped yet, so it's still -+ * at 1. This may happen when line->count != 0 because of the initial -+ * console open (without a tty) bumping it up to 1. -+ */ -+ if((line->tty != NULL) && (line->tty->count == 1)) -+ line->tty = NULL; -+ if(line->count == 0) -+ line_disable(line, -1); -+ up(&line->sem); -+} -+ -+void close_lines(struct line *lines, int nlines) -+{ -+ int i; -+ -+ for(i = 0; i < nlines; i++) -+ close_chan(&lines[i].chan_list); -+} -+ -+int line_setup(struct line *lines, int num, char *init, int all_allowed) -+{ -+ int i, n; -+ char *end; -+ -+ if(*init == '=') n = -1; -+ else { -+ n = simple_strtoul(init, &end, 0); -+ if(*end != '='){ -+ printk(KERN_ERR "line_setup failed to parse \"%s\"\n", -+ init); -+ return(1); -+ } -+ init = end; -+ } -+ init++; -+ if((n >= 0) && (n >= num)){ -+ printk("line_setup - %d out of range ((0 ... 
%d) allowed)\n", -+ n, num); -+ return(1); -+ } -+ else if(n >= 0){ -+ if(lines[n].count > 0){ -+ printk("line_setup - device %d is open\n", n); -+ return(1); -+ } -+ if(lines[n].init_pri <= INIT_ONE){ -+ lines[n].init_pri = INIT_ONE; -+ if(!strcmp(init, "none")) lines[n].valid = 0; -+ else { -+ lines[n].init_str = init; -+ lines[n].valid = 1; -+ } -+ } -+ } -+ else if(!all_allowed){ -+ printk("line_setup - can't configure all devices from " -+ "mconsole\n"); -+ return(1); -+ } -+ else { -+ for(i = 0; i < num; i++){ -+ if(lines[i].init_pri <= INIT_ALL){ -+ lines[i].init_pri = INIT_ALL; -+ if(!strcmp(init, "none")) lines[i].valid = 0; -+ else { -+ lines[i].init_str = init; -+ lines[i].valid = 1; -+ } -+ } -+ } -+ } -+ return(0); -+} -+ -+int line_config(struct line *lines, int num, char *str) -+{ -+ char *new = uml_strdup(str); -+ -+ if(new == NULL){ -+ printk("line_config - uml_strdup failed\n"); -+ return(-ENOMEM); -+ } -+ return(line_setup(lines, num, new, 0)); -+} -+ -+int line_get_config(char *name, struct line *lines, int num, char *str, -+ int size, char **error_out) -+{ -+ struct line *line; -+ char *end; -+ int dev, n = 0; -+ -+ dev = simple_strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ *error_out = "line_get_config failed to parse device number"; -+ return(0); -+ } -+ -+ if((dev < 0) || (dev >= num)){ -+ *error_out = "device number of of range"; -+ return(0); -+ } -+ -+ line = &lines[dev]; -+ -+ down(&line->sem); -+ if(!line->valid) -+ CONFIG_CHUNK(str, size, n, "none", 1); -+ else if(line->count == 0) -+ CONFIG_CHUNK(str, size, n, line->init_str, 1); -+ else n = chan_config_string(&line->chan_list, str, size, error_out); -+ up(&line->sem); -+ -+ return(n); -+} -+ -+int line_remove(struct line *lines, int num, char *str) -+{ -+ char config[sizeof("conxxxx=none\0")]; -+ -+ sprintf(config, "%s=none", str); -+ return(line_setup(lines, num, config, 0)); -+} -+ -+void line_register_devfs(struct lines *set, struct line_driver *line_driver, 
-+ struct tty_driver *driver, struct line *lines, -+ int nlines) -+{ -+ int err, i, n; -+ char *from, *to; -+ -+ driver->driver_name = line_driver->name; -+ driver->name = line_driver->devfs_name; -+ driver->major = line_driver->major; -+ driver->minor_start = line_driver->minor_start; -+ driver->type = line_driver->type; -+ driver->subtype = line_driver->subtype; -+ driver->magic = TTY_DRIVER_MAGIC; -+ driver->flags = TTY_DRIVER_REAL_RAW; -+ -+ n = set->num; -+ driver->num = n; -+ driver->table = kmalloc(n * sizeof(driver->table[0]), GFP_KERNEL); -+ driver->termios = kmalloc(n * sizeof(driver->termios[0]), GFP_KERNEL); -+ driver->termios_locked = kmalloc(n * sizeof(driver->termios_locked[0]), -+ GFP_KERNEL); -+ if((driver->table == NULL) || (driver->termios == NULL) || -+ (driver->termios_locked == NULL)) -+ panic("Failed to allocate driver table"); -+ -+ memset(driver->table, 0, n * sizeof(driver->table[0])); -+ memset(driver->termios, 0, n * sizeof(driver->termios[0])); -+ memset(driver->termios_locked, 0, -+ n * sizeof(driver->termios_locked[0])); -+ -+ driver->write_room = line_write_room; -+ driver->init_termios = tty_std_termios; -+ -+ if (tty_register_driver(driver)) -+ panic("line_register_devfs : Couldn't register driver\n"); -+ -+ from = line_driver->symlink_from; -+ to = line_driver->symlink_to; -+ err = devfs_mk_symlink(NULL, from, 0, to, NULL, NULL); -+ if(err) printk("Symlink creation from /dev/%s to /dev/%s " -+ "returned %d\n", from, to, err); -+ -+ for(i = 0; i < nlines; i++){ -+ if(!lines[i].valid) -+ tty_unregister_devfs(driver, driver->minor_start + i); -+ } -+ -+ mconsole_register_dev(&line_driver->mc); -+} -+ -+void lines_init(struct line *lines, int nlines) -+{ -+ struct line *line; -+ int i; -+ -+ for(i = 0; i < nlines; i++){ -+ line = &lines[i]; -+ INIT_LIST_HEAD(&line->chan_list); -+ sema_init(&line->sem, 1); -+ if(line->init_str != NULL){ -+ line->init_str = uml_strdup(line->init_str); -+ if(line->init_str == NULL) -+ printk("lines_init 
- uml_strdup returned " -+ "NULL\n"); -+ } -+ } -+} -+ -+struct winch { -+ struct list_head list; -+ int fd; -+ int tty_fd; -+ int pid; -+ struct line *line; -+}; -+ -+void winch_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ struct winch *winch = data; -+ struct tty_struct *tty; -+ int err; -+ char c; -+ -+ if(winch->fd != -1){ -+ err = generic_read(winch->fd, &c, NULL); -+ if(err < 0){ -+ if(err != -EAGAIN){ -+ printk("winch_interrupt : read failed, " -+ "errno = %d\n", -err); -+ printk("fd %d is losing SIGWINCH support\n", -+ winch->tty_fd); -+ return; -+ } -+ goto out; -+ } -+ } -+ tty = winch->line->tty; -+ if(tty != NULL){ -+ chan_window_size(&winch->line->chan_list, -+ &tty->winsize.ws_row, -+ &tty->winsize.ws_col); -+ kill_pg(tty->pgrp, SIGWINCH, 1); -+ } -+ out: -+ if(winch->fd != -1) -+ reactivate_fd(winch->fd, WINCH_IRQ); -+} -+ -+DECLARE_MUTEX(winch_handler_sem); -+LIST_HEAD(winch_handlers); -+ -+void register_winch_irq(int fd, int tty_fd, int pid, void *line) -+{ -+ struct winch *winch; -+ -+ down(&winch_handler_sem); -+ winch = kmalloc(sizeof(*winch), GFP_KERNEL); -+ if(winch == NULL){ -+ printk("register_winch_irq - kmalloc failed\n"); -+ goto out; -+ } -+ *winch = ((struct winch) { .list = LIST_HEAD_INIT(winch->list), -+ .fd = fd, -+ .tty_fd = tty_fd, -+ .pid = pid, -+ .line = line }); -+ list_add(&winch->list, &winch_handlers); -+ if(um_request_irq(WINCH_IRQ, fd, IRQ_READ, winch_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "winch", winch) < 0) -+ printk("register_winch_irq - failed to register IRQ\n"); -+ out: -+ up(&winch_handler_sem); -+} -+ -+static void winch_cleanup(void) -+{ -+ struct list_head *ele; -+ struct winch *winch; -+ -+ list_for_each(ele, &winch_handlers){ -+ winch = list_entry(ele, struct winch, list); -+ if(winch->fd != -1){ -+ deactivate_fd(winch->fd, WINCH_IRQ); -+ close(winch->fd); -+ } -+ if(winch->pid != -1) -+ os_kill_process(winch->pid, 1); -+ } -+} -+ -+__uml_exitcall(winch_cleanup); -+ 
-+char *add_xterm_umid(char *base) -+{ -+ char *umid, *title; -+ int len; -+ -+ umid = get_umid(1); -+ if(umid == NULL) return(base); -+ -+ len = strlen(base) + strlen(" ()") + strlen(umid) + 1; -+ title = kmalloc(len, GFP_KERNEL); -+ if(title == NULL){ -+ printk("Failed to allocate buffer for xterm title\n"); -+ return(base); -+ } -+ -+ strncpy(title, base, len); -+ len -= strlen(title); -+ snprintf(&title[strlen(title)], len, " (%s)", umid); -+ return(title); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mcast.h um/arch/um/drivers/mcast.h ---- orig/arch/um/drivers/mcast.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/mcast.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct mcast_data { -+ char *addr; -+ unsigned short port; -+ void *mcast_addr; -+ int ttl; -+ void *dev; -+}; -+ -+extern struct net_user_info mcast_user_info; -+ -+extern int mcast_user_write(int fd, void *buf, int len, -+ struct mcast_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mcast_kern.c um/arch/um/drivers/mcast_kern.c ---- orig/arch/um/drivers/mcast_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/mcast_kern.c Sun Dec 15 20:58:55 2002 -@@ -0,0 +1,145 @@ -+/* -+ * user-mode-linux networking multicast transport -+ * Copyright (C) 2001 by Harald Welte -+ * -+ * based on the existing uml-networking code, which is -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/in.h" -+#include "linux/inet.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "mcast.h" -+ -+struct mcast_init { -+ char *addr; -+ int port; -+ int ttl; -+}; -+ -+void mcast_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct mcast_data *dpri; -+ struct mcast_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ dpri = (struct mcast_data *) pri->user; -+ *dpri = ((struct mcast_data) -+ { .addr = init->addr, -+ .port = init->port, -+ .ttl = init->ttl, -+ .mcast_addr = NULL, -+ .dev = dev }); -+ printk("mcast backend "); -+ printk("multicast adddress: %s:%u, TTL:%u ", -+ dpri->addr, dpri->port, dpri->ttl); -+ -+ printk("\n"); -+} -+ -+static int mcast_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int mcast_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return 
mcast_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct mcast_data *) &lp->user); -+} -+ -+static struct net_kern_info mcast_kern_info = { -+ .init = mcast_init, -+ .protocol = eth_protocol, -+ .read = mcast_read, -+ .write = mcast_write, -+}; -+ -+int mcast_setup(char *str, char **mac_out, void *data) -+{ -+ struct mcast_init *init = data; -+ char *port_str = NULL, *ttl_str = NULL, *remain; -+ char *last; -+ int n; -+ -+ *init = ((struct mcast_init) -+ { .addr = "239.192.168.1", -+ .port = 1102, -+ .ttl = 1 }); -+ -+ remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str, -+ NULL); -+ if(remain != NULL){ -+ printk(KERN_ERR "mcast_setup - Extra garbage on " -+ "specification : '%s'\n", remain); -+ return(0); -+ } -+ -+ if(port_str != NULL){ -+ n = simple_strtoul(port_str, &last, 10); -+ if((*last != '\0') || (last == port_str)){ -+ printk(KERN_ERR "mcast_setup - Bad port : '%s'\n", -+ port_str); -+ return(0); -+ } -+ init->port = htons(n); -+ } -+ -+ if(ttl_str != NULL){ -+ init->ttl = simple_strtoul(ttl_str, &last, 10); -+ if((*last != '\0') || (last == ttl_str)){ -+ printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n", -+ ttl_str); -+ return(0); -+ } -+ } -+ -+ printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr, -+ init->port, init->ttl); -+ -+ return(1); -+} -+ -+static struct transport mcast_transport = { -+ .list = LIST_HEAD_INIT(mcast_transport.list), -+ .name = "mcast", -+ .setup = mcast_setup, -+ .user = &mcast_user_info, -+ .kern = &mcast_kern_info, -+ .private_size = sizeof(struct mcast_data), -+ .setup_size = sizeof(struct mcast_init), -+}; -+ -+static int register_mcast(void) -+{ -+ register_transport(&mcast_transport); -+ return(1); -+} -+ -+__initcall(register_mcast); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mcast_user.c um/arch/um/drivers/mcast_user.c ---- orig/arch/um/drivers/mcast_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/mcast_user.c Sun Dec 15 21:19:16 2002 -@@ -0,0 +1,175 @@ -+/* -+ * user-mode-linux networking multicast transport -+ * Copyright (C) 2001 by Harald Welte -+ * -+ * based on the existing uml-networking code, which is -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * -+ * Licensed under the GPL. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "net_user.h" -+#include "mcast.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "user.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+static struct sockaddr_in *new_addr(char *addr, unsigned short port) -+{ -+ struct sockaddr_in *sin; -+ -+ sin = um_kmalloc(sizeof(struct sockaddr_in)); -+ if(sin == NULL){ -+ printk("new_addr: allocation of sockaddr_in failed\n"); -+ return(NULL); -+ } -+ sin->sin_family = AF_INET; -+ sin->sin_addr.s_addr = in_aton(addr); -+ sin->sin_port = port; -+ return(sin); -+} -+ -+static void mcast_user_init(void *data, void *dev) -+{ -+ struct mcast_data *pri = data; -+ -+ pri->mcast_addr = new_addr(pri->addr, pri->port); -+ pri->dev = dev; -+} -+ -+static int mcast_open(void *data) -+{ -+ struct mcast_data *pri = data; -+ struct sockaddr_in *sin = pri->mcast_addr; -+ struct ip_mreq mreq; -+ int fd, yes = 1; -+ -+ -+ if ((sin->sin_addr.s_addr == 0) || (sin->sin_port == 0)) { -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0){ -+ printk("mcast_open : data socket failed, errno = %d\n", -+ errno); -+ fd = -ENOMEM; -+ goto 
out; -+ } -+ -+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { -+ printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", -+ errno); -+ close(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* set ttl according to config */ -+ if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl, -+ sizeof(pri->ttl)) < 0) { -+ printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", -+ errno); -+ close(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* set LOOP, so data does get fed back to local sockets */ -+ if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { -+ printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", -+ errno); -+ close(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* bind socket to mcast address */ -+ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { -+ printk("mcast_open : data bind failed, errno = %d\n", errno); -+ close(fd); -+ fd = -EINVAL; -+ goto out; -+ } -+ -+ /* subscribe to the multicast group */ -+ mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; -+ mreq.imr_interface.s_addr = 0; -+ if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP, -+ &mreq, sizeof(mreq)) < 0) { -+ printk("mcast_open: IP_ADD_MEMBERSHIP failed, error = %d\n", -+ errno); -+ printk("There appears not to be a multicast-capable network " -+ "interface on the host.\n"); -+ printk("eth0 should be configured in order to use the " -+ "multicast transport.\n"); -+ close(fd); -+ fd = -EINVAL; -+ } -+ -+ out: -+ return(fd); -+} -+ -+static void mcast_close(int fd, void *data) -+{ -+ struct ip_mreq mreq; -+ struct mcast_data *pri = data; -+ struct sockaddr_in *sin = pri->mcast_addr; -+ -+ mreq.imr_multiaddr.s_addr = sin->sin_addr.s_addr; -+ mreq.imr_interface.s_addr = 0; -+ if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP, -+ &mreq, sizeof(mreq)) < 0) { -+ printk("mcast_open: IP_DROP_MEMBERSHIP failed, error = %d\n", -+ errno); -+ } -+ -+ close(fd); -+} -+ -+int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) -+{ -+ struct 
sockaddr_in *data_addr = pri->mcast_addr; -+ -+ return(net_sendto(fd, buf, len, data_addr, sizeof(*data_addr))); -+} -+ -+static int mcast_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info mcast_user_info = { -+ .init = mcast_user_init, -+ .open = mcast_open, -+ .close = mcast_close, -+ .remove = NULL, -+ .set_mtu = mcast_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mconsole_kern.c um/arch/um/drivers/mconsole_kern.c ---- orig/arch/um/drivers/mconsole_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/mconsole_kern.c Fri Mar 28 21:58:11 2003 -@@ -0,0 +1,453 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/slab.h" -+#include "linux/init.h" -+#include "linux/notifier.h" -+#include "linux/reboot.h" -+#include "linux/utsname.h" -+#include "linux/ctype.h" -+#include "linux/interrupt.h" -+#include "linux/sysrq.h" -+#include "linux/tqueue.h" -+#include "linux/module.h" -+#include "linux/proc_fs.h" -+#include "asm/irq.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mconsole.h" -+#include "mconsole_kern.h" -+#include "irq_user.h" -+#include "init.h" -+#include "os.h" -+#include "umid.h" -+ -+static int do_unlink_socket(struct notifier_block *notifier, -+ unsigned long what, void *data) -+{ -+ 
return(mconsole_unlink_socket()); -+} -+ -+ -+static struct notifier_block reboot_notifier = { -+ .notifier_call = do_unlink_socket, -+ .priority = 0, -+}; -+ -+/* Safe without explicit locking for now. Tasklets provide their own -+ * locking, and the interrupt handler is safe because it can't interrupt -+ * itself and it can only happen on CPU 0. -+ */ -+ -+LIST_HEAD(mc_requests); -+ -+void mc_task_proc(void *unused) -+{ -+ struct mconsole_entry *req; -+ unsigned long flags; -+ int done; -+ -+ do { -+ save_flags(flags); -+ req = list_entry(mc_requests.next, struct mconsole_entry, -+ list); -+ list_del(&req->list); -+ done = list_empty(&mc_requests); -+ restore_flags(flags); -+ req->request.cmd->handler(&req->request); -+ kfree(req); -+ } while(!done); -+} -+ -+struct tq_struct mconsole_task = { -+ .routine = mc_task_proc, -+ .data = NULL -+}; -+ -+void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ int fd; -+ struct mconsole_entry *new; -+ struct mc_request req; -+ -+ fd = (int) dev_id; -+ while (mconsole_get_request(fd, &req)){ -+ if(req.cmd->as_interrupt) (*req.cmd->handler)(&req); -+ else { -+ new = kmalloc(sizeof(req), GFP_ATOMIC); -+ if(new == NULL) -+ mconsole_reply(&req, "Out of memory", 1, 0); -+ else { -+ new->request = req; -+ list_add(&new->list, &mc_requests); -+ } -+ } -+ } -+ if(!list_empty(&mc_requests)) schedule_task(&mconsole_task); -+ reactivate_fd(fd, MCONSOLE_IRQ); -+} -+ -+void mconsole_version(struct mc_request *req) -+{ -+ char version[256]; -+ -+ sprintf(version, "%s %s %s %s %s", system_utsname.sysname, -+ system_utsname.nodename, system_utsname.release, -+ system_utsname.version, system_utsname.machine); -+ mconsole_reply(req, version, 0, 0); -+} -+ -+#define UML_MCONSOLE_HELPTEXT \ -+"Commands: \n\ -+ version - Get kernel version \n\ -+ help - Print this message \n\ -+ halt - Halt UML \n\ -+ reboot - Reboot UML \n\ -+ config = - Add a new device to UML; \n\ -+ same syntax as command line \n\ -+ config - Query the 
configuration of a device \n\ -+ remove - Remove a device from UML \n\ -+ sysrq - Performs the SysRq action controlled by the letter \n\ -+ cad - invoke the Ctl-Alt-Del handler \n\ -+ stop - pause the UML; it will do nothing until it receives a 'go' \n\ -+ go - continue the UML after a 'stop' \n\ -+" -+ -+void mconsole_help(struct mc_request *req) -+{ -+ mconsole_reply(req, UML_MCONSOLE_HELPTEXT, 0, 0); -+} -+ -+void mconsole_halt(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ machine_halt(); -+} -+ -+void mconsole_reboot(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ machine_restart(NULL); -+} -+ -+extern void ctrl_alt_del(void); -+ -+void mconsole_cad(struct mc_request *req) -+{ -+ mconsole_reply(req, "", 0, 0); -+ ctrl_alt_del(); -+} -+ -+void mconsole_go(struct mc_request *req) -+{ -+ mconsole_reply(req, "Not stopped", 1, 0); -+} -+ -+void mconsole_stop(struct mc_request *req) -+{ -+ deactivate_fd(req->originating_fd, MCONSOLE_IRQ); -+ os_set_fd_block(req->originating_fd, 1); -+ mconsole_reply(req, "", 0, 0); -+ while(mconsole_get_request(req->originating_fd, req)){ -+ if(req->cmd->handler == mconsole_go) break; -+ (*req->cmd->handler)(req); -+ } -+ os_set_fd_block(req->originating_fd, 0); -+ reactivate_fd(req->originating_fd, MCONSOLE_IRQ); -+ mconsole_reply(req, "", 0, 0); -+} -+ -+/* This list is populated by __initcall routines. 
*/ -+ -+LIST_HEAD(mconsole_devices); -+ -+void mconsole_register_dev(struct mc_device *new) -+{ -+ list_add(&new->list, &mconsole_devices); -+} -+ -+static struct mc_device *mconsole_find_dev(char *name) -+{ -+ struct list_head *ele; -+ struct mc_device *dev; -+ -+ list_for_each(ele, &mconsole_devices){ -+ dev = list_entry(ele, struct mc_device, list); -+ if(!strncmp(name, dev->name, strlen(dev->name))) -+ return(dev); -+ } -+ return(NULL); -+} -+ -+#define CONFIG_BUF_SIZE 64 -+ -+static void mconsole_get_config(int (*get_config)(char *, char *, int, -+ char **), -+ struct mc_request *req, char *name) -+{ -+ char default_buf[CONFIG_BUF_SIZE], *error, *buf; -+ int n, size; -+ -+ if(get_config == NULL){ -+ mconsole_reply(req, "No get_config routine defined", 1, 0); -+ return; -+ } -+ -+ error = NULL; -+ size = sizeof(default_buf)/sizeof(default_buf[0]); -+ buf = default_buf; -+ -+ while(1){ -+ n = (*get_config)(name, buf, size, &error); -+ if(error != NULL){ -+ mconsole_reply(req, error, 1, 0); -+ goto out; -+ } -+ -+ if(n <= size){ -+ mconsole_reply(req, buf, 0, 0); -+ goto out; -+ } -+ -+ if(buf != default_buf) -+ kfree(buf); -+ -+ size = n; -+ buf = kmalloc(size, GFP_KERNEL); -+ if(buf == NULL){ -+ mconsole_reply(req, "Failed to allocate buffer", 1, 0); -+ return; -+ } -+ } -+ out: -+ if(buf != default_buf) -+ kfree(buf); -+ -+} -+ -+void mconsole_config(struct mc_request *req) -+{ -+ struct mc_device *dev; -+ char *ptr = req->request.data, *name; -+ int err; -+ -+ ptr += strlen("config"); -+ while(isspace(*ptr)) ptr++; -+ dev = mconsole_find_dev(ptr); -+ if(dev == NULL){ -+ mconsole_reply(req, "Bad configuration option", 1, 0); -+ return; -+ } -+ -+ name = &ptr[strlen(dev->name)]; -+ ptr = name; -+ while((*ptr != '=') && (*ptr != '\0')) -+ ptr++; -+ -+ if(*ptr == '='){ -+ err = (*dev->config)(name); -+ mconsole_reply(req, "", err, 0); -+ } -+ else mconsole_get_config(dev->get_config, req, name); -+} -+ -+void mconsole_remove(struct mc_request *req) -+{ -+ struct 
mc_device *dev; -+ char *ptr = req->request.data; -+ int err; -+ -+ ptr += strlen("remove"); -+ while(isspace(*ptr)) ptr++; -+ dev = mconsole_find_dev(ptr); -+ if(dev == NULL){ -+ mconsole_reply(req, "Bad remove option", 1, 0); -+ return; -+ } -+ err = (*dev->remove)(&ptr[strlen(dev->name)]); -+ mconsole_reply(req, "", err, 0); -+} -+ -+#ifdef CONFIG_MAGIC_SYSRQ -+void mconsole_sysrq(struct mc_request *req) -+{ -+ char *ptr = req->request.data; -+ -+ ptr += strlen("sysrq"); -+ while(isspace(*ptr)) ptr++; -+ -+ handle_sysrq(*ptr, ¤t->thread.regs, NULL, NULL); -+ mconsole_reply(req, "", 0, 0); -+} -+#else -+void mconsole_sysrq(struct mc_request *req) -+{ -+ mconsole_reply(req, "Sysrq not compiled in", 1, 0); -+} -+#endif -+ -+/* Changed by mconsole_setup, which is __setup, and called before SMP is -+ * active. -+ */ -+static char *notify_socket = NULL; -+ -+int mconsole_init(void) -+{ -+ int err, sock; -+ char file[256]; -+ -+ if(umid_file_name("mconsole", file, sizeof(file))) return(-1); -+ snprintf(mconsole_socket_name, sizeof(file), "%s", file); -+ -+ sock = create_unix_socket(file, sizeof(file)); -+ if (sock < 0){ -+ printk("Failed to initialize management console\n"); -+ return(1); -+ } -+ -+ register_reboot_notifier(&reboot_notifier); -+ -+ err = um_request_irq(MCONSOLE_IRQ, sock, IRQ_READ, mconsole_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "mconsole", (void *)sock); -+ if (err){ -+ printk("Failed to get IRQ for management console\n"); -+ return(1); -+ } -+ -+ if(notify_socket != NULL){ -+ notify_socket = uml_strdup(notify_socket); -+ if(notify_socket != NULL) -+ mconsole_notify(notify_socket, MCONSOLE_SOCKET, -+ mconsole_socket_name, -+ strlen(mconsole_socket_name) + 1); -+ else printk(KERN_ERR "mconsole_setup failed to strdup " -+ "string\n"); -+ } -+ -+ printk("mconsole (version %d) initialized on %s\n", -+ MCONSOLE_VERSION, mconsole_socket_name); -+ return(0); -+} -+ -+__initcall(mconsole_init); -+ -+static int write_proc_mconsole(struct 
file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char *buf; -+ -+ buf = kmalloc(count + 1, GFP_KERNEL); -+ if(buf == NULL) -+ return(-ENOMEM); -+ -+ if(copy_from_user(buf, buffer, count)) -+ return(-EFAULT); -+ buf[count] = '\0'; -+ -+ mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); -+ return(count); -+} -+ -+static int create_proc_mconsole(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ if(notify_socket == NULL) return(0); -+ -+ ent = create_proc_entry("mconsole", S_IFREG | 0200, NULL); -+ if(ent == NULL){ -+ printk("create_proc_mconsole : create_proc_entry failed\n"); -+ return(0); -+ } -+ -+ ent->read_proc = NULL; -+ ent->write_proc = write_proc_mconsole; -+ return(0); -+} -+ -+static spinlock_t notify_spinlock = SPIN_LOCK_UNLOCKED; -+ -+void lock_notify(void) -+{ -+ spin_lock(¬ify_spinlock); -+} -+ -+void unlock_notify(void) -+{ -+ spin_unlock(¬ify_spinlock); -+} -+ -+__initcall(create_proc_mconsole); -+ -+#define NOTIFY "=notify:" -+ -+static int mconsole_setup(char *str) -+{ -+ if(!strncmp(str, NOTIFY, strlen(NOTIFY))){ -+ str += strlen(NOTIFY); -+ notify_socket = str; -+ } -+ else printk(KERN_ERR "mconsole_setup : Unknown option - '%s'\n", str); -+ return(1); -+} -+ -+__setup("mconsole", mconsole_setup); -+ -+__uml_help(mconsole_setup, -+"mconsole=notify:\n" -+" Requests that the mconsole driver send a message to the named Unix\n" -+" socket containing the name of the mconsole socket. 
This also serves\n" -+" to notify outside processes when UML has booted far enough to respond\n" -+" to mconsole requests.\n\n" -+); -+ -+static int notify_panic(struct notifier_block *self, unsigned long unused1, -+ void *ptr) -+{ -+ char *message = ptr; -+ -+ if(notify_socket == NULL) return(0); -+ -+ mconsole_notify(notify_socket, MCONSOLE_PANIC, message, -+ strlen(message) + 1); -+ return(0); -+} -+ -+static struct notifier_block panic_exit_notifier = { -+ .notifier_call = notify_panic, -+ .next = NULL, -+ .priority = 1 -+}; -+ -+static int add_notifier(void) -+{ -+ notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); -+ return(0); -+} -+ -+__initcall(add_notifier); -+ -+char *mconsole_notify_socket(void) -+{ -+ return(notify_socket); -+} -+ -+EXPORT_SYMBOL(mconsole_notify_socket); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mconsole_user.c um/arch/um/drivers/mconsole_user.c ---- orig/arch/um/drivers/mconsole_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/mconsole_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,212 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user.h" -+#include "mconsole.h" -+#include "umid.h" -+ -+static struct mconsole_command commands[] = { -+ { "version", mconsole_version, 1 }, -+ { "halt", mconsole_halt, 0 }, -+ { "reboot", mconsole_reboot, 0 }, -+ { "config", mconsole_config, 0 }, -+ { "remove", mconsole_remove, 0 }, -+ { "sysrq", mconsole_sysrq, 1 }, -+ { "help", mconsole_help, 1 }, -+ { "cad", mconsole_cad, 1 }, -+ { "stop", mconsole_stop, 0 }, -+ { "go", mconsole_go, 1 }, -+}; -+ -+/* Initialized in mconsole_init, which is an initcall */ -+char mconsole_socket_name[256]; -+ -+int mconsole_reply_v0(struct mc_request *req, char *reply) -+{ -+ struct iovec iov; -+ struct msghdr msg; -+ -+ iov.iov_base = reply; -+ iov.iov_len = strlen(reply); -+ -+ msg.msg_name = &(req->origin); -+ msg.msg_namelen = req->originlen; -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ msg.msg_control = NULL; -+ msg.msg_controllen = 0; -+ msg.msg_flags = 0; -+ -+ return sendmsg(req->originating_fd, &msg, 0); -+} -+ -+static struct mconsole_command *mconsole_parse(struct mc_request *req) -+{ -+ struct mconsole_command *cmd; -+ int i; -+ -+ for(i=0;irequest.data, cmd->command, -+ strlen(cmd->command))){ -+ return(cmd); -+ } -+ } -+ return(NULL); -+} -+ -+#define MIN(a,b) ((a)<(b) ? 
(a):(b)) -+ -+#define STRINGX(x) #x -+#define STRING(x) STRINGX(x) -+ -+int mconsole_get_request(int fd, struct mc_request *req) -+{ -+ int len; -+ -+ req->originlen = sizeof(req->origin); -+ req->len = recvfrom(fd, &req->request, sizeof(req->request), 0, -+ (struct sockaddr *) req->origin, &req->originlen); -+ if (req->len < 0) -+ return 0; -+ -+ req->originating_fd = fd; -+ -+ if(req->request.magic != MCONSOLE_MAGIC){ -+ /* Unversioned request */ -+ len = MIN(sizeof(req->request.data) - 1, -+ strlen((char *) &req->request)); -+ memmove(req->request.data, &req->request, len); -+ req->request.data[len] = '\0'; -+ -+ req->request.magic = MCONSOLE_MAGIC; -+ req->request.version = 0; -+ req->request.len = len; -+ -+ mconsole_reply_v0(req, "ERR Version 0 mconsole clients are " -+ "not supported by this driver"); -+ return(0); -+ } -+ -+ if(req->request.len >= MCONSOLE_MAX_DATA){ -+ mconsole_reply(req, "Request too large", 1, 0); -+ return(0); -+ } -+ if(req->request.version != MCONSOLE_VERSION){ -+ mconsole_reply(req, "This driver only supports version " -+ STRING(MCONSOLE_VERSION) " clients", 1, 0); -+ } -+ -+ req->request.data[req->request.len] = '\0'; -+ req->cmd = mconsole_parse(req); -+ if(req->cmd == NULL){ -+ mconsole_reply(req, "Unknown command", 1, 0); -+ return(0); -+ } -+ -+ return(1); -+} -+ -+int mconsole_reply(struct mc_request *req, char *str, int err, int more) -+{ -+ struct mconsole_reply reply; -+ int total, len, n; -+ -+ total = strlen(str); -+ do { -+ reply.err = err; -+ -+ /* err can only be true on the first packet */ -+ err = 0; -+ -+ len = MIN(total, MCONSOLE_MAX_DATA - 1); -+ -+ if(len == total) reply.more = more; -+ else reply.more = 1; -+ -+ memcpy(reply.data, str, len); -+ reply.data[len] = '\0'; -+ total -= len; -+ reply.len = len + 1; -+ -+ len = sizeof(reply) + reply.len - sizeof(reply.data); -+ -+ n = sendto(req->originating_fd, &reply, len, 0, -+ (struct sockaddr *) req->origin, req->originlen); -+ -+ if(n < 0) return(-errno); -+ } 
while(total > 0); -+ return(0); -+} -+ -+int mconsole_unlink_socket(void) -+{ -+ unlink(mconsole_socket_name); -+ return 0; -+} -+ -+static int notify_sock = -1; -+ -+int mconsole_notify(char *sock_name, int type, const void *data, int len) -+{ -+ struct sockaddr_un target; -+ struct mconsole_notify packet; -+ int n, err = 0; -+ -+ lock_notify(); -+ if(notify_sock < 0){ -+ notify_sock = socket(PF_UNIX, SOCK_DGRAM, 0); -+ if(notify_sock < 0){ -+ printk("mconsole_notify - socket failed, errno = %d\n", -+ errno); -+ err = -errno; -+ } -+ } -+ unlock_notify(); -+ -+ if(err) -+ return(err); -+ -+ target.sun_family = AF_UNIX; -+ strcpy(target.sun_path, sock_name); -+ -+ packet.magic = MCONSOLE_MAGIC; -+ packet.version = MCONSOLE_VERSION; -+ packet.type = type; -+ len = (len > sizeof(packet.data)) ? sizeof(packet.data) : len; -+ packet.len = len; -+ memcpy(packet.data, data, len); -+ -+ err = 0; -+ len = sizeof(packet) + packet.len - sizeof(packet.data); -+ n = sendto(notify_sock, &packet, len, 0, (struct sockaddr *) &target, -+ sizeof(target)); -+ if(n < 0){ -+ printk("mconsole_notify - sendto failed, errno = %d\n", errno); -+ err = -errno; -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/mmapper_kern.c um/arch/um/drivers/mmapper_kern.c ---- orig/arch/um/drivers/mmapper_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/mmapper_kern.c Sun Dec 15 21:03:08 2002 -@@ -0,0 +1,148 @@ -+/* -+ * arch/um/drivers/mmapper_kern.c -+ * -+ * BRIEF MODULE DESCRIPTION -+ * -+ * Copyright (C) 2000 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. 
-+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ */ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "mem_user.h" -+#include "user_util.h" -+ -+/* These are set in mmapper_init, which is called at boot time */ -+static unsigned long mmapper_size; -+static unsigned long p_buf = 0; -+static char *v_buf = NULL; -+ -+static ssize_t -+mmapper_read(struct file *file, char *buf, size_t count, loff_t *ppos) -+{ -+ if(*ppos > mmapper_size) -+ return -EINVAL; -+ -+ if(count + *ppos > mmapper_size) -+ count = count + *ppos - mmapper_size; -+ -+ if(count < 0) -+ return -EINVAL; -+ -+ copy_to_user(buf,&v_buf[*ppos],count); -+ -+ return count; -+} -+ -+static ssize_t -+mmapper_write(struct file *file, const char *buf, size_t count, loff_t *ppos) -+{ -+ if(*ppos > mmapper_size) -+ return -EINVAL; -+ -+ if(count + *ppos > mmapper_size) -+ count = count + *ppos - mmapper_size; -+ -+ if(count < 0) -+ return -EINVAL; -+ -+ copy_from_user(&v_buf[*ppos],buf,count); -+ -+ return count; -+} -+ -+static int -+mmapper_ioctl(struct inode *inode, struct file *file, unsigned int cmd, -+ unsigned long arg) -+{ -+ return(-ENOIOCTLCMD); -+} -+ -+static int -+mmapper_mmap(struct file *file, struct vm_area_struct * vma) -+{ -+ int ret = -EINVAL; -+ int size; -+ -+ lock_kernel(); -+ if (vma->vm_pgoff != 0) -+ goto out; -+ -+ size = vma->vm_end - vma->vm_start; -+ if(size > mmapper_size) return(-EFAULT); -+ -+ /* XXX A comment above remap_page_range says it should only be -+ * called when the mm semaphore is held -+ */ -+ if (remap_page_range(vma->vm_start, p_buf, size, vma->vm_page_prot)) -+ goto out; -+ ret = 0; -+out: -+ unlock_kernel(); -+ return ret; -+} -+ -+static int -+mmapper_open(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static int -+mmapper_release(struct inode *inode, struct file *file) -+{ -+ return 0; -+} -+ -+static struct file_operations mmapper_fops = { -+ 
.owner = THIS_MODULE, -+ .read = mmapper_read, -+ .write = mmapper_write, -+ .ioctl = mmapper_ioctl, -+ .mmap = mmapper_mmap, -+ .open = mmapper_open, -+ .release = mmapper_release, -+}; -+ -+static int __init mmapper_init(void) -+{ -+ printk(KERN_INFO "Mapper v0.1\n"); -+ -+ v_buf = (char *) find_iomem("mmapper", &mmapper_size); -+ if(mmapper_size == 0) return(0); -+ -+ p_buf = __pa(v_buf); -+ -+ devfs_register (NULL, "mmapper", DEVFS_FL_DEFAULT, -+ 30, 0, S_IFCHR | S_IRUGO | S_IWUGO, -+ &mmapper_fops, NULL); -+ devfs_mk_symlink(NULL, "mmapper0", DEVFS_FL_DEFAULT, "mmapper", -+ NULL, NULL); -+ return(0); -+} -+ -+static void mmapper_exit(void) -+{ -+} -+ -+module_init(mmapper_init); -+module_exit(mmapper_exit); -+ -+MODULE_AUTHOR("Greg Lonnon "); -+MODULE_DESCRIPTION("DSPLinux simulator mmapper driver"); -+/* -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/net_kern.c um/arch/um/drivers/net_kern.c ---- orig/arch/um/drivers/net_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/net_kern.c Sun Dec 15 21:19:16 2002 -@@ -0,0 +1,870 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. 
-+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/netdevice.h" -+#include "linux/rtnetlink.h" -+#include "linux/skbuff.h" -+#include "linux/socket.h" -+#include "linux/spinlock.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/etherdevice.h" -+#include "linux/list.h" -+#include "linux/inetdevice.h" -+#include "linux/ctype.h" -+#include "linux/bootmem.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "irq_user.h" -+ -+static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED; -+LIST_HEAD(opened); -+ -+static int uml_net_rx(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ int pkt_len; -+ struct sk_buff *skb; -+ -+ /* If we can't allocate memory, try again next round. */ -+ if ((skb = dev_alloc_skb(dev->mtu)) == NULL) { -+ lp->stats.rx_dropped++; -+ return 0; -+ } -+ -+ skb->dev = dev; -+ skb_put(skb, dev->mtu); -+ skb->mac.raw = skb->data; -+ pkt_len = (*lp->read)(lp->fd, &skb, lp); -+ -+ if (pkt_len > 0) { -+ skb_trim(skb, pkt_len); -+ skb->protocol = (*lp->protocol)(skb); -+ netif_rx(skb); -+ -+ lp->stats.rx_bytes += skb->len; -+ lp->stats.rx_packets++; -+ return pkt_len; -+ } -+ -+ kfree_skb(skb); -+ return pkt_len; -+} -+ -+void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) -+{ -+ struct net_device *dev = dev_id; -+ struct uml_net_private *lp = dev->priv; -+ int err; -+ -+ if(!netif_running(dev)) -+ return; -+ -+ spin_lock(&lp->lock); -+ while((err = uml_net_rx(dev)) > 0) ; -+ if(err < 0) { -+ printk(KERN_ERR -+ "Device '%s' read returned %d, shutting it down\n", -+ dev->name, err); -+ dev_close(dev); -+ goto out; -+ } -+ reactivate_fd(lp->fd, UM_ETH_IRQ); -+ -+ out: -+ spin_unlock(&lp->lock); -+} -+ -+static int uml_net_open(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ char addr[sizeof("255.255.255.255\0")]; -+ int err; -+ -+ 
spin_lock(&lp->lock); -+ -+ if(lp->fd >= 0){ -+ err = -ENXIO; -+ goto out; -+ } -+ -+ if(!lp->have_mac){ -+ dev_ip_addr(dev, addr, &lp->mac[2]); -+ set_ether_mac(dev, lp->mac); -+ } -+ -+ lp->fd = (*lp->open)(&lp->user); -+ if(lp->fd < 0){ -+ err = lp->fd; -+ goto out; -+ } -+ -+ err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt, -+ SA_INTERRUPT | SA_SHIRQ, dev->name, dev); -+ if(err != 0){ -+ printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ lp->fd = -1; -+ err = -ENETUNREACH; -+ } -+ -+ lp->tl.data = (unsigned long) &lp->user; -+ netif_start_queue(dev); -+ -+ spin_lock(&opened_lock); -+ list_add(&lp->list, &opened); -+ spin_unlock(&opened_lock); -+ MOD_INC_USE_COUNT; -+ out: -+ spin_unlock(&lp->lock); -+ return(err); -+} -+ -+static int uml_net_close(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ -+ netif_stop_queue(dev); -+ spin_lock(&lp->lock); -+ -+ free_irq(dev->irq, dev); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ lp->fd = -1; -+ spin_lock(&opened_lock); -+ list_del(&lp->list); -+ spin_unlock(&opened_lock); -+ -+ MOD_DEC_USE_COUNT; -+ spin_unlock(&lp->lock); -+ return 0; -+} -+ -+static int uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ unsigned long flags; -+ int len; -+ -+ netif_stop_queue(dev); -+ -+ spin_lock_irqsave(&lp->lock, flags); -+ -+ len = (*lp->write)(lp->fd, &skb, lp); -+ -+ if(len == skb->len) { -+ lp->stats.tx_packets++; -+ lp->stats.tx_bytes += skb->len; -+ dev->trans_start = jiffies; -+ netif_start_queue(dev); -+ -+ /* this is normally done in the interrupt when tx finishes */ -+ netif_wake_queue(dev); -+ } -+ else if(len == 0){ -+ netif_start_queue(dev); -+ lp->stats.tx_dropped++; -+ } -+ else { -+ netif_start_queue(dev); -+ printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len); -+ } -+ -+ spin_unlock_irqrestore(&lp->lock, flags); -+ -+ 
dev_kfree_skb(skb); -+ -+ return 0; -+} -+ -+static struct net_device_stats *uml_net_get_stats(struct net_device *dev) -+{ -+ struct uml_net_private *lp = dev->priv; -+ return &lp->stats; -+} -+ -+static void uml_net_set_multicast_list(struct net_device *dev) -+{ -+ if (dev->flags & IFF_PROMISC) return; -+ else if (dev->mc_count) dev->flags |= IFF_ALLMULTI; -+ else dev->flags &= ~IFF_ALLMULTI; -+} -+ -+static void uml_net_tx_timeout(struct net_device *dev) -+{ -+ dev->trans_start = jiffies; -+ netif_wake_queue(dev); -+} -+ -+static int uml_net_set_mac(struct net_device *dev, void *addr) -+{ -+ struct uml_net_private *lp = dev->priv; -+ struct sockaddr *hwaddr = addr; -+ -+ spin_lock(&lp->lock); -+ memcpy(dev->dev_addr, hwaddr->sa_data, ETH_ALEN); -+ spin_unlock(&lp->lock); -+ -+ return(0); -+} -+ -+static int uml_net_change_mtu(struct net_device *dev, int new_mtu) -+{ -+ struct uml_net_private *lp = dev->priv; -+ int err = 0; -+ -+ spin_lock(&lp->lock); -+ -+ new_mtu = (*lp->set_mtu)(new_mtu, &lp->user); -+ if(new_mtu < 0){ -+ err = new_mtu; -+ goto out; -+ } -+ -+ dev->mtu = new_mtu; -+ -+ out: -+ spin_unlock(&lp->lock); -+ return err; -+} -+ -+static int uml_net_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -+{ -+ return(-EINVAL); -+} -+ -+void uml_net_user_timer_expire(unsigned long _conn) -+{ -+#ifdef undef -+ struct connection *conn = (struct connection *)_conn; -+ -+ dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn); -+ do_connect(conn); -+#endif -+} -+ -+/* -+ * default do nothing hard header packet routines for struct net_device init. -+ * real ethernet transports will overwrite with real routines. 
-+ */ -+static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev, -+ unsigned short type, void *daddr, void *saddr, unsigned len) -+{ -+ return(0); /* no change */ -+} -+ -+static int uml_net_rebuild_header(struct sk_buff *skb) -+{ -+ return(0); /* ignore */ -+} -+ -+static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh) -+{ -+ return(-1); /* fail */ -+} -+ -+static void uml_net_header_cache_update(struct hh_cache *hh, -+ struct net_device *dev, unsigned char * haddr) -+{ -+ /* ignore */ -+} -+ -+static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr) -+{ -+ return(0); /* nothing */ -+} -+ -+static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED; -+static struct list_head devices = LIST_HEAD_INIT(devices); -+ -+static int eth_configure(int n, void *init, char *mac, -+ struct transport *transport) -+{ -+ struct uml_net *device; -+ struct net_device *dev; -+ struct uml_net_private *lp; -+ int save, err, size; -+ -+ size = transport->private_size + sizeof(struct uml_net_private) + -+ sizeof(((struct uml_net_private *) 0)->user); -+ -+ device = kmalloc(sizeof(*device), GFP_KERNEL); -+ if(device == NULL){ -+ printk(KERN_ERR "eth_configure failed to allocate uml_net\n"); -+ return(1); -+ } -+ -+ *device = ((struct uml_net) { .list = LIST_HEAD_INIT(device->list), -+ .dev = NULL, -+ .index = n, -+ .mac = { [ 0 ... 
5 ] = 0 }, -+ .have_mac = 0 }); -+ -+ spin_lock(&devices_lock); -+ list_add(&device->list, &devices); -+ spin_unlock(&devices_lock); -+ -+ if(setup_etheraddr(mac, device->mac)) -+ device->have_mac = 1; -+ -+ printk(KERN_INFO "Netdevice %d ", n); -+ if(device->have_mac) printk("(%02x:%02x:%02x:%02x:%02x:%02x) ", -+ device->mac[0], device->mac[1], -+ device->mac[2], device->mac[3], -+ device->mac[4], device->mac[5]); -+ printk(": "); -+ dev = kmalloc(sizeof(*dev) + size, GFP_KERNEL); -+ if(dev == NULL){ -+ printk(KERN_ERR "eth_configure: failed to allocate device\n"); -+ return(1); -+ } -+ memset(dev, 0, sizeof(*dev) + size); -+ -+ snprintf(dev->name, sizeof(dev->name), "eth%d", n); -+ dev->priv = (void *) &dev[1]; -+ device->dev = dev; -+ -+ dev->hard_header = uml_net_hard_header; -+ dev->rebuild_header = uml_net_rebuild_header; -+ dev->hard_header_cache = uml_net_header_cache; -+ dev->header_cache_update= uml_net_header_cache_update; -+ dev->hard_header_parse = uml_net_header_parse; -+ -+ (*transport->kern->init)(dev, init); -+ -+ dev->mtu = transport->user->max_packet; -+ dev->open = uml_net_open; -+ dev->hard_start_xmit = uml_net_start_xmit; -+ dev->stop = uml_net_close; -+ dev->get_stats = uml_net_get_stats; -+ dev->set_multicast_list = uml_net_set_multicast_list; -+ dev->tx_timeout = uml_net_tx_timeout; -+ dev->set_mac_address = uml_net_set_mac; -+ dev->change_mtu = uml_net_change_mtu; -+ dev->do_ioctl = uml_net_ioctl; -+ dev->watchdog_timeo = (HZ >> 1); -+ dev->irq = UM_ETH_IRQ; -+ -+ rtnl_lock(); -+ err = register_netdevice(dev); -+ rtnl_unlock(); -+ if(err) -+ return(1); -+ lp = dev->priv; -+ -+ /* lp.user is the first four bytes of the transport data, which -+ * has already been initialized. This structure assignment will -+ * overwrite that, so we make sure that .user gets overwritten with -+ * what it already has. 
-+ */ -+ save = lp->user[0]; -+ *lp = ((struct uml_net_private) -+ { .list = LIST_HEAD_INIT(lp->list), -+ .lock = SPIN_LOCK_UNLOCKED, -+ .dev = dev, -+ .fd = -1, -+ .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, -+ .have_mac = device->have_mac, -+ .protocol = transport->kern->protocol, -+ .open = transport->user->open, -+ .close = transport->user->close, -+ .remove = transport->user->remove, -+ .read = transport->kern->read, -+ .write = transport->kern->write, -+ .add_address = transport->user->add_address, -+ .delete_address = transport->user->delete_address, -+ .set_mtu = transport->user->set_mtu, -+ .user = { save } }); -+ init_timer(&lp->tl); -+ lp->tl.function = uml_net_user_timer_expire; -+ memset(&lp->stats, 0, sizeof(lp->stats)); -+ if(lp->have_mac) memcpy(lp->mac, device->mac, sizeof(lp->mac)); -+ -+ if(transport->user->init) -+ (*transport->user->init)(&lp->user, dev); -+ -+ if(device->have_mac) -+ set_ether_mac(dev, device->mac); -+ return(0); -+} -+ -+static struct uml_net *find_device(int n) -+{ -+ struct uml_net *device; -+ struct list_head *ele; -+ -+ spin_lock(&devices_lock); -+ list_for_each(ele, &devices){ -+ device = list_entry(ele, struct uml_net, list); -+ if(device->index == n) -+ goto out; -+ } -+ device = NULL; -+ out: -+ spin_unlock(&devices_lock); -+ return(device); -+} -+ -+static int eth_parse(char *str, int *index_out, char **str_out) -+{ -+ char *end; -+ int n; -+ -+ n = simple_strtoul(str, &end, 0); -+ if(end == str){ -+ printk(KERN_ERR "eth_setup: Failed to parse '%s'\n", str); -+ return(1); -+ } -+ if(n < 0){ -+ printk(KERN_ERR "eth_setup: device %d is negative\n", n); -+ return(1); -+ } -+ str = end; -+ if(*str != '='){ -+ printk(KERN_ERR -+ "eth_setup: expected '=' after device number\n"); -+ return(1); -+ } -+ str++; -+ if(find_device(n)){ -+ printk(KERN_ERR "eth_setup: Device %d already configured\n", -+ n); -+ return(1); -+ } -+ if(index_out) *index_out = n; -+ *str_out = str; -+ return(0); -+} -+ -+struct eth_init { -+ struct 
list_head list; -+ char *init; -+ int index; -+}; -+ -+/* Filled in at boot time. Will need locking if the transports become -+ * modular. -+ */ -+struct list_head transports = LIST_HEAD_INIT(transports); -+ -+/* Filled in during early boot */ -+struct list_head eth_cmd_line = LIST_HEAD_INIT(eth_cmd_line); -+ -+static int check_transport(struct transport *transport, char *eth, int n, -+ void **init_out, char **mac_out) -+{ -+ int len; -+ -+ len = strlen(transport->name); -+ if(strncmp(eth, transport->name, len)) -+ return(0); -+ -+ eth += len; -+ if(*eth == ',') -+ eth++; -+ else if(*eth != '\0') -+ return(0); -+ -+ *init_out = kmalloc(transport->setup_size, GFP_KERNEL); -+ if(*init_out == NULL) -+ return(1); -+ -+ if(!transport->setup(eth, mac_out, *init_out)){ -+ kfree(*init_out); -+ *init_out = NULL; -+ } -+ return(1); -+} -+ -+void register_transport(struct transport *new) -+{ -+ struct list_head *ele, *next; -+ struct eth_init *eth; -+ void *init; -+ char *mac = NULL; -+ int match; -+ -+ list_add(&new->list, &transports); -+ -+ list_for_each_safe(ele, next, ð_cmd_line){ -+ eth = list_entry(ele, struct eth_init, list); -+ match = check_transport(new, eth->init, eth->index, &init, -+ &mac); -+ if(!match) -+ continue; -+ else if(init != NULL){ -+ eth_configure(eth->index, init, mac, new); -+ kfree(init); -+ } -+ list_del(ð->list); -+ } -+} -+ -+static int eth_setup_common(char *str, int index) -+{ -+ struct list_head *ele; -+ struct transport *transport; -+ void *init; -+ char *mac = NULL; -+ -+ list_for_each(ele, &transports){ -+ transport = list_entry(ele, struct transport, list); -+ if(!check_transport(transport, str, index, &init, &mac)) -+ continue; -+ if(init != NULL){ -+ eth_configure(index, init, mac, transport); -+ kfree(init); -+ } -+ return(1); -+ } -+ return(0); -+} -+ -+static int eth_setup(char *str) -+{ -+ struct eth_init *new; -+ int n, err; -+ -+ err = eth_parse(str, &n, &str); -+ if(err) return(1); -+ -+ new = alloc_bootmem(sizeof(new)); -+ 
if(new == NULL){ -+ printk("eth_init : alloc_bootmem failed\n"); -+ return(1); -+ } -+ *new = ((struct eth_init) { .list = LIST_HEAD_INIT(new->list), -+ .index = n, -+ .init = str }); -+ list_add_tail(&new->list, ð_cmd_line); -+ return(1); -+} -+ -+__setup("eth", eth_setup); -+__uml_help(eth_setup, -+"eth[0-9]+=,\n" -+" Configure a network device.\n\n" -+); -+ -+static int eth_init(void) -+{ -+ struct list_head *ele, *next; -+ struct eth_init *eth; -+ -+ list_for_each_safe(ele, next, ð_cmd_line){ -+ eth = list_entry(ele, struct eth_init, list); -+ -+ if(eth_setup_common(eth->init, eth->index)) -+ list_del(ð->list); -+ } -+ -+ return(1); -+} -+ -+__initcall(eth_init); -+ -+static int net_config(char *str) -+{ -+ int n, err; -+ -+ err = eth_parse(str, &n, &str); -+ if(err) return(err); -+ -+ str = uml_strdup(str); -+ if(str == NULL){ -+ printk(KERN_ERR "net_config failed to strdup string\n"); -+ return(-1); -+ } -+ err = !eth_setup_common(str, n); -+ if(err) -+ kfree(str); -+ return(err); -+} -+ -+static int net_remove(char *str) -+{ -+ struct uml_net *device; -+ struct net_device *dev; -+ struct uml_net_private *lp; -+ char *end; -+ int n; -+ -+ n = simple_strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)) -+ return(-1); -+ -+ device = find_device(n); -+ if(device == NULL) -+ return(0); -+ -+ dev = device->dev; -+ lp = dev->priv; -+ if(lp->fd > 0) return(-1); -+ if(lp->remove != NULL) (*lp->remove)(&lp->user); -+ unregister_netdev(dev); -+ -+ list_del(&device->list); -+ kfree(device); -+ return(0); -+} -+ -+static struct mc_device net_mc = { -+ .name = "eth", -+ .config = net_config, -+ .get_config = NULL, -+ .remove = net_remove, -+}; -+ -+static int uml_inetaddr_event(struct notifier_block *this, unsigned long event, -+ void *ptr) -+{ -+ struct in_ifaddr *ifa = ptr; -+ u32 addr = ifa->ifa_address; -+ u32 netmask = ifa->ifa_mask; -+ struct net_device *dev = ifa->ifa_dev->dev; -+ struct uml_net_private *lp; -+ void (*proc)(unsigned char *, unsigned char *, 
void *); -+ unsigned char addr_buf[4], netmask_buf[4]; -+ -+ if(dev->open != uml_net_open) return(NOTIFY_DONE); -+ -+ lp = dev->priv; -+ -+ proc = NULL; -+ switch (event){ -+ case NETDEV_UP: -+ proc = lp->add_address; -+ break; -+ case NETDEV_DOWN: -+ proc = lp->delete_address; -+ break; -+ } -+ if(proc != NULL){ -+ addr_buf[0] = addr & 0xff; -+ addr_buf[1] = (addr >> 8) & 0xff; -+ addr_buf[2] = (addr >> 16) & 0xff; -+ addr_buf[3] = addr >> 24; -+ netmask_buf[0] = netmask & 0xff; -+ netmask_buf[1] = (netmask >> 8) & 0xff; -+ netmask_buf[2] = (netmask >> 16) & 0xff; -+ netmask_buf[3] = netmask >> 24; -+ (*proc)(addr_buf, netmask_buf, &lp->user); -+ } -+ return(NOTIFY_DONE); -+} -+ -+struct notifier_block uml_inetaddr_notifier = { -+ .notifier_call = uml_inetaddr_event, -+}; -+ -+static int uml_net_init(void) -+{ -+ struct list_head *ele; -+ struct uml_net_private *lp; -+ struct in_device *ip; -+ struct in_ifaddr *in; -+ -+ mconsole_register_dev(&net_mc); -+ register_inetaddr_notifier(¨_inetaddr_notifier); -+ -+ /* Devices may have been opened already, so the uml_inetaddr_notifier -+ * didn't get a chance to run for them. This fakes it so that -+ * addresses which have already been set up get handled properly. 
-+ */ -+ list_for_each(ele, &opened){ -+ lp = list_entry(ele, struct uml_net_private, list); -+ ip = lp->dev->ip_ptr; -+ if(ip == NULL) continue; -+ in = ip->ifa_list; -+ while(in != NULL){ -+ uml_inetaddr_event(NULL, NETDEV_UP, in); -+ in = in->ifa_next; -+ } -+ } -+ -+ return(0); -+} -+ -+__initcall(uml_net_init); -+ -+static void close_devices(void) -+{ -+ struct list_head *ele; -+ struct uml_net_private *lp; -+ -+ list_for_each(ele, &opened){ -+ lp = list_entry(ele, struct uml_net_private, list); -+ if(lp->close != NULL) (*lp->close)(lp->fd, &lp->user); -+ if(lp->remove != NULL) (*lp->remove)(&lp->user); -+ } -+} -+ -+__uml_exitcall(close_devices); -+ -+int setup_etheraddr(char *str, unsigned char *addr) -+{ -+ char *end; -+ int i; -+ -+ if(str == NULL) -+ return(0); -+ for(i=0;i<6;i++){ -+ addr[i] = simple_strtoul(str, &end, 16); -+ if((end == str) || -+ ((*end != ':') && (*end != ',') && (*end != '\0'))){ -+ printk(KERN_ERR -+ "setup_etheraddr: failed to parse '%s' " -+ "as an ethernet address\n", str); -+ return(0); -+ } -+ str = end + 1; -+ } -+ if(addr[0] & 1){ -+ printk(KERN_ERR -+ "Attempt to assign a broadcast ethernet address to a " -+ "device disallowed\n"); -+ return(0); -+ } -+ return(1); -+} -+ -+void dev_ip_addr(void *d, char *buf, char *bin_buf) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ u32 addr; -+ -+ if((ip == NULL) || ((in = ip->ifa_list) == NULL)){ -+ printk(KERN_WARNING "dev_ip_addr - device not assigned an " -+ "IP address\n"); -+ return; -+ } -+ addr = in->ifa_address; -+ sprintf(buf, "%d.%d.%d.%d", addr & 0xff, (addr >> 8) & 0xff, -+ (addr >> 16) & 0xff, addr >> 24); -+ if(bin_buf){ -+ bin_buf[0] = addr & 0xff; -+ bin_buf[1] = (addr >> 8) & 0xff; -+ bin_buf[2] = (addr >> 16) & 0xff; -+ bin_buf[3] = addr >> 24; -+ } -+} -+ -+void set_ether_mac(void *d, unsigned char *addr) -+{ -+ struct net_device *dev = d; -+ -+ memcpy(dev->dev_addr, addr, ETH_ALEN); -+} -+ -+struct sk_buff 
*ether_adjust_skb(struct sk_buff *skb, int extra) -+{ -+ if((skb != NULL) && (skb_tailroom(skb) < extra)){ -+ struct sk_buff *skb2; -+ -+ skb2 = skb_copy_expand(skb, 0, extra, GFP_ATOMIC); -+ dev_kfree_skb(skb); -+ skb = skb2; -+ } -+ if(skb != NULL) skb_put(skb, extra); -+ return(skb); -+} -+ -+void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *, -+ void *), -+ void *arg) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ unsigned char address[4], netmask[4]; -+ -+ if(ip == NULL) return; -+ in = ip->ifa_list; -+ while(in != NULL){ -+ address[0] = in->ifa_address & 0xff; -+ address[1] = (in->ifa_address >> 8) & 0xff; -+ address[2] = (in->ifa_address >> 16) & 0xff; -+ address[3] = in->ifa_address >> 24; -+ netmask[0] = in->ifa_mask & 0xff; -+ netmask[1] = (in->ifa_mask >> 8) & 0xff; -+ netmask[2] = (in->ifa_mask >> 16) & 0xff; -+ netmask[3] = in->ifa_mask >> 24; -+ (*cb)(address, netmask, arg); -+ in = in->ifa_next; -+ } -+} -+ -+int dev_netmask(void *d, void *m) -+{ -+ struct net_device *dev = d; -+ struct in_device *ip = dev->ip_ptr; -+ struct in_ifaddr *in; -+ __u32 *mask_out = m; -+ -+ if(ip == NULL) -+ return(1); -+ -+ in = ip->ifa_list; -+ if(in == NULL) -+ return(1); -+ -+ *mask_out = in->ifa_mask; -+ return(0); -+} -+ -+void *get_output_buffer(int *len_out) -+{ -+ void *ret; -+ -+ ret = (void *) __get_free_pages(GFP_KERNEL, 0); -+ if(ret) *len_out = PAGE_SIZE; -+ else *len_out = 0; -+ return(ret); -+} -+ -+void free_output_buffer(void *buffer) -+{ -+ free_pages((unsigned long) buffer, 0); -+} -+ -+int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out, -+ char **gate_addr) -+{ -+ char *remain; -+ -+ remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL); -+ if(remain != NULL){ -+ printk("tap_setup_common - Extra garbage on specification : " -+ "'%s'\n", remain); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+unsigned short eth_protocol(struct sk_buff *skb) -+{ 
-+ return(eth_type_trans(skb, skb->dev)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/net_user.c um/arch/um/drivers/net_user.c ---- orig/arch/um/drivers/net_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/net_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,254 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "net_user.h" -+#include "helper.h" -+#include "os.h" -+ -+int tap_open_common(void *dev, char *gate_addr) -+{ -+ int tap_addr[4]; -+ -+ if(gate_addr == NULL) return(0); -+ if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], -+ &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ -+ printk("Invalid tap IP address - '%s'\n", -+ gate_addr); -+ return(-EINVAL); -+ } -+ return(0); -+} -+ -+void tap_check_ips(char *gate_addr, char *eth_addr) -+{ -+ int tap_addr[4]; -+ -+ if((gate_addr != NULL) && -+ (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], -+ &tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) && -+ (eth_addr[0] == tap_addr[0]) && -+ (eth_addr[1] == tap_addr[1]) && -+ (eth_addr[2] == tap_addr[2]) && -+ (eth_addr[3] == tap_addr[3])){ -+ printk("The tap IP address and the UML eth IP address" -+ " must be different\n"); -+ } -+} -+ -+void read_output(int fd, char *output, int len) -+{ -+ int remain, n, actual; -+ char c; -+ -+ if(output == NULL){ -+ output = &c; -+ len = sizeof(c); -+ } -+ -+ *output = '\0'; -+ if(read(fd, &remain, 
sizeof(remain)) != sizeof(remain)){ -+ printk("read_output - read of length failed, errno = %d\n", -+ errno); -+ return; -+ } -+ -+ while(remain != 0){ -+ n = (remain < len) ? remain : len; -+ actual = read(fd, output, n); -+ if(actual != n){ -+ printk("read_output - read of data failed, " -+ "errno = %d\n", errno); -+ return; -+ } -+ remain -= actual; -+ } -+ return; -+} -+ -+int net_read(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = read(fd, buf, len)) < 0) && (errno == EINTR)) ; -+ -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_recvfrom(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = recvfrom(fd, buf, len, 0, NULL, NULL)) < 0) && -+ (errno == EINTR)) ; -+ -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_write(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = write(fd, buf, len)) < 0) && (errno == EINTR)) ; -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_send(int fd, void *buf, int len) -+{ -+ int n; -+ -+ while(((n = send(fd, buf, len, 0)) < 0) && (errno == EINTR)) ; -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+int net_sendto(int fd, void *buf, int len, void *to, int sock_len) -+{ -+ int n; -+ -+ while(((n = sendto(fd, buf, len, 0, (struct sockaddr *) to, -+ sock_len)) < 0) && (errno == EINTR)) ; -+ if(n < 0){ -+ if(errno == EAGAIN) return(0); -+ return(-errno); -+ } -+ else if(n == 0) return(-ENOTCONN); -+ return(n); -+} -+ -+struct change_pre_exec_data { -+ int close_me; -+ int stdout; -+}; -+ -+static void change_pre_exec(void *arg) -+{ -+ struct change_pre_exec_data *data = arg; -+ -+ close(data->close_me); -+ dup2(data->stdout, 1); -+} -+ -+static int 
change_tramp(char **argv, char *output, int output_len) -+{ -+ int pid, fds[2], err; -+ struct change_pre_exec_data pe_data; -+ -+ err = os_pipe(fds, 1, 0); -+ if(err){ -+ printk("change_tramp - pipe failed, errno = %d\n", -err); -+ return(err); -+ } -+ pe_data.close_me = fds[0]; -+ pe_data.stdout = fds[1]; -+ pid = run_helper(change_pre_exec, &pe_data, argv, NULL); -+ -+ close(fds[1]); -+ read_output(fds[0], output, output_len); -+ waitpid(pid, NULL, 0); -+ return(pid); -+} -+ -+static void change(char *dev, char *what, unsigned char *addr, -+ unsigned char *netmask) -+{ -+ char addr_buf[sizeof("255.255.255.255\0")]; -+ char netmask_buf[sizeof("255.255.255.255\0")]; -+ char version[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version, what, dev, addr_buf, -+ netmask_buf, NULL }; -+ char *output; -+ int output_len, pid; -+ -+ sprintf(version, "%d", UML_NET_VERSION); -+ sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]); -+ sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1], -+ netmask[2], netmask[3]); -+ -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ if(output == NULL) -+ printk("change : failed to allocate output buffer\n"); -+ -+ pid = change_tramp(argv, output, output_len); -+ if(pid < 0) return; -+ -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+} -+ -+void open_addr(unsigned char *addr, unsigned char *netmask, void *arg) -+{ -+ change(arg, "add", addr, netmask); -+} -+ -+void close_addr(unsigned char *addr, unsigned char *netmask, void *arg) -+{ -+ change(arg, "del", addr, netmask); -+} -+ -+char *split_if_spec(char *str, ...) 
-+{ -+ char **arg, *end; -+ va_list ap; -+ -+ va_start(ap, str); -+ while((arg = va_arg(ap, char **)) != NULL){ -+ if(*str == '\0') -+ return(NULL); -+ end = strchr(str, ','); -+ if(end != str) -+ *arg = str; -+ if(end == NULL) -+ return(NULL); -+ *end++ = '\0'; -+ str = end; -+ } -+ va_end(ap); -+ return(str); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/null.c um/arch/um/drivers/null.c ---- orig/arch/um/drivers/null.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/null.c Sun Dec 15 21:04:00 2002 -@@ -0,0 +1,56 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include "chan_user.h" -+#include "os.h" -+ -+static int null_chan; -+ -+void *null_init(char *str, int device, struct chan_opts *opts) -+{ -+ return(&null_chan); -+} -+ -+int null_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ *dev_out = NULL; -+ return(os_open_file(DEV_NULL, of_rdwr(OPENFLAGS()), 0)); -+} -+ -+int null_read(int fd, char *c_out, void *unused) -+{ -+ return(-ENODEV); -+} -+ -+void null_free(void *data) -+{ -+} -+ -+struct chan_ops null_ops = { -+ .type = "null", -+ .init = null_init, -+ .open = null_open, -+ .close = generic_close, -+ .read = null_read, -+ .write = generic_write, -+ .console_write = generic_console_write, -+ .window_size = generic_window_size, -+ .free = null_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pcap_kern.c um/arch/um/drivers/pcap_kern.c ---- orig/arch/um/drivers/pcap_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/pcap_kern.c Sun Dec 15 21:19:15 2002 -@@ -0,0 +1,127 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike -+ * Licensed under the GPL. -+ */ -+ -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "pcap_user.h" -+ -+struct pcap_init { -+ char *host_if; -+ int promisc; -+ int optimize; -+ char *filter; -+}; -+ -+void pcap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct pcap_data *ppri; -+ struct pcap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ ppri = (struct pcap_data *) pri->user; -+ *ppri = ((struct pcap_data) -+ { .host_if = init->host_if, -+ .promisc = init->promisc, -+ .optimize = init->optimize, -+ .filter = init->filter, -+ .compiled = NULL, -+ .pcap = NULL }); -+} -+ -+static int pcap_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(pcap_user_read(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER, -+ (struct pcap_data *) &lp->user)); -+} -+ -+static int pcap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ return(-EPERM); -+} -+ -+static struct net_kern_info pcap_kern_info = { -+ .init = pcap_init, -+ .protocol = eth_protocol, -+ .read = pcap_read, -+ .write = pcap_write, -+}; -+ -+int pcap_setup(char *str, char **mac_out, void *data) -+{ -+ struct pcap_init *init = data; 
-+ char *remain, *host_if = NULL, *options[2] = { NULL, NULL }; -+ int i; -+ -+ *init = ((struct pcap_init) -+ { .host_if = "eth0", -+ .promisc = 1, -+ .optimize = 0, -+ .filter = NULL }); -+ -+ remain = split_if_spec(str, &host_if, &init->filter, -+ &options[0], &options[1], NULL); -+ if(remain != NULL){ -+ printk(KERN_ERR "pcap_setup - Extra garbage on " -+ "specification : '%s'\n", remain); -+ return(0); -+ } -+ -+ if(host_if != NULL) -+ init->host_if = host_if; -+ -+ for(i = 0; i < sizeof(options)/sizeof(options[0]); i++){ -+ if(options[i] == NULL) -+ continue; -+ if(!strcmp(options[i], "promisc")) -+ init->promisc = 1; -+ else if(!strcmp(options[i], "nopromisc")) -+ init->promisc = 0; -+ else if(!strcmp(options[i], "optimize")) -+ init->optimize = 1; -+ else if(!strcmp(options[i], "nooptimize")) -+ init->optimize = 0; -+ else printk("pcap_setup : bad option - '%s'\n", options[i]); -+ } -+ -+ return(1); -+} -+ -+static struct transport pcap_transport = { -+ .list = LIST_HEAD_INIT(pcap_transport.list), -+ .name = "pcap", -+ .setup = pcap_setup, -+ .user = &pcap_user_info, -+ .kern = &pcap_kern_info, -+ .private_size = sizeof(struct pcap_data), -+ .setup_size = sizeof(struct pcap_init), -+}; -+ -+static int register_pcap(void) -+{ -+ register_transport(&pcap_transport); -+ return(1); -+} -+ -+__initcall(register_pcap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pcap_user.c um/arch/um/drivers/pcap_user.c ---- orig/arch/um/drivers/pcap_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/pcap_user.c Sun Dec 15 21:04:39 2002 -@@ -0,0 +1,143 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike -+ * Licensed under the GPL. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "net_user.h" -+#include "pcap_user.h" -+#include "user.h" -+ -+#define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) -+ -+#define PCAP_FD(p) (*(int *)(p)) -+ -+static void pcap_user_init(void *data, void *dev) -+{ -+ struct pcap_data *pri = data; -+ pcap_t *p; -+ char errors[PCAP_ERRBUF_SIZE]; -+ -+ p = pcap_open_live(pri->host_if, MAX_PACKET, pri->promisc, 0, errors); -+ if(p == NULL){ -+ printk("pcap_user_init : pcap_open_live failed - '%s'\n", -+ errors); -+ return; -+ } -+ -+ pri->dev = dev; -+ pri->pcap = p; -+} -+ -+static int pcap_open(void *data) -+{ -+ struct pcap_data *pri = data; -+ __u32 netmask; -+ int err; -+ -+ if(pri->pcap == NULL) -+ return(-ENODEV); -+ -+ if(pri->filter != NULL){ -+ err = dev_netmask(pri->dev, &netmask); -+ if(err < 0){ -+ printk("pcap_open : dev_netmask failed\n"); -+ return(-EIO); -+ } -+ -+ pri->compiled = um_kmalloc(sizeof(struct bpf_program)); -+ if(pri->compiled == NULL){ -+ printk("pcap_open : kmalloc failed\n"); -+ return(-ENOMEM); -+ } -+ -+ err = pcap_compile(pri->pcap, -+ (struct bpf_program *) pri->compiled, -+ pri->filter, pri->optimize, netmask); -+ if(err < 0){ -+ printk("pcap_open : pcap_compile failed - '%s'\n", -+ pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ -+ err = pcap_setfilter(pri->pcap, pri->compiled); -+ if(err < 0){ -+ printk("pcap_open : pcap_setfilter failed - '%s'\n", -+ pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ } -+ -+ return(PCAP_FD(pri->pcap)); -+} -+ 
-+static void pcap_remove(void *data) -+{ -+ struct pcap_data *pri = data; -+ -+ if(pri->compiled != NULL) -+ pcap_freecode(pri->compiled); -+ -+ pcap_close(pri->pcap); -+} -+ -+struct pcap_handler_data { -+ char *buffer; -+ int len; -+}; -+ -+static void handler(u_char *data, const struct pcap_pkthdr *header, -+ const u_char *packet) -+{ -+ int len; -+ -+ struct pcap_handler_data *hdata = (struct pcap_handler_data *) data; -+ -+ len = hdata->len < header->caplen ? hdata->len : header->caplen; -+ memcpy(hdata->buffer, packet, len); -+ hdata->len = len; -+} -+ -+int pcap_user_read(int fd, void *buffer, int len, struct pcap_data *pri) -+{ -+ struct pcap_handler_data hdata = ((struct pcap_handler_data) -+ { .buffer = buffer, -+ .len = len }); -+ int n; -+ -+ n = pcap_dispatch(pri->pcap, 1, handler, (u_char *) &hdata); -+ if(n < 0){ -+ printk("pcap_dispatch failed - %s\n", pcap_geterr(pri->pcap)); -+ return(-EIO); -+ } -+ else if(n == 0) -+ return(0); -+ return(hdata.len); -+} -+ -+struct net_user_info pcap_user_info = { -+ .init = pcap_user_init, -+ .open = pcap_open, -+ .close = NULL, -+ .remove = pcap_remove, -+ .set_mtu = NULL, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = MAX_PACKET - ETH_HEADER_OTHER -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pcap_user.h um/arch/um/drivers/pcap_user.h ---- orig/arch/um/drivers/pcap_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/pcap_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct pcap_data { -+ char *host_if; -+ int promisc; -+ int optimize; -+ char *filter; -+ void *compiled; -+ void *pcap; -+ void *dev; -+}; -+ -+extern struct net_user_info pcap_user_info; -+ -+extern int pcap_user_read(int fd, void *buf, int len, struct pcap_data *pri); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/port.h um/arch/um/drivers/port.h ---- orig/arch/um/drivers/port.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/port.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PORT_H__ -+#define __PORT_H__ -+ -+extern void *port_data(int port); -+extern int port_wait(void *data); -+extern void port_kern_close(void *d); -+extern int port_connection(int fd, int *socket_out, int *pid_out); -+extern int port_listen_fd(int port); -+extern void port_read(int fd, void *data); -+extern void port_kern_free(void *d); -+extern int port_rcv_fd(int fd); -+extern void port_remove_dev(void *d); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/port_kern.c um/arch/um/drivers/port_kern.c ---- orig/arch/um/drivers/port_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/port_kern.c Mon Dec 30 20:57:42 2002 -@@ -0,0 +1,302 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/list.h" -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "linux/irq.h" -+#include "linux/spinlock.h" -+#include "linux/errno.h" -+#include "asm/semaphore.h" -+#include "asm/errno.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "port.h" -+#include "init.h" -+#include "os.h" -+ -+struct port_list { -+ struct list_head list; -+ int has_connection; -+ struct semaphore sem; -+ int port; -+ int fd; -+ spinlock_t lock; -+ struct list_head pending; -+ struct list_head connections; -+}; -+ -+struct port_dev { -+ struct port_list *port; -+ int helper_pid; -+ int telnetd_pid; -+}; -+ -+struct connection { -+ struct list_head list; -+ int fd; -+ int helper_pid; -+ int socket[2]; -+ int telnetd_pid; -+ struct port_list *port; -+}; -+ -+static void pipe_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct connection *conn = data; -+ int fd; -+ -+ fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); -+ if(fd < 0){ -+ if(fd == -EAGAIN) -+ return; -+ -+ printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", -+ -fd); -+ os_close_file(conn->fd); -+ } -+ -+ list_del(&conn->list); -+ -+ conn->fd = fd; -+ list_add(&conn->list, &conn->port->connections); -+ -+ up(&conn->port->sem); -+} -+ -+static int port_accept(struct port_list *port) -+{ -+ struct connection 
*conn; -+ int fd, socket[2], pid, ret = 0; -+ -+ fd = port_connection(port->fd, socket, &pid); -+ if(fd < 0){ -+ if(fd != -EAGAIN) -+ printk(KERN_ERR "port_accept : port_connection " -+ "returned %d\n", -fd); -+ goto out; -+ } -+ -+ conn = kmalloc(sizeof(*conn), GFP_ATOMIC); -+ if(conn == NULL){ -+ printk(KERN_ERR "port_accept : failed to allocate " -+ "connection\n"); -+ goto out_close; -+ } -+ *conn = ((struct connection) -+ { .list = LIST_HEAD_INIT(conn->list), -+ .fd = fd, -+ .socket = { socket[0], socket[1] }, -+ .telnetd_pid = pid, -+ .port = port }); -+ -+ if(um_request_irq(TELNETD_IRQ, socket[0], IRQ_READ, pipe_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "telnetd", conn)){ -+ printk(KERN_ERR "port_accept : failed to get IRQ for " -+ "telnetd\n"); -+ goto out_free; -+ } -+ -+ list_add(&conn->list, &port->pending); -+ return(1); -+ -+ out_free: -+ kfree(conn); -+ out_close: -+ os_close_file(fd); -+ if(pid != -1) -+ os_kill_process(pid, 1); -+ out: -+ return(ret); -+} -+ -+DECLARE_MUTEX(ports_sem); -+struct list_head ports = LIST_HEAD_INIT(ports); -+ -+void port_task_proc(void *unused) -+{ -+ struct port_list *port; -+ struct list_head *ele; -+ unsigned long flags; -+ -+ save_flags(flags); -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ if(!port->has_connection) -+ continue; -+ reactivate_fd(port->fd, ACCEPT_IRQ); -+ while(port_accept(port)) ; -+ port->has_connection = 0; -+ } -+ restore_flags(flags); -+} -+ -+struct tq_struct port_task = { -+ .routine = port_task_proc, -+ .data = NULL -+}; -+ -+static void port_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct port_list *port = data; -+ -+ port->has_connection = 1; -+ schedule_task(&port_task); -+} -+ -+void *port_data(int port_num) -+{ -+ struct list_head *ele; -+ struct port_list *port; -+ struct port_dev *dev = NULL; -+ int fd; -+ -+ down(&ports_sem); -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ 
if(port->port == port_num) goto found; -+ } -+ port = kmalloc(sizeof(struct port_list), GFP_KERNEL); -+ if(port == NULL){ -+ printk(KERN_ERR "Allocation of port list failed\n"); -+ goto out; -+ } -+ -+ fd = port_listen_fd(port_num); -+ if(fd < 0){ -+ printk(KERN_ERR "binding to port %d failed, errno = %d\n", -+ port_num, -fd); -+ goto out_free; -+ } -+ if(um_request_irq(ACCEPT_IRQ, fd, IRQ_READ, port_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, "port", -+ port)){ -+ printk(KERN_ERR "Failed to get IRQ for port %d\n", port_num); -+ goto out_close; -+ } -+ -+ *port = ((struct port_list) -+ { .list = LIST_HEAD_INIT(port->list), -+ .has_connection = 0, -+ .sem = __SEMAPHORE_INITIALIZER(port->sem, -+ 0), -+ .lock = SPIN_LOCK_UNLOCKED, -+ .port = port_num, -+ .fd = fd, -+ .pending = LIST_HEAD_INIT(port->pending), -+ .connections = LIST_HEAD_INIT(port->connections) }); -+ list_add(&port->list, &ports); -+ -+ found: -+ dev = kmalloc(sizeof(struct port_dev), GFP_KERNEL); -+ if(dev == NULL){ -+ printk(KERN_ERR "Allocation of port device entry failed\n"); -+ goto out; -+ } -+ -+ *dev = ((struct port_dev) { .port = port, -+ .helper_pid = -1, -+ .telnetd_pid = -1 }); -+ goto out; -+ -+ out_free: -+ kfree(port); -+ out_close: -+ os_close_file(fd); -+ out: -+ up(&ports_sem); -+ return(dev); -+} -+ -+int port_wait(void *data) -+{ -+ struct port_dev *dev = data; -+ struct connection *conn; -+ struct port_list *port = dev->port; -+ int fd; -+ -+ while(1){ -+ if(down_interruptible(&port->sem)) -+ return(-ERESTARTSYS); -+ -+ spin_lock(&port->lock); -+ -+ conn = list_entry(port->connections.next, struct connection, -+ list); -+ list_del(&conn->list); -+ spin_unlock(&port->lock); -+ -+ os_shutdown_socket(conn->socket[0], 1, 1); -+ os_close_file(conn->socket[0]); -+ os_shutdown_socket(conn->socket[1], 1, 1); -+ os_close_file(conn->socket[1]); -+ -+ /* This is done here because freeing an IRQ can't be done -+ * within the IRQ handler. 
So, pipe_interrupt always ups -+ * the semaphore regardless of whether it got a successful -+ * connection. Then we loop here throwing out failed -+ * connections until a good one is found. -+ */ -+ free_irq(TELNETD_IRQ, conn); -+ -+ if(conn->fd >= 0) break; -+ os_close_file(conn->fd); -+ kfree(conn); -+ } -+ -+ fd = conn->fd; -+ dev->helper_pid = conn->helper_pid; -+ dev->telnetd_pid = conn->telnetd_pid; -+ kfree(conn); -+ -+ return(fd); -+} -+ -+void port_remove_dev(void *d) -+{ -+ struct port_dev *dev = d; -+ -+ if(dev->helper_pid != -1) -+ os_kill_process(dev->helper_pid, 0); -+ if(dev->telnetd_pid != -1) -+ os_kill_process(dev->telnetd_pid, 1); -+ dev->helper_pid = -1; -+ dev->telnetd_pid = -1; -+} -+ -+void port_kern_free(void *d) -+{ -+ struct port_dev *dev = d; -+ -+ port_remove_dev(dev); -+ kfree(dev); -+} -+ -+static void free_port(void) -+{ -+ struct list_head *ele; -+ struct port_list *port; -+ -+ list_for_each(ele, &ports){ -+ port = list_entry(ele, struct port_list, list); -+ free_irq_by_fd(port->fd); -+ os_close_file(port->fd); -+ } -+} -+ -+__uml_exitcall(free_port); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/port_user.c um/arch/um/drivers/port_user.c ---- orig/arch/um/drivers/port_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/port_user.c Mon Dec 16 22:46:20 2002 -@@ -0,0 +1,206 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "chan_user.h" -+#include "port.h" -+#include "helper.h" -+#include "os.h" -+ -+struct port_chan { -+ int raw; -+ struct termios tt; -+ void *kernel_data; -+ char dev[sizeof("32768\0")]; -+}; -+ -+void *port_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct port_chan *data; -+ void *kern_data; -+ char *end; -+ int port; -+ -+ if(*str != ':'){ -+ printk("port_init : channel type 'port' must specify a " -+ "port number\n"); -+ return(NULL); -+ } -+ str++; -+ port = strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk("port_init : couldn't parse port '%s'\n", str); -+ return(NULL); -+ } -+ -+ if((kern_data = port_data(port)) == NULL) -+ return(NULL); -+ -+ if((data = um_kmalloc(sizeof(*data))) == NULL) -+ goto err; -+ -+ *data = ((struct port_chan) { .raw = opts->raw, -+ .kernel_data = kern_data }); -+ sprintf(data->dev, "%d", port); -+ -+ return(data); -+ err: -+ port_kern_free(kern_data); -+ return(NULL); -+} -+ -+void port_free(void *d) -+{ -+ struct port_chan *data = d; -+ -+ port_kern_free(data->kernel_data); -+ kfree(data); -+} -+ -+int port_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct port_chan *data = d; -+ int fd; -+ -+ fd = port_wait(data->kernel_data); -+ if((fd >= 0) && data->raw){ -+ tcgetattr(fd, &data->tt); -+ raw(fd, 0); -+ } 
-+ *dev_out = data->dev; -+ return(fd); -+} -+ -+void port_close(int fd, void *d) -+{ -+ struct port_chan *data = d; -+ -+ port_remove_dev(data->kernel_data); -+ close(fd); -+} -+ -+int port_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct port_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops port_ops = { -+ .type = "port", -+ .init = port_init, -+ .open = port_open, -+ .close = port_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = port_console_write, -+ .window_size = generic_window_size, -+ .free = port_free, -+ .winch = 1, -+}; -+ -+int port_listen_fd(int port) -+{ -+ struct sockaddr_in addr; -+ int fd, err; -+ -+ fd = socket(PF_INET, SOCK_STREAM, 0); -+ if(fd == -1) -+ return(-errno); -+ -+ addr.sin_family = AF_INET; -+ addr.sin_port = htons(port); -+ addr.sin_addr.s_addr = htonl(INADDR_ANY); -+ if(bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0){ -+ err = -errno; -+ goto out; -+ } -+ -+ if((listen(fd, 1) < 0) || (os_set_fd_block(fd, 0))){ -+ err = -errno; -+ goto out; -+ } -+ -+ return(fd); -+ out: -+ os_close_file(fd); -+ return(err); -+} -+ -+struct port_pre_exec_data { -+ int sock_fd; -+ int pipe_fd; -+}; -+ -+void port_pre_exec(void *arg) -+{ -+ struct port_pre_exec_data *data = arg; -+ -+ dup2(data->sock_fd, 0); -+ dup2(data->sock_fd, 1); -+ dup2(data->sock_fd, 2); -+ close(data->sock_fd); -+ dup2(data->pipe_fd, 3); -+ os_shutdown_socket(3, 1, 0); -+ close(data->pipe_fd); -+} -+ -+int port_connection(int fd, int *socket, int *pid_out) -+{ -+ int new, err; -+ char *argv[] = { "/usr/sbin/in.telnetd", "-L", -+ "/usr/lib/uml/port-helper", NULL }; -+ struct port_pre_exec_data data; -+ -+ if((new = os_accept_connection(fd)) < 0) -+ return(-errno); -+ -+ err = os_pipe(socket, 0, 0); -+ if(err) -+ goto out_close; -+ -+ data = ((struct port_pre_exec_data) -+ { .sock_fd = new, -+ .pipe_fd = socket[1] }); -+ -+ err = run_helper(port_pre_exec, &data, argv, 
NULL); -+ if(err < 0) -+ goto out_shutdown; -+ -+ *pid_out = err; -+ return(new); -+ -+ out_shutdown: -+ os_shutdown_socket(socket[0], 1, 1); -+ close(socket[0]); -+ os_shutdown_socket(socket[1], 1, 1); -+ close(socket[1]); -+ out_close: -+ close(new); -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/pty.c um/arch/um/drivers/pty.c ---- orig/arch/um/drivers/pty.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/pty.c Sun Dec 15 21:06:01 2002 -@@ -0,0 +1,148 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include "chan_user.h" -+#include "user.h" -+#include "user_util.h" -+#include "kern_util.h" -+ -+struct pty_chan { -+ void (*announce)(char *dev_name, int dev); -+ int dev; -+ int raw; -+ struct termios tt; -+ char dev_name[sizeof("/dev/pts/0123456\0")]; -+}; -+ -+void *pty_chan_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct pty_chan *data; -+ -+ if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); -+ *data = ((struct pty_chan) { .announce = opts->announce, -+ .dev = device, -+ .raw = opts->raw }); -+ return(data); -+} -+ -+int pts_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct pty_chan *data = d; -+ char *dev; -+ int fd; -+ -+ if((fd = get_pty()) < 0){ -+ printk("open_pts : Failed to open pts\n"); -+ return(-errno); -+ } -+ if(data->raw){ -+ tcgetattr(fd, &data->tt); -+ raw(fd, 0); -+ } -+ -+ dev = ptsname(fd); -+ sprintf(data->dev_name, "%s", dev); -+ *dev_out = data->dev_name; 
-+ if(data->announce) (*data->announce)(dev, data->dev); -+ return(fd); -+} -+ -+int getmaster(char *line) -+{ -+ struct stat stb; -+ char *pty, *bank, *cp; -+ int master; -+ -+ pty = &line[strlen("/dev/ptyp")]; -+ for (bank = "pqrs"; *bank; bank++) { -+ line[strlen("/dev/pty")] = *bank; -+ *pty = '0'; -+ if (stat(line, &stb) < 0) -+ break; -+ for (cp = "0123456789abcdef"; *cp; cp++) { -+ *pty = *cp; -+ master = open(line, O_RDWR); -+ if (master >= 0) { -+ char *tp = &line[strlen("/dev/")]; -+ int ok; -+ -+ /* verify slave side is usable */ -+ *tp = 't'; -+ ok = access(line, R_OK|W_OK) == 0; -+ *tp = 'p'; -+ if (ok) return(master); -+ (void) close(master); -+ } -+ } -+ } -+ return(-1); -+} -+ -+int pty_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct pty_chan *data = d; -+ int fd; -+ char dev[sizeof("/dev/ptyxx\0")] = "/dev/ptyxx"; -+ -+ fd = getmaster(dev); -+ if(fd < 0) return(-errno); -+ -+ if(data->raw) raw(fd, 0); -+ if(data->announce) (*data->announce)(dev, data->dev); -+ -+ sprintf(data->dev_name, "%s", dev); -+ *dev_out = data->dev_name; -+ return(fd); -+} -+ -+int pty_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct pty_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops pty_ops = { -+ .type = "pty", -+ .init = pty_chan_init, -+ .open = pty_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = pty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+struct chan_ops pts_ops = { -+ .type = "pts", -+ .init = pty_chan_init, -+ .open = pts_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = pty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip.h um/arch/um/drivers/slip.h ---- orig/arch/um/drivers/slip.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/slip.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,39 @@ -+#ifndef __UM_SLIP_H -+#define __UM_SLIP_H -+ -+#define BUF_SIZE 1500 -+ /* two bytes each for a (pathological) max packet of escaped chars + * -+ * terminating END char + initial END char */ -+#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) -+ -+struct slip_data { -+ void *dev; -+ char name[sizeof("slnnnnn\0")]; -+ char *addr; -+ char *gate_addr; -+ int slave; -+ char ibuf[ENC_BUF_SIZE]; -+ char obuf[ENC_BUF_SIZE]; -+ int more; /* more data: do not read fd until ibuf has been drained */ -+ int pos; -+ int esc; -+}; -+ -+extern struct net_user_info slip_user_info; -+ -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri); -+extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip_kern.c um/arch/um/drivers/slip_kern.c ---- orig/arch/um/drivers/slip_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/slip_kern.c Sun Dec 15 21:06:24 2002 -@@ -0,0 +1,109 @@ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/stddef.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/if_arp.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "kern.h" -+#include "slip.h" -+ -+struct slip_init { -+ char *gate_addr; -+}; -+ -+void slip_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *private; -+ struct slip_data *spri; -+ struct slip_init *init = data; -+ -+ private = dev->priv; -+ spri = (struct slip_data *) private->user; -+ *spri = ((struct slip_data) -+ { .name = { '\0' }, -+ .addr = NULL, -+ .gate_addr = init->gate_addr, -+ .slave = -1, -+ .ibuf = { '\0' }, -+ .obuf = { '\0' }, -+ .pos = 0, -+ .esc = 0, -+ .dev = dev }); -+ -+ dev->init = NULL; -+ dev->hard_header_len = 0; -+ dev->addr_len = 4; -+ dev->type = ARPHRD_ETHER; -+ dev->tx_queue_len = 256; -+ dev->flags = IFF_NOARP; -+ printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr); -+} -+ -+static unsigned short slip_protocol(struct sk_buff *skbuff) -+{ -+ return(htons(ETH_P_IP)); -+} -+ -+static int slip_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slip_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, -+ (struct slip_data *) &lp->user)); -+} -+ -+static int slip_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slip_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct slip_data *) &lp->user)); -+} -+ -+struct net_kern_info slip_kern_info = { -+ .init = slip_init, -+ .protocol = slip_protocol, -+ .read = slip_read, -+ .write = slip_write, -+}; -+ -+static 
int slip_setup(char *str, char **mac_out, void *data) -+{ -+ struct slip_init *init = data; -+ -+ *init = ((struct slip_init) -+ { .gate_addr = NULL }); -+ -+ if(str[0] != '\0') -+ init->gate_addr = str; -+ return(1); -+} -+ -+static struct transport slip_transport = { -+ .list = LIST_HEAD_INIT(slip_transport.list), -+ .name = "slip", -+ .setup = slip_setup, -+ .user = &slip_user_info, -+ .kern = &slip_kern_info, -+ .private_size = sizeof(struct slip_data), -+ .setup_size = sizeof(struct slip_init), -+}; -+ -+static int register_slip(void) -+{ -+ register_transport(&slip_transport); -+ return(1); -+} -+ -+__initcall(register_slip); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip_proto.h um/arch/um/drivers/slip_proto.h ---- orig/arch/um/drivers/slip_proto.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/slip_proto.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,93 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SLIP_PROTO_H__ -+#define __UM_SLIP_PROTO_H__ -+ -+/* SLIP protocol characters. 
*/ -+#define SLIP_END 0300 /* indicates end of frame */ -+#define SLIP_ESC 0333 /* indicates byte stuffing */ -+#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */ -+#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */ -+ -+static inline int slip_unesc(unsigned char c,char *buf,int *pos, int *esc) -+{ -+ int ret; -+ -+ switch(c){ -+ case SLIP_END: -+ *esc = 0; -+ ret=*pos; -+ *pos=0; -+ return(ret); -+ case SLIP_ESC: -+ *esc = 1; -+ return(0); -+ case SLIP_ESC_ESC: -+ if(*esc){ -+ *esc = 0; -+ c = SLIP_ESC; -+ } -+ break; -+ case SLIP_ESC_END: -+ if(*esc){ -+ *esc = 0; -+ c = SLIP_END; -+ } -+ break; -+ } -+ buf[(*pos)++] = c; -+ return(0); -+} -+ -+static inline int slip_esc(unsigned char *s, unsigned char *d, int len) -+{ -+ unsigned char *ptr = d; -+ unsigned char c; -+ -+ /* -+ * Send an initial END character to flush out any -+ * data that may have accumulated in the receiver -+ * due to line noise. -+ */ -+ -+ *ptr++ = SLIP_END; -+ -+ /* -+ * For each byte in the packet, send the appropriate -+ * character sequence, according to the SLIP protocol. -+ */ -+ -+ while (len-- > 0) { -+ switch(c = *s++) { -+ case SLIP_END: -+ *ptr++ = SLIP_ESC; -+ *ptr++ = SLIP_ESC_END; -+ break; -+ case SLIP_ESC: -+ *ptr++ = SLIP_ESC; -+ *ptr++ = SLIP_ESC_ESC; -+ break; -+ default: -+ *ptr++ = c; -+ break; -+ } -+ } -+ *ptr++ = SLIP_END; -+ return (ptr - d); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slip_user.c um/arch/um/drivers/slip_user.c ---- orig/arch/um/drivers/slip_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/slip_user.c Sun Dec 15 21:06:35 2002 -@@ -0,0 +1,279 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "net_user.h" -+#include "slip.h" -+#include "slip_proto.h" -+#include "helper.h" -+#include "os.h" -+ -+void slip_user_init(void *data, void *dev) -+{ -+ struct slip_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+static int set_up_tty(int fd) -+{ -+ int i; -+ struct termios tios; -+ -+ if (tcgetattr(fd, &tios) < 0) { -+ printk("could not get initial terminal attributes\n"); -+ return(-1); -+ } -+ -+ tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL; -+ tios.c_iflag = IGNBRK | IGNPAR; -+ tios.c_oflag = 0; -+ tios.c_lflag = 0; -+ for (i = 0; i < NCCS; i++) -+ tios.c_cc[i] = 0; -+ tios.c_cc[VMIN] = 1; -+ tios.c_cc[VTIME] = 0; -+ -+ cfsetospeed(&tios, B38400); -+ cfsetispeed(&tios, B38400); -+ -+ if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) { -+ printk("failed to set terminal attributes\n"); -+ return(-1); -+ } -+ return(0); -+} -+ -+struct slip_pre_exec_data { -+ int stdin; -+ int stdout; -+ int close_me; -+}; -+ -+static void slip_pre_exec(void *arg) -+{ -+ struct slip_pre_exec_data *data = arg; -+ -+ if(data->stdin != -1) dup2(data->stdin, 0); -+ dup2(data->stdout, 1); -+ if(data->close_me != -1) close(data->close_me); -+} -+ -+static int slip_tramp(char **argv, int fd) -+{ -+ struct slip_pre_exec_data pe_data; -+ char *output; -+ int status, pid, fds[2], err, output_len; -+ -+ err = os_pipe(fds, 1, 0); -+ if(err){ -+ printk("slip_tramp : pipe failed, errno = %d\n", -err); -+ 
return(err); -+ } -+ -+ err = 0; -+ pe_data.stdin = fd; -+ pe_data.stdout = fds[1]; -+ pe_data.close_me = fds[0]; -+ pid = run_helper(slip_pre_exec, &pe_data, argv, NULL); -+ -+ if(pid < 0) err = pid; -+ else { -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ if(output == NULL) -+ printk("slip_tramp : failed to allocate output " -+ "buffer\n"); -+ -+ close(fds[1]); -+ read_output(fds[0], output, output_len); -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+ if(waitpid(pid, &status, 0) < 0) err = errno; -+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ -+ printk("'%s' didn't exit with status 0\n", argv[0]); -+ err = EINVAL; -+ } -+ } -+ return(err); -+} -+ -+static int slip_open(void *data) -+{ -+ struct slip_data *pri = data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, -+ NULL }; -+ int sfd, mfd, disc, sencap, err; -+ -+ if((mfd = get_pty()) < 0){ -+ printk("umn : Failed to open pty\n"); -+ return(-1); -+ } -+ if((sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0)) < 0){ -+ printk("Couldn't open tty for slip line\n"); -+ return(-1); -+ } -+ if(set_up_tty(sfd)) return(-1); -+ pri->slave = sfd; -+ pri->pos = 0; -+ pri->esc = 0; -+ if(pri->gate_addr != NULL){ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ strcpy(gate_buf, pri->gate_addr); -+ -+ err = slip_tramp(argv, sfd); -+ -+ if(err != 0){ -+ printk("slip_tramp failed - errno = %d\n", err); -+ return(-err); -+ } -+ if(ioctl(pri->slave, SIOCGIFNAME, pri->name) < 0){ -+ printk("SIOCGIFNAME failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ iter_addresses(pri->dev, open_addr, pri->name); -+ } -+ else { -+ disc = N_SLIP; -+ if(ioctl(sfd, TIOCSETD, &disc) < 0){ -+ printk("Failed to set slip line discipline - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ sencap = 0; -+ if(ioctl(sfd, SIOCSIFENCAP, &sencap) < 0){ -+ printk("Failed 
to set slip encapsulation - " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ } -+ return(mfd); -+} -+ -+static void slip_close(int fd, void *data) -+{ -+ struct slip_data *pri = data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name, -+ NULL }; -+ int err; -+ -+ if(pri->gate_addr != NULL) -+ iter_addresses(pri->dev, close_addr, pri->name); -+ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ -+ err = slip_tramp(argv, -1); -+ -+ if(err != 0) -+ printk("slip_tramp failed - errno = %d\n", err); -+ close(fd); -+ close(pri->slave); -+ pri->slave = -1; -+} -+ -+int slip_user_read(int fd, void *buf, int len, struct slip_data *pri) -+{ -+ int i, n, size, start; -+ -+ if(pri->more>0) { -+ i = 0; -+ while(i < pri->more) { -+ size = slip_unesc(pri->ibuf[i++], -+ pri->ibuf, &pri->pos, &pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); -+ pri->more=pri->more-i; -+ return(size); -+ } -+ } -+ pri->more=0; -+ } -+ -+ n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); -+ if(n <= 0) return(n); -+ -+ start = pri->pos; -+ for(i = 0; i < n; i++){ -+ size = slip_unesc(pri->ibuf[start + i], -+ pri->ibuf, &pri->pos, &pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); -+ pri->more=n-(i+1); -+ return(size); -+ } -+ } -+ return(0); -+} -+ -+int slip_user_write(int fd, void *buf, int len, struct slip_data *pri) -+{ -+ int actual, n; -+ -+ actual = slip_esc(buf, pri->obuf, len); -+ n = net_write(fd, pri->obuf, actual); -+ if(n < 0) return(n); -+ else return(len); -+} -+ -+static int slip_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+static void slip_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct slip_data *pri = data; -+ -+ if(pri->slave == -1) return; -+ open_addr(addr, netmask, pri->name); -+} -+ -+static void slip_del_addr(unsigned char 
*addr, unsigned char *netmask, -+ void *data) -+{ -+ struct slip_data *pri = data; -+ -+ if(pri->slave == -1) return; -+ close_addr(addr, netmask, pri->name); -+} -+ -+struct net_user_info slip_user_info = { -+ .init = slip_user_init, -+ .open = slip_open, -+ .close = slip_close, -+ .remove = NULL, -+ .set_mtu = slip_set_mtu, -+ .add_address = slip_add_addr, -+ .delete_address = slip_del_addr, -+ .max_packet = BUF_SIZE -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slirp.h um/arch/um/drivers/slirp.h ---- orig/arch/um/drivers/slirp.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/slirp.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,51 @@ -+#ifndef __UM_SLIRP_H -+#define __UM_SLIRP_H -+ -+#define BUF_SIZE 1500 -+ /* two bytes each for a (pathological) max packet of escaped chars + * -+ * terminating END char + initial END char */ -+#define ENC_BUF_SIZE (2 * BUF_SIZE + 2) -+ -+#define SLIRP_MAX_ARGS 100 -+/* -+ * XXX this next definition is here because I don't understand why this -+ * initializer doesn't work in slirp_kern.c: -+ * -+ * argv : { init->argv[ 0 ... 
SLIRP_MAX_ARGS-1 ] }, -+ * -+ * or why I can't typecast like this: -+ * -+ * argv : (char* [SLIRP_MAX_ARGS])(init->argv), -+ */ -+struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; }; -+ -+struct slirp_data { -+ void *dev; -+ struct arg_list_dummy_wrapper argw; -+ int pid; -+ int slave; -+ char ibuf[ENC_BUF_SIZE]; -+ char obuf[ENC_BUF_SIZE]; -+ int more; /* more data: do not read fd until ibuf has been drained */ -+ int pos; -+ int esc; -+}; -+ -+extern struct net_user_info slirp_user_info; -+ -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri); -+extern int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slirp_kern.c um/arch/um/drivers/slirp_kern.c ---- orig/arch/um/drivers/slirp_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/slirp_kern.c Sun Dec 15 21:06:54 2002 -@@ -0,0 +1,132 @@ -+#include "linux/kernel.h" -+#include "linux/stddef.h" -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/if_arp.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "kern.h" -+#include "slirp.h" -+ -+struct slirp_init { -+ struct arg_list_dummy_wrapper argw; /* XXX should be simpler... 
*/ -+}; -+ -+void slirp_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *private; -+ struct slirp_data *spri; -+ struct slirp_init *init = data; -+ int i; -+ -+ private = dev->priv; -+ spri = (struct slirp_data *) private->user; -+ *spri = ((struct slirp_data) -+ { .argw = init->argw, -+ .pid = -1, -+ .slave = -1, -+ .ibuf = { '\0' }, -+ .obuf = { '\0' }, -+ .pos = 0, -+ .esc = 0, -+ .dev = dev }); -+ -+ dev->init = NULL; -+ dev->hard_header_len = 0; -+ dev->addr_len = 4; -+ dev->type = ARPHRD_ETHER; -+ dev->tx_queue_len = 256; -+ dev->flags = IFF_NOARP; -+ printk("SLIRP backend - command line:"); -+ for(i=0;spri->argw.argv[i]!=NULL;i++) { -+ printk(" '%s'",spri->argw.argv[i]); -+ } -+ printk("\n"); -+} -+ -+static unsigned short slirp_protocol(struct sk_buff *skbuff) -+{ -+ return(htons(ETH_P_IP)); -+} -+ -+static int slirp_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slirp_user_read(fd, (*skb)->mac.raw, (*skb)->dev->mtu, -+ (struct slirp_data *) &lp->user)); -+} -+ -+static int slirp_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(slirp_user_write(fd, (*skb)->data, (*skb)->len, -+ (struct slirp_data *) &lp->user)); -+} -+ -+struct net_kern_info slirp_kern_info = { -+ .init = slirp_init, -+ .protocol = slirp_protocol, -+ .read = slirp_read, -+ .write = slirp_write, -+}; -+ -+static int slirp_setup(char *str, char **mac_out, void *data) -+{ -+ struct slirp_init *init = data; -+ int i=0; -+ -+ *init = ((struct slirp_init) -+ { argw : { { "slirp", NULL } } }); -+ -+ str = split_if_spec(str, mac_out, NULL); -+ -+ if(str == NULL) { /* no command line given after MAC addr */ -+ return(1); -+ } -+ -+ do { -+ if(i>=SLIRP_MAX_ARGS-1) { -+ printk("slirp_setup: truncating slirp arguments\n"); -+ break; -+ } -+ init->argw.argv[i++] = str; -+ while(*str && *str!=',') { -+ if(*str=='_') *str=' '; -+ str++; -+ } -+ if(*str!=',') -+ break; -+ *str++='\0'; -+ } while(1); -+ 
init->argw.argv[i]=NULL; -+ return(1); -+} -+ -+static struct transport slirp_transport = { -+ .list = LIST_HEAD_INIT(slirp_transport.list), -+ .name = "slirp", -+ .setup = slirp_setup, -+ .user = &slirp_user_info, -+ .kern = &slirp_kern_info, -+ .private_size = sizeof(struct slirp_data), -+ .setup_size = sizeof(struct slirp_init), -+}; -+ -+static int register_slirp(void) -+{ -+ register_transport(&slirp_transport); -+ return(1); -+} -+ -+__initcall(register_slirp); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/slirp_user.c um/arch/um/drivers/slirp_user.c ---- orig/arch/um/drivers/slirp_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/slirp_user.c Sun Dec 15 21:07:08 2002 -@@ -0,0 +1,202 @@ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "net_user.h" -+#include "slirp.h" -+#include "slip_proto.h" -+#include "helper.h" -+#include "os.h" -+ -+void slirp_user_init(void *data, void *dev) -+{ -+ struct slirp_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+struct slirp_pre_exec_data { -+ int stdin; -+ int stdout; -+}; -+ -+static void slirp_pre_exec(void *arg) -+{ -+ struct slirp_pre_exec_data *data = arg; -+ -+ if(data->stdin != -1) dup2(data->stdin, 0); -+ if(data->stdout != -1) dup2(data->stdout, 1); -+} -+ -+static int slirp_tramp(char **argv, int fd) -+{ -+ struct slirp_pre_exec_data pe_data; -+ int pid; -+ -+ pe_data.stdin = fd; -+ pe_data.stdout = fd; -+ pid = run_helper(slirp_pre_exec, &pe_data, argv, NULL); -+ -+ return(pid); 
-+} -+ -+static int slirp_datachan(int *mfd, int *sfd) -+{ -+ int fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err){ -+ printk("slirp_datachan: Failed to open pipe, errno = %d\n", -+ -err); -+ return(err); -+ } -+ -+ *mfd = fds[0]; -+ *sfd = fds[1]; -+ return(0); -+} -+ -+static int slirp_open(void *data) -+{ -+ struct slirp_data *pri = data; -+ int sfd, mfd, pid, err; -+ -+ err = slirp_datachan(&mfd, &sfd); -+ if(err) -+ return(err); -+ -+ pid = slirp_tramp(pri->argw.argv, sfd); -+ -+ if(pid < 0){ -+ printk("slirp_tramp failed - errno = %d\n", pid); -+ os_close_file(sfd); -+ os_close_file(mfd); -+ return(pid); -+ } -+ -+ pri->slave = sfd; -+ pri->pos = 0; -+ pri->esc = 0; -+ -+ pri->pid = pid; -+ -+ return(mfd); -+} -+ -+static void slirp_close(int fd, void *data) -+{ -+ struct slirp_data *pri = data; -+ int status,err; -+ -+ close(fd); -+ close(pri->slave); -+ -+ pri->slave = -1; -+ -+ if(pri->pid<1) { -+ printk("slirp_close: no child process to shut down\n"); -+ return; -+ } -+ -+#if 0 -+ if(kill(pri->pid, SIGHUP)<0) { -+ printk("slirp_close: sending hangup to %d failed (%d)\n", -+ pri->pid, errno); -+ } -+#endif -+ -+ err = waitpid(pri->pid, &status, WNOHANG); -+ if(err<0) { -+ printk("slirp_close: waitpid returned %d\n", errno); -+ return; -+ } -+ -+ if(err==0) { -+ printk("slirp_close: process %d has not exited\n"); -+ return; -+ } -+ -+ pri->pid = -1; -+} -+ -+int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri) -+{ -+ int i, n, size, start; -+ -+ if(pri->more>0) { -+ i = 0; -+ while(i < pri->more) { -+ size = slip_unesc(pri->ibuf[i++], -+ pri->ibuf,&pri->pos,&pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[i], pri->more-i); -+ pri->more=pri->more-i; -+ return(size); -+ } -+ } -+ pri->more=0; -+ } -+ -+ n = net_read(fd, &pri->ibuf[pri->pos], sizeof(pri->ibuf) - pri->pos); -+ if(n <= 0) return(n); -+ -+ start = pri->pos; -+ for(i = 0; i < n; i++){ -+ size = slip_unesc(pri->ibuf[start + i], -+ 
pri->ibuf,&pri->pos,&pri->esc); -+ if(size){ -+ memcpy(buf, pri->ibuf, size); -+ memmove(pri->ibuf, &pri->ibuf[start+i+1], n-(i+1)); -+ pri->more=n-(i+1); -+ return(size); -+ } -+ } -+ return(0); -+} -+ -+int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri) -+{ -+ int actual, n; -+ -+ actual = slip_esc(buf, pri->obuf, len); -+ n = net_write(fd, pri->obuf, actual); -+ if(n < 0) return(n); -+ else return(len); -+} -+ -+static int slirp_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+struct net_user_info slirp_user_info = { -+ .init = slirp_user_init, -+ .open = slirp_open, -+ .close = slirp_close, -+ .remove = NULL, -+ .set_mtu = slirp_set_mtu, -+ .add_address = NULL, -+ .delete_address = NULL, -+ .max_packet = BUF_SIZE -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ssl.c um/arch/um/drivers/ssl.c ---- orig/arch/um/drivers/ssl.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/ssl.c Thu Mar 6 18:55:01 2003 -@@ -0,0 +1,265 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/fs.h" -+#include "linux/tty.h" -+#include "linux/tty_driver.h" -+#include "linux/major.h" -+#include "linux/mm.h" -+#include "linux/init.h" -+#include "asm/termbits.h" -+#include "asm/irq.h" -+#include "line.h" -+#include "ssl.h" -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "mconsole_kern.h" -+#include "2_5compat.h" -+ -+static int ssl_version = 1; -+ -+/* Referenced only by tty_driver below - presumably it's locked correctly -+ * by the tty driver. 
-+ */ -+static int ssl_refcount = 0; -+ -+static struct tty_driver ssl_driver; -+ -+#define NR_PORTS 64 -+ -+void ssl_announce(char *dev_name, int dev) -+{ -+ printk(KERN_INFO "Serial line %d assigned device '%s'\n", dev, -+ dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = ssl_announce, -+ .xterm_title = "Serial Line #%d", -+ .raw = 1, -+ .tramp_stack = 0, -+ .in_kernel = 1, -+}; -+ -+static int ssl_config(char *str); -+static int ssl_get_config(char *dev, char *str, int size, char **error_out); -+static int ssl_remove(char *str); -+ -+static struct line_driver driver = { -+ .name = "UML serial line", -+ .devfs_name = "tts/%d", -+ .major = TTY_MAJOR, -+ .minor_start = 64, -+ .type = TTY_DRIVER_TYPE_SERIAL, -+ .subtype = 0, -+ .read_irq = SSL_IRQ, -+ .read_irq_name = "ssl", -+ .write_irq = SSL_WRITE_IRQ, -+ .write_irq_name = "ssl-write", -+ .symlink_from = "serial", -+ .symlink_to = "tts", -+ .mc = { -+ .name = "ssl", -+ .config = ssl_config, -+ .get_config = ssl_get_config, -+ .remove = ssl_remove, -+ }, -+}; -+ -+/* The array is initialized by line_init, which is an initcall. The -+ * individual elements are protected by individual semaphores. -+ */ -+static struct line serial_lines[NR_PORTS] = -+ { [0 ... 
NR_PORTS - 1] = LINE_INIT(CONFIG_SSL_CHAN, &driver) }; -+ -+static struct lines lines = LINES_INIT(NR_PORTS); -+ -+static int ssl_config(char *str) -+{ -+ return(line_config(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), str)); -+} -+ -+static int ssl_get_config(char *dev, char *str, int size, char **error_out) -+{ -+ return(line_get_config(dev, serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), -+ str, size, error_out)); -+} -+ -+static int ssl_remove(char *str) -+{ -+ return(line_remove(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0]), str)); -+} -+ -+int ssl_open(struct tty_struct *tty, struct file *filp) -+{ -+ return(line_open(serial_lines, tty, &opts)); -+} -+ -+static void ssl_close(struct tty_struct *tty, struct file * filp) -+{ -+ line_close(serial_lines, tty); -+} -+ -+static int ssl_write(struct tty_struct * tty, int from_user, -+ const unsigned char *buf, int count) -+{ -+ return(line_write(serial_lines, tty, from_user, buf, count)); -+} -+ -+static void ssl_put_char(struct tty_struct *tty, unsigned char ch) -+{ -+ line_write(serial_lines, tty, 0, &ch, sizeof(ch)); -+} -+ -+static void ssl_flush_chars(struct tty_struct *tty) -+{ -+ return; -+} -+ -+static int ssl_chars_in_buffer(struct tty_struct *tty) -+{ -+ return(0); -+} -+ -+static void ssl_flush_buffer(struct tty_struct *tty) -+{ -+ return; -+} -+ -+static int ssl_ioctl(struct tty_struct *tty, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ int ret; -+ -+ ret = 0; -+ switch(cmd){ -+ case TCGETS: -+ case TCSETS: -+ case TCFLSH: -+ case TCSETSF: -+ case TCSETSW: -+ case TCGETA: -+ case TIOCMGET: -+ ret = -ENOIOCTLCMD; -+ break; -+ default: -+ printk(KERN_ERR -+ "Unimplemented ioctl in ssl_ioctl : 0x%x\n", cmd); -+ ret = -ENOIOCTLCMD; -+ break; -+ } -+ return(ret); -+} -+ -+static void ssl_throttle(struct tty_struct * tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_throttle\n"); -+} -+ -+static void ssl_unthrottle(struct tty_struct 
* tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_unthrottle\n"); -+} -+ -+static void ssl_set_termios(struct tty_struct *tty, -+ struct termios *old_termios) -+{ -+} -+ -+static void ssl_stop(struct tty_struct *tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_stop\n"); -+} -+ -+static void ssl_start(struct tty_struct *tty) -+{ -+ printk(KERN_ERR "Someone should implement ssl_start\n"); -+} -+ -+void ssl_hangup(struct tty_struct *tty) -+{ -+} -+ -+static struct tty_driver ssl_driver = { -+ .refcount = &ssl_refcount, -+ .open = ssl_open, -+ .close = ssl_close, -+ .write = ssl_write, -+ .put_char = ssl_put_char, -+ .flush_chars = ssl_flush_chars, -+ .chars_in_buffer = ssl_chars_in_buffer, -+ .flush_buffer = ssl_flush_buffer, -+ .ioctl = ssl_ioctl, -+ .throttle = ssl_throttle, -+ .unthrottle = ssl_unthrottle, -+ .set_termios = ssl_set_termios, -+ .stop = ssl_stop, -+ .start = ssl_start, -+ .hangup = ssl_hangup -+}; -+ -+/* Changed by ssl_init and referenced by ssl_exit, which are both serialized -+ * by being an initcall and exitcall, respectively. 
-+ */ -+static int ssl_init_done = 0; -+ -+int ssl_init(void) -+{ -+ char *new_title; -+ -+ printk(KERN_INFO "Initializing software serial port version %d\n", -+ ssl_version); -+ -+ line_register_devfs(&lines, &driver, &ssl_driver, serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0])); -+ -+ lines_init(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0])); -+ -+ new_title = add_xterm_umid(opts.xterm_title); -+ if(new_title != NULL) opts.xterm_title = new_title; -+ -+ ssl_init_done = 1; -+ return(0); -+} -+ -+__initcall(ssl_init); -+ -+static int ssl_chan_setup(char *str) -+{ -+ line_setup(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0]), -+ str, 1); -+ return(1); -+} -+ -+__setup("ssl", ssl_chan_setup); -+__channel_help(ssl_chan_setup, "ssl"); -+ -+static void ssl_exit(void) -+{ -+ if(!ssl_init_done) return; -+ close_lines(serial_lines, -+ sizeof(serial_lines)/sizeof(serial_lines[0])); -+} -+ -+__uml_exitcall(ssl_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ssl.h um/arch/um/drivers/ssl.h ---- orig/arch/um/drivers/ssl.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/ssl.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SSL_H__ -+#define __SSL_H__ -+ -+extern int ssl_read(int fd, int line); -+extern void ssl_receive_char(int line, char ch); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/stdio_console.c um/arch/um/drivers/stdio_console.c ---- orig/arch/um/drivers/stdio_console.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/stdio_console.c Sun Dec 15 21:08:20 2002 -@@ -0,0 +1,250 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/posix_types.h" -+#include "linux/tty.h" -+#include "linux/tty_flip.h" -+#include "linux/types.h" -+#include "linux/major.h" -+#include "linux/kdev_t.h" -+#include "linux/console.h" -+#include "linux/string.h" -+#include "linux/sched.h" -+#include "linux/list.h" -+#include "linux/init.h" -+#include "linux/interrupt.h" -+#include "linux/slab.h" -+#include "asm/current.h" -+#include "asm/softirq.h" -+#include "asm/hardirq.h" -+#include "asm/irq.h" -+#include "stdio_console.h" -+#include "line.h" -+#include "chan_kern.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "2_5compat.h" -+ -+#define MAX_TTYS (8) -+ -+/* Referenced only by tty_driver below - presumably it's locked correctly -+ * by the tty driver. 
-+ */ -+ -+static struct tty_driver console_driver; -+ -+static int console_refcount = 0; -+ -+static struct chan_ops init_console_ops = { -+ .type = "you shouldn't see this", -+ .init = NULL, -+ .open = NULL, -+ .close = NULL, -+ .read = NULL, -+ .write = NULL, -+ .console_write = generic_write, -+ .window_size = NULL, -+ .free = NULL, -+ .winch = 0, -+}; -+ -+static struct chan init_console_chan = { -+ .list = { }, -+ .primary = 1, -+ .input = 0, -+ .output = 1, -+ .opened = 1, -+ .fd = 1, -+ .pri = INIT_STATIC, -+ .ops = &init_console_ops, -+ .data = NULL -+}; -+ -+void stdio_announce(char *dev_name, int dev) -+{ -+ printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev, -+ dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = stdio_announce, -+ .xterm_title = "Virtual Console #%d", -+ .raw = 1, -+ .tramp_stack = 0, -+ .in_kernel = 1, -+}; -+ -+static int con_config(char *str); -+static int con_get_config(char *dev, char *str, int size, char **error_out); -+static int con_remove(char *str); -+ -+static struct line_driver driver = { -+ .name = "UML console", -+ .devfs_name = "vc/%d", -+ .major = TTY_MAJOR, -+ .minor_start = 0, -+ .type = TTY_DRIVER_TYPE_CONSOLE, -+ .subtype = SYSTEM_TYPE_CONSOLE, -+ .read_irq = CONSOLE_IRQ, -+ .read_irq_name = "console", -+ .write_irq = CONSOLE_WRITE_IRQ, -+ .write_irq_name = "console-write", -+ .symlink_from = "ttys", -+ .symlink_to = "vc", -+ .mc = { -+ .name = "con", -+ .config = con_config, -+ .get_config = con_get_config, -+ .remove = con_remove, -+ }, -+}; -+ -+static struct lines console_lines = LINES_INIT(MAX_TTYS); -+ -+/* The array is initialized by line_init, which is an initcall. The -+ * individual elements are protected by individual semaphores. -+ */ -+struct line vts[MAX_TTYS] = { LINE_INIT(CONFIG_CON_ZERO_CHAN, &driver), -+ [ 1 ... 
MAX_TTYS - 1 ] = -+ LINE_INIT(CONFIG_CON_CHAN, &driver) }; -+ -+static int con_config(char *str) -+{ -+ return(line_config(vts, sizeof(vts)/sizeof(vts[0]), str)); -+} -+ -+static int con_get_config(char *dev, char *str, int size, char **error_out) -+{ -+ return(line_get_config(dev, vts, sizeof(vts)/sizeof(vts[0]), str, -+ size, error_out)); -+} -+ -+static int con_remove(char *str) -+{ -+ return(line_remove(vts, sizeof(vts)/sizeof(vts[0]), str)); -+} -+ -+static int open_console(struct tty_struct *tty) -+{ -+ return(line_open(vts, tty, &opts)); -+} -+ -+static int con_open(struct tty_struct *tty, struct file *filp) -+{ -+ return(open_console(tty)); -+} -+ -+static void con_close(struct tty_struct *tty, struct file *filp) -+{ -+ line_close(vts, tty); -+} -+ -+static int con_write(struct tty_struct *tty, int from_user, -+ const unsigned char *buf, int count) -+{ -+ return(line_write(vts, tty, from_user, buf, count)); -+} -+ -+static void set_termios(struct tty_struct *tty, struct termios * old) -+{ -+} -+ -+static int chars_in_buffer(struct tty_struct *tty) -+{ -+ return(0); -+} -+ -+static int con_init_done = 0; -+ -+int stdio_init(void) -+{ -+ char *new_title; -+ -+ printk(KERN_INFO "Initializing stdio console driver\n"); -+ -+ line_register_devfs(&console_lines, &driver, &console_driver, vts, -+ sizeof(vts)/sizeof(vts[0])); -+ -+ lines_init(vts, sizeof(vts)/sizeof(vts[0])); -+ -+ new_title = add_xterm_umid(opts.xterm_title); -+ if(new_title != NULL) opts.xterm_title = new_title; -+ -+ open_console(NULL); -+ con_init_done = 1; -+ return(0); -+} -+ -+__initcall(stdio_init); -+ -+static void console_write(struct console *console, const char *string, -+ unsigned len) -+{ -+ if(con_init_done) down(&vts[console->index].sem); -+ console_write_chan(&vts[console->index].chan_list, string, len); -+ if(con_init_done) up(&vts[console->index].sem); -+} -+ -+static struct tty_driver console_driver = { -+ .refcount = &console_refcount, -+ .open = con_open, -+ .close = con_close, 
-+ .write = con_write, -+ .chars_in_buffer = chars_in_buffer, -+ .set_termios = set_termios -+}; -+ -+static kdev_t console_device(struct console *c) -+{ -+ return mk_kdev(TTY_MAJOR, c->index); -+} -+ -+static int console_setup(struct console *co, char *options) -+{ -+ return(0); -+} -+ -+static struct console stdiocons = INIT_CONSOLE("tty", console_write, -+ console_device, console_setup, -+ CON_PRINTBUFFER); -+ -+void stdio_console_init(void) -+{ -+ INIT_LIST_HEAD(&vts[0].chan_list); -+ list_add(&init_console_chan.list, &vts[0].chan_list); -+ register_console(&stdiocons); -+} -+ -+static int console_chan_setup(char *str) -+{ -+ line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1); -+ return(1); -+} -+ -+__setup("con", console_chan_setup); -+__channel_help(console_chan_setup, "con"); -+ -+static void console_exit(void) -+{ -+ if(!con_init_done) return; -+ close_lines(vts, sizeof(vts)/sizeof(vts[0])); -+} -+ -+__uml_exitcall(console_exit); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/stdio_console.h um/arch/um/drivers/stdio_console.h ---- orig/arch/um/drivers/stdio_console.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/stdio_console.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,21 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __STDIO_CONSOLE_H -+#define __STDIO_CONSOLE_H -+ -+extern void save_console_flags(void); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/tty.c um/arch/um/drivers/tty.c ---- orig/arch/um/drivers/tty.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/tty.c Sun Dec 15 21:08:41 2002 -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include "chan_user.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+ -+struct tty_chan { -+ char *dev; -+ int raw; -+ struct termios tt; -+}; -+ -+void *tty_chan_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct tty_chan *data; -+ -+ if(*str != ':'){ -+ printk("tty_init : channel type 'tty' must specify " -+ "a device\n"); -+ return(NULL); -+ } -+ str++; -+ -+ if((data = um_kmalloc(sizeof(*data))) == NULL) -+ return(NULL); -+ *data = ((struct tty_chan) { .dev = str, -+ .raw = opts->raw }); -+ -+ return(data); -+} -+ -+int tty_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct tty_chan *data = d; -+ int fd; -+ -+ fd = os_open_file(data->dev, of_set_rw(OPENFLAGS(), input, output), 0); -+ if(fd < 0) return(fd); -+ if(data->raw){ -+ tcgetattr(fd, &data->tt); -+ raw(fd, 0); -+ } -+ -+ *dev_out = data->dev; -+ return(fd); -+} -+ -+int tty_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct tty_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops tty_ops = { -+ .type = "tty", -+ .init = tty_chan_init, -+ .open = tty_open, -+ .close = generic_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = tty_console_write, -+ .window_size = generic_window_size, -+ .free = generic_free, -+ .winch = 0, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ubd_kern.c um/arch/um/drivers/ubd_kern.c ---- orig/arch/um/drivers/ubd_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/ubd_kern.c Tue Mar 11 15:46:36 2003 -@@ -0,0 +1,1067 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+/* 2001-09-28...2002-04-17 -+ * Partition stuff by James_McMechan@hotmail.com -+ * old style ubd by setting UBD_SHIFT to 0 -+ */ -+ -+#define MAJOR_NR UBD_MAJOR -+#define UBD_SHIFT 4 -+ -+#include "linux/config.h" -+#include "linux/blk.h" -+#include "linux/blkdev.h" -+#include "linux/hdreg.h" -+#include "linux/init.h" -+#include "linux/devfs_fs_kernel.h" -+#include "linux/cdrom.h" -+#include "linux/proc_fs.h" -+#include "linux/ctype.h" -+#include "linux/capability.h" -+#include "linux/mm.h" -+#include "linux/vmalloc.h" -+#include "linux/blkpg.h" -+#include "linux/genhd.h" -+#include "linux/spinlock.h" -+#include "asm/segment.h" -+#include "asm/uaccess.h" -+#include "asm/irq.h" -+#include "asm/types.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mconsole_kern.h" -+#include "init.h" -+#include "irq_user.h" -+#include "ubd_user.h" -+#include "2_5compat.h" -+#include "os.h" -+ -+static int ubd_open(struct inode * inode, struct file * filp); -+static int ubd_release(struct inode * inode, struct file * file); -+static int ubd_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg); -+static int ubd_revalidate(kdev_t rdev); -+static int ubd_revalidate1(kdev_t rdev); -+ -+#define MAX_DEV (8) -+#define MAX_MINOR (MAX_DEV << UBD_SHIFT) -+ -+/* Not 
modified by this driver */ -+static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; -+static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; -+ -+/* Protected by ubd_lock */ -+static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; -+ -+static struct block_device_operations ubd_blops = { -+ .open = ubd_open, -+ .release = ubd_release, -+ .ioctl = ubd_ioctl, -+ .revalidate = ubd_revalidate, -+}; -+ -+/* Protected by ubd_lock, except in prepare_request and ubd_ioctl because -+ * the block layer should ensure that the device is idle before closing it. -+ */ -+static struct hd_struct ubd_part[MAX_MINOR] = -+ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } }; -+ -+/* Protected by io_request_lock */ -+static request_queue_t *ubd_queue; -+ -+/* Protected by ubd_lock */ -+static int fake_major = MAJOR_NR; -+ -+static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; -+ -+#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ -+{ \ -+ .major = maj, \ -+ .major_name = name, \ -+ .minor_shift = shift, \ -+ .max_p = 1 << shift, \ -+ .part = parts, \ -+ .sizes = bsizes, \ -+ .nr_real = max, \ -+ .real_devices = NULL, \ -+ .next = NULL, \ -+ .fops = blops, \ -+ .de_arr = NULL, \ -+ .flags = 0 \ -+} -+ -+static struct gendisk ubd_gendisk = INIT_GENDISK(MAJOR_NR, "ubd", ubd_part, -+ UBD_SHIFT, sizes, MAX_DEV, -+ &ubd_blops); -+static struct gendisk fake_gendisk = INIT_GENDISK(0, "ubd", ubd_part, -+ UBD_SHIFT, sizes, MAX_DEV, -+ &ubd_blops); -+ -+#ifdef CONFIG_BLK_DEV_UBD_SYNC -+#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ -+ .cl = 1 }) -+#else -+#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ -+ .cl = 1 }) -+#endif -+ -+/* Not protected - changed only in ubd_setup_common and then only to -+ * to enable O_SYNC. 
-+ */ -+static struct openflags global_openflags = OPEN_FLAGS; -+ -+struct cow { -+ char *file; -+ int fd; -+ unsigned long *bitmap; -+ unsigned long bitmap_len; -+ int bitmap_offset; -+ int data_offset; -+}; -+ -+struct ubd { -+ char *file; -+ int count; -+ int fd; -+ __u64 size; -+ struct openflags boot_openflags; -+ struct openflags openflags; -+ devfs_handle_t devfs; -+ struct cow cow; -+}; -+ -+#define DEFAULT_COW { \ -+ .file = NULL, \ -+ .fd = -1, \ -+ .bitmap = NULL, \ -+ .bitmap_offset = 0, \ -+ .data_offset = 0, \ -+} -+ -+#define DEFAULT_UBD { \ -+ .file = NULL, \ -+ .count = 0, \ -+ .fd = -1, \ -+ .size = -1, \ -+ .boot_openflags = OPEN_FLAGS, \ -+ .openflags = OPEN_FLAGS, \ -+ .devfs = NULL, \ -+ .cow = DEFAULT_COW, \ -+} -+ -+struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; -+ -+static int ubd0_init(void) -+{ -+ struct ubd *dev = &ubd_dev[0]; -+ -+ if(dev->file == NULL) -+ dev->file = "root_fs"; -+ return(0); -+} -+ -+__initcall(ubd0_init); -+ -+/* Only changed by fake_ide_setup which is a setup */ -+static int fake_ide = 0; -+static struct proc_dir_entry *proc_ide_root = NULL; -+static struct proc_dir_entry *proc_ide = NULL; -+ -+static void make_proc_ide(void) -+{ -+ proc_ide_root = proc_mkdir("ide", 0); -+ proc_ide = proc_mkdir("ide0", proc_ide_root); -+} -+ -+static int proc_ide_read_media(char *page, char **start, off_t off, int count, -+ int *eof, void *data) -+{ -+ int len; -+ -+ strcpy(page, "disk\n"); -+ len = strlen("disk\n"); -+ len -= off; -+ if (len < count){ -+ *eof = 1; -+ if (len <= 0) return 0; -+ } -+ else len = count; -+ *start = page + off; -+ return len; -+} -+ -+static void make_ide_entries(char *dev_name) -+{ -+ struct proc_dir_entry *dir, *ent; -+ char name[64]; -+ -+ if(!fake_ide) return; -+ -+ /* Without locking this could race if a UML was booted with no -+ * disks and then two mconsole requests which add disks came in -+ * at the same time. 
-+ */ -+ spin_lock(&ubd_lock); -+ if(proc_ide_root == NULL) make_proc_ide(); -+ spin_unlock(&ubd_lock); -+ -+ dir = proc_mkdir(dev_name, proc_ide); -+ if(!dir) return; -+ -+ ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir); -+ if(!ent) return; -+ ent->nlink = 1; -+ ent->data = NULL; -+ ent->read_proc = proc_ide_read_media; -+ ent->write_proc = NULL; -+ sprintf(name,"ide0/%s", dev_name); -+ proc_symlink(dev_name, proc_ide_root, name); -+} -+ -+static int fake_ide_setup(char *str) -+{ -+ fake_ide = 1; -+ return(1); -+} -+ -+__setup("fake_ide", fake_ide_setup); -+ -+__uml_help(fake_ide_setup, -+"fake_ide\n" -+" Create ide0 entries that map onto ubd devices.\n\n" -+); -+ -+static int parse_unit(char **ptr) -+{ -+ char *str = *ptr, *end; -+ int n = -1; -+ -+ if(isdigit(*str)) { -+ n = simple_strtoul(str, &end, 0); -+ if(end == str) -+ return(-1); -+ *ptr = end; -+ } -+ else if (('a' <= *str) && (*str <= 'h')) { -+ n = *str - 'a'; -+ str++; -+ *ptr = str; -+ } -+ return(n); -+} -+ -+static int ubd_setup_common(char *str, int *index_out) -+{ -+ struct openflags flags = global_openflags; -+ struct ubd *dev; -+ char *backing_file; -+ int n, err; -+ -+ if(index_out) *index_out = -1; -+ n = *str; -+ if(n == '='){ -+ char *end; -+ int major; -+ -+ str++; -+ if(!strcmp(str, "sync")){ -+ global_openflags.s = 1; -+ return(0); -+ } -+ major = simple_strtoul(str, &end, 0); -+ if((*end != '\0') || (end == str)){ -+ printk(KERN_ERR -+ "ubd_setup : didn't parse major number\n"); -+ return(1); -+ } -+ -+ err = 1; -+ spin_lock(&ubd_lock); -+ if(fake_major != MAJOR_NR){ -+ printk(KERN_ERR "Can't assign a fake major twice\n"); -+ goto out1; -+ } -+ -+ fake_gendisk.major = major; -+ fake_major = major; -+ -+ printk(KERN_INFO "Setting extra ubd major number to %d\n", -+ major); -+ err = 0; -+ out1: -+ spin_unlock(&ubd_lock); -+ return(err); -+ } -+ -+ n = parse_unit(&str); -+ if(n < 0){ -+ printk(KERN_ERR "ubd_setup : couldn't parse unit number " -+ "'%s'\n", str); -+ return(1); -+ } 
-+ -+ if(n >= MAX_DEV){ -+ printk(KERN_ERR "ubd_setup : index %d out of range " -+ "(%d devices)\n", n, MAX_DEV); -+ return(1); -+ } -+ -+ err = 1; -+ spin_lock(&ubd_lock); -+ -+ dev = &ubd_dev[n]; -+ if(dev->file != NULL){ -+ printk(KERN_ERR "ubd_setup : device already configured\n"); -+ goto out2; -+ } -+ -+ if(index_out) *index_out = n; -+ -+ if (*str == 'r'){ -+ flags.w = 0; -+ str++; -+ } -+ if (*str == 's'){ -+ flags.s = 1; -+ str++; -+ } -+ if(*str++ != '='){ -+ printk(KERN_ERR "ubd_setup : Expected '='\n"); -+ goto out2; -+ } -+ -+ err = 0; -+ backing_file = strchr(str, ','); -+ if(backing_file){ -+ *backing_file = '\0'; -+ backing_file++; -+ } -+ dev->file = str; -+ dev->cow.file = backing_file; -+ dev->boot_openflags = flags; -+ out2: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_setup(char *str) -+{ -+ ubd_setup_common(str, NULL); -+ return(1); -+} -+ -+__setup("ubd", ubd_setup); -+__uml_help(ubd_setup, -+"ubd=\n" -+" This is used to associate a device with a file in the underlying\n" -+" filesystem. Usually, there is a filesystem in the file, but \n" -+" that's not required. Swap devices containing swap files can be\n" -+" specified like this. Also, a file which doesn't contain a\n" -+" filesystem can have its contents read in the virtual \n" -+" machine by running dd on the device. n must be in the range\n" -+" 0 to 7. Appending an 'r' to the number will cause that device\n" -+" to be mounted read-only. For example ubd1r=./ext_fs. 
Appending\n" -+" an 's' (has to be _after_ 'r', if there is one) will cause data\n" -+" to be written to disk on the host immediately.\n\n" -+); -+ -+static int fakehd(char *str) -+{ -+ printk(KERN_INFO -+ "fakehd : Changing ubd_gendisk.major_name to \"hd\".\n"); -+ ubd_gendisk.major_name = "hd"; -+ return(1); -+} -+ -+__setup("fakehd", fakehd); -+__uml_help(fakehd, -+"fakehd\n" -+" Change the ubd device name to \"hd\".\n\n" -+); -+ -+static void do_ubd_request(request_queue_t * q); -+ -+/* Only changed by ubd_init, which is an initcall. */ -+int thread_fd = -1; -+ -+/* Changed by ubd_handler, which is serialized because interrupts only -+ * happen on CPU 0. -+ */ -+int intr_count = 0; -+ -+static void ubd_finish(int error) -+{ -+ int nsect; -+ -+ if(error){ -+ end_request(0); -+ return; -+ } -+ nsect = CURRENT->current_nr_sectors; -+ CURRENT->sector += nsect; -+ CURRENT->buffer += nsect << 9; -+ CURRENT->errors = 0; -+ CURRENT->nr_sectors -= nsect; -+ CURRENT->current_nr_sectors = 0; -+ end_request(1); -+} -+ -+static void ubd_handler(void) -+{ -+ struct io_thread_req req; -+ int n; -+ -+ DEVICE_INTR = NULL; -+ intr_count++; -+ n = read_ubd_fs(thread_fd, &req, sizeof(req)); -+ if(n != sizeof(req)){ -+ printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " -+ "errno = %d\n", os_getpid(), -n); -+ spin_lock(&io_request_lock); -+ end_request(0); -+ spin_unlock(&io_request_lock); -+ return; -+ } -+ -+ if((req.offset != ((__u64) (CURRENT->sector)) << 9) || -+ (req.length != (CURRENT->current_nr_sectors) << 9)) -+ panic("I/O op mismatch"); -+ -+ spin_lock(&io_request_lock); -+ ubd_finish(req.error); -+ reactivate_fd(thread_fd, UBD_IRQ); -+ do_ubd_request(ubd_queue); -+ spin_unlock(&io_request_lock); -+} -+ -+static void ubd_intr(int irq, void *dev, struct pt_regs *unused) -+{ -+ ubd_handler(); -+} -+ -+/* Only changed by ubd_init, which is an initcall. 
*/ -+static int io_pid = -1; -+ -+void kill_io_thread(void) -+{ -+ if(io_pid != -1) -+ os_kill_process(io_pid, 1); -+} -+ -+__uml_exitcall(kill_io_thread); -+ -+/* Initialized in an initcall, and unchanged thereafter */ -+devfs_handle_t ubd_dir_handle; -+ -+static int ubd_add(int n) -+{ -+ struct ubd *dev = &ubd_dev[n]; -+ char name[sizeof("nnnnnn\0")], dev_name[sizeof("ubd0x")]; -+ int err = -EISDIR; -+ -+ if(dev->file == NULL) -+ goto out; -+ -+ err = ubd_revalidate1(MKDEV(MAJOR_NR, n << UBD_SHIFT)); -+ if(err) -+ goto out; -+ -+ sprintf(name, "%d", n); -+ dev->devfs = devfs_register(ubd_dir_handle, name, DEVFS_FL_REMOVABLE, -+ MAJOR_NR, n << UBD_SHIFT, S_IFBLK | -+ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, -+ &ubd_blops, NULL); -+ -+ if(!strcmp(ubd_gendisk.major_name, "ubd")) -+ sprintf(dev_name, "%s%d", ubd_gendisk.major_name, n); -+ else sprintf(dev_name, "%s%c", ubd_gendisk.major_name, -+ n + 'a'); -+ -+ make_ide_entries(dev_name); -+ return(0); -+ -+ out: -+ return(err); -+} -+ -+static int ubd_config(char *str) -+{ -+ int n, err; -+ -+ str = uml_strdup(str); -+ if(str == NULL){ -+ printk(KERN_ERR "ubd_config failed to strdup string\n"); -+ return(1); -+ } -+ err = ubd_setup_common(str, &n); -+ if(err){ -+ kfree(str); -+ return(-1); -+ } -+ if(n == -1) return(0); -+ -+ spin_lock(&ubd_lock); -+ err = ubd_add(n); -+ if(err) -+ ubd_dev[n].file = NULL; -+ spin_unlock(&ubd_lock); -+ -+ return(err); -+} -+ -+static int ubd_get_config(char *name, char *str, int size, char **error_out) -+{ -+ struct ubd *dev; -+ char *end; -+ int n, len = 0; -+ -+ n = simple_strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ *error_out = "ubd_get_config : didn't parse device number"; -+ return(-1); -+ } -+ -+ if((n >= MAX_DEV) || (n < 0)){ -+ *error_out = "ubd_get_config : device number out of range"; -+ return(-1); -+ } -+ -+ dev = &ubd_dev[n]; -+ spin_lock(&ubd_lock); -+ -+ if(dev->file == NULL){ -+ CONFIG_CHUNK(str, size, len, "", 1); -+ goto out; -+ } -+ -+ 
CONFIG_CHUNK(str, size, len, dev->file, 0); -+ -+ if(dev->cow.file != NULL){ -+ CONFIG_CHUNK(str, size, len, ",", 0); -+ CONFIG_CHUNK(str, size, len, dev->cow.file, 1); -+ } -+ else CONFIG_CHUNK(str, size, len, "", 1); -+ -+ out: -+ spin_unlock(&ubd_lock); -+ return(len); -+} -+ -+static int ubd_remove(char *str) -+{ -+ struct ubd *dev; -+ int n, err = -ENODEV; -+ -+ if(isdigit(*str)){ -+ char *end; -+ n = simple_strtoul(str, &end, 0); -+ if ((*end != '\0') || (end == str)) -+ return(err); -+ } -+ else if (('a' <= *str) && (*str <= 'h')) -+ n = *str - 'a'; -+ else -+ return(err); /* it should be a number 0-7/a-h */ -+ -+ if((n < 0) || (n >= MAX_DEV)) -+ return(err); -+ -+ dev = &ubd_dev[n]; -+ -+ spin_lock(&ubd_lock); -+ err = 0; -+ if(dev->file == NULL) -+ goto out; -+ err = -1; -+ if(dev->count > 0) -+ goto out; -+ if(dev->devfs != NULL) -+ devfs_unregister(dev->devfs); -+ -+ *dev = ((struct ubd) DEFAULT_UBD); -+ err = 0; -+ out: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static struct mc_device ubd_mc = { -+ .name = "ubd", -+ .config = ubd_config, -+ .get_config = ubd_get_config, -+ .remove = ubd_remove, -+}; -+ -+static int ubd_mc_init(void) -+{ -+ mconsole_register_dev(&ubd_mc); -+ return(0); -+} -+ -+__initcall(ubd_mc_init); -+ -+static request_queue_t *ubd_get_queue(kdev_t device) -+{ -+ return(ubd_queue); -+} -+ -+int ubd_init(void) -+{ -+ unsigned long stack; -+ int i, err; -+ -+ ubd_dir_handle = devfs_mk_dir (NULL, "ubd", NULL); -+ if (devfs_register_blkdev(MAJOR_NR, "ubd", &ubd_blops)) { -+ printk(KERN_ERR "ubd: unable to get major %d\n", MAJOR_NR); -+ return -1; -+ } -+ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[MAJOR_NR] = blk_sizes; -+ blk_size[MAJOR_NR] = sizes; -+ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); -+ -+ ubd_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); -+ blk_init_queue(ubd_queue, DEVICE_REQUEST); -+ INIT_ELV(ubd_queue, &ubd_queue->elevator); -+ -+ add_gendisk(&ubd_gendisk); -+ if (fake_major != 
MAJOR_NR){ -+ /* major number 0 is used to auto select */ -+ err = devfs_register_blkdev(fake_major, "fake", &ubd_blops); -+ if(fake_major == 0){ -+ /* auto device number case */ -+ fake_major = err; -+ if(err == 0) -+ return(-ENODEV); -+ } -+ else if (err){ -+ /* not auto so normal error */ -+ printk(KERN_ERR "ubd: error %d getting major %d\n", -+ err, fake_major); -+ return(-ENODEV); -+ } -+ -+ blk_dev[fake_major].queue = ubd_get_queue; -+ read_ahead[fake_major] = 8; /* 8 sector (4kB) read-ahead */ -+ blksize_size[fake_major] = blk_sizes; -+ blk_size[fake_major] = sizes; -+ INIT_HARDSECT(hardsect_size, fake_major, hardsect_sizes); -+ add_gendisk(&fake_gendisk); -+ } -+ -+ for(i=0;ifd); -+ if(dev->cow.file != NULL) { -+ os_close_file(dev->cow.fd); -+ vfree(dev->cow.bitmap); -+ dev->cow.bitmap = NULL; -+ } -+} -+ -+static int ubd_open_dev(struct ubd *dev) -+{ -+ struct openflags flags; -+ int err, create_cow, *create_ptr; -+ -+ dev->openflags = dev->boot_openflags; -+ create_cow = 0; -+ create_ptr = (dev->cow.file != NULL) ? 
&create_cow : NULL; -+ dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file, -+ &dev->cow.bitmap_offset, &dev->cow.bitmap_len, -+ &dev->cow.data_offset, create_ptr); -+ -+ if((dev->fd == -ENOENT) && create_cow){ -+ dev->fd = create_cow_file(dev->file, dev->cow.file, -+ dev->openflags, 1 << 9, -+ &dev->cow.bitmap_offset, -+ &dev->cow.bitmap_len, -+ &dev->cow.data_offset); -+ if(dev->fd >= 0){ -+ printk(KERN_INFO "Creating \"%s\" as COW file for " -+ "\"%s\"\n", dev->file, dev->cow.file); -+ } -+ } -+ -+ if(dev->fd < 0) return(dev->fd); -+ -+ if(dev->cow.file != NULL){ -+ err = -ENOMEM; -+ dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); -+ if(dev->cow.bitmap == NULL) goto error; -+ flush_tlb_kernel_vm(); -+ -+ err = read_cow_bitmap(dev->fd, dev->cow.bitmap, -+ dev->cow.bitmap_offset, -+ dev->cow.bitmap_len); -+ if(err) goto error; -+ -+ flags = dev->openflags; -+ flags.w = 0; -+ err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, -+ NULL, NULL); -+ if(err < 0) goto error; -+ dev->cow.fd = err; -+ } -+ return(0); -+ error: -+ os_close_file(dev->fd); -+ return(err); -+} -+ -+static int ubd_file_size(struct ubd *dev, __u64 *size_out) -+{ -+ char *file; -+ -+ file = dev->cow.file ? 
dev->cow.file : dev->file; -+ return(os_file_size(file, size_out)); -+} -+ -+static int ubd_open(struct inode *inode, struct file *filp) -+{ -+ struct ubd *dev; -+ int n, offset, err = 0; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ dev = &ubd_dev[n]; -+ if(n >= MAX_DEV) -+ return -ENODEV; -+ -+ spin_lock(&ubd_lock); -+ offset = n << UBD_SHIFT; -+ -+ if(dev->count == 0){ -+ err = ubd_open_dev(dev); -+ if(err){ -+ printk(KERN_ERR "ubd%d: Can't open \"%s\": " -+ "errno = %d\n", n, dev->file, -err); -+ goto out; -+ } -+ err = ubd_file_size(dev, &dev->size); -+ if(err) -+ goto out; -+ sizes[offset] = dev->size / BLOCK_SIZE; -+ ubd_part[offset].nr_sects = dev->size / hardsect_sizes[offset]; -+ } -+ dev->count++; -+ if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){ -+ if(--dev->count == 0) ubd_close(dev); -+ err = -EROFS; -+ } -+ out: -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+static int ubd_release(struct inode * inode, struct file * file) -+{ -+ int n, offset; -+ -+ n = DEVICE_NR(inode->i_rdev); -+ offset = n << UBD_SHIFT; -+ if(n >= MAX_DEV) -+ return -ENODEV; -+ -+ spin_lock(&ubd_lock); -+ if(--ubd_dev[n].count == 0) -+ ubd_close(&ubd_dev[n]); -+ spin_unlock(&ubd_lock); -+ -+ return(0); -+} -+ -+void cowify_req(struct io_thread_req *req, struct ubd *dev) -+{ -+ int i, update_bitmap, sector = req->offset >> 9; -+ -+ if(req->length > (sizeof(req->sector_mask) * 8) << 9) -+ panic("Operation too long"); -+ if(req->op == UBD_READ) { -+ for(i = 0; i < req->length >> 9; i++){ -+ if(ubd_test_bit(sector + i, (unsigned char *) -+ dev->cow.bitmap)){ -+ ubd_set_bit(i, (unsigned char *) -+ &req->sector_mask); -+ } -+ } -+ } -+ else { -+ update_bitmap = 0; -+ for(i = 0; i < req->length >> 9; i++){ -+ ubd_set_bit(i, (unsigned char *) -+ &req->sector_mask); -+ if(!ubd_test_bit(sector + i, (unsigned char *) -+ dev->cow.bitmap)) -+ update_bitmap = 1; -+ ubd_set_bit(sector + i, (unsigned char *) -+ dev->cow.bitmap); -+ } -+ if(update_bitmap){ -+ req->cow_offset = sector / 
(sizeof(unsigned long) * 8); -+ req->bitmap_words[0] = -+ dev->cow.bitmap[req->cow_offset]; -+ req->bitmap_words[1] = -+ dev->cow.bitmap[req->cow_offset + 1]; -+ req->cow_offset *= sizeof(unsigned long); -+ req->cow_offset += dev->cow.bitmap_offset; -+ } -+ } -+} -+ -+static int prepare_request(struct request *req, struct io_thread_req *io_req) -+{ -+ struct ubd *dev; -+ __u64 block; -+ int nsect, minor, n; -+ -+ if(req->rq_status == RQ_INACTIVE) return(1); -+ -+ minor = MINOR(req->rq_dev); -+ n = minor >> UBD_SHIFT; -+ dev = &ubd_dev[n]; -+ if(IS_WRITE(req) && !dev->openflags.w){ -+ printk("Write attempted on readonly ubd device %d\n", n); -+ end_request(0); -+ return(1); -+ } -+ -+ req->sector += ubd_part[minor].start_sect; -+ block = req->sector; -+ nsect = req->current_nr_sectors; -+ -+ io_req->op = (req->cmd == READ) ? UBD_READ : UBD_WRITE; -+ io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd; -+ io_req->fds[1] = dev->fd; -+ io_req->offsets[0] = 0; -+ io_req->offsets[1] = dev->cow.data_offset; -+ io_req->offset = ((__u64) block) << 9; -+ io_req->length = nsect << 9; -+ io_req->buffer = req->buffer; -+ io_req->sectorsize = 1 << 9; -+ io_req->sector_mask = 0; -+ io_req->cow_offset = -1; -+ io_req->error = 0; -+ -+ if(dev->cow.file != NULL) cowify_req(io_req, dev); -+ return(0); -+} -+ -+static void do_ubd_request(request_queue_t *q) -+{ -+ struct io_thread_req io_req; -+ struct request *req; -+ int err, n; -+ -+ if(thread_fd == -1){ -+ while(!list_empty(&q->queue_head)){ -+ req = blkdev_entry_next_request(&q->queue_head); -+ err = prepare_request(req, &io_req); -+ if(!err){ -+ do_io(&io_req); -+ ubd_finish(io_req.error); -+ } -+ } -+ } -+ else { -+ if(DEVICE_INTR || list_empty(&q->queue_head)) return; -+ req = blkdev_entry_next_request(&q->queue_head); -+ err = prepare_request(req, &io_req); -+ if(!err){ -+ SET_INTR(ubd_handler); -+ n = write_ubd_fs(thread_fd, (char *) &io_req, -+ sizeof(io_req)); -+ if(n != sizeof(io_req)) -+ printk("write to io 
thread failed, " -+ "errno = %d\n", -n); -+ } -+ } -+} -+ -+static int ubd_ioctl(struct inode * inode, struct file * file, -+ unsigned int cmd, unsigned long arg) -+{ -+ struct hd_geometry *loc = (struct hd_geometry *) arg; -+ struct ubd *dev; -+ int n, minor, err; -+ struct hd_driveid ubd_id = { -+ .cyls = 0, -+ .heads = 128, -+ .sectors = 32, -+ }; -+ -+ if(!inode) return(-EINVAL); -+ minor = MINOR(inode->i_rdev); -+ n = minor >> UBD_SHIFT; -+ if(n >= MAX_DEV) -+ return(-EINVAL); -+ dev = &ubd_dev[n]; -+ switch (cmd) { -+ struct hd_geometry g; -+ struct cdrom_volctrl volume; -+ case HDIO_GETGEO: -+ if(!loc) return(-EINVAL); -+ g.heads = 128; -+ g.sectors = 32; -+ g.cylinders = dev->size / (128 * 32 * hardsect_sizes[minor]); -+ g.start = 2; -+ return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); -+ case BLKGETSIZE: /* Return device size */ -+ if(!arg) return(-EINVAL); -+ err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); -+ if(err) -+ return(err); -+ put_user(ubd_part[minor].nr_sects, (long *) arg); -+ return(0); -+ case BLKRRPART: /* Re-read partition tables */ -+ return(ubd_revalidate(inode->i_rdev)); -+ -+ case HDIO_SET_UNMASKINTR: -+ if(!capable(CAP_SYS_ADMIN)) return(-EACCES); -+ if((arg > 1) || (minor & 0x3F)) return(-EINVAL); -+ return(0); -+ -+ case HDIO_GET_UNMASKINTR: -+ if(!arg) return(-EINVAL); -+ err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); -+ if(err) -+ return(err); -+ return(0); -+ -+ case HDIO_GET_MULTCOUNT: -+ if(!arg) return(-EINVAL); -+ err = verify_area(VERIFY_WRITE, (long *) arg, sizeof(long)); -+ if(err) -+ return(err); -+ return(0); -+ -+ case HDIO_SET_MULTCOUNT: -+ if(!capable(CAP_SYS_ADMIN)) return(-EACCES); -+ if(MINOR(inode->i_rdev) & 0x3F) return(-EINVAL); -+ return(0); -+ -+ case HDIO_GET_IDENTITY: -+ ubd_id.cyls = dev->size / (128 * 32 * hardsect_sizes[minor]); -+ if(copy_to_user((char *) arg, (char *) &ubd_id, -+ sizeof(ubd_id))) -+ return(-EFAULT); -+ return(0); -+ -+ case CDROMVOLREAD: -+ 
if(copy_from_user(&volume, (char *) arg, sizeof(volume))) -+ return(-EFAULT); -+ volume.channel0 = 255; -+ volume.channel1 = 255; -+ volume.channel2 = 255; -+ volume.channel3 = 255; -+ if(copy_to_user((char *) arg, &volume, sizeof(volume))) -+ return(-EFAULT); -+ return(0); -+ -+ default: -+ return blk_ioctl(inode->i_rdev, cmd, arg); -+ } -+} -+ -+static int ubd_revalidate1(kdev_t rdev) -+{ -+ int i, n, offset, err = 0, pcount = 1 << UBD_SHIFT; -+ struct ubd *dev; -+ struct hd_struct *part; -+ -+ n = DEVICE_NR(rdev); -+ offset = n << UBD_SHIFT; -+ dev = &ubd_dev[n]; -+ -+ part = &ubd_part[offset]; -+ -+ /* clear all old partition counts */ -+ for(i = 1; i < pcount; i++) { -+ part[i].start_sect = 0; -+ part[i].nr_sects = 0; -+ } -+ -+ /* If it already has been opened we can check the partitions -+ * directly -+ */ -+ if(dev->count){ -+ part->start_sect = 0; -+ register_disk(&ubd_gendisk, MKDEV(MAJOR_NR, offset), pcount, -+ &ubd_blops, part->nr_sects); -+ } -+ else if(dev->file){ -+ err = ubd_open_dev(dev); -+ if(err){ -+ printk(KERN_ERR "unable to open %s for validation\n", -+ dev->file); -+ goto out; -+ } -+ -+ /* have to recompute sizes since we opened it */ -+ err = ubd_file_size(dev, &dev->size); -+ if(err) { -+ ubd_close(dev); -+ goto out; -+ } -+ part->start_sect = 0; -+ part->nr_sects = dev->size / hardsect_sizes[offset]; -+ register_disk(&ubd_gendisk, MKDEV(MAJOR_NR, offset), pcount, -+ &ubd_blops, part->nr_sects); -+ -+ /* we are done so close it */ -+ ubd_close(dev); -+ } -+ else err = -ENODEV; -+ out: -+ return(err); -+} -+ -+static int ubd_revalidate(kdev_t rdev) -+{ -+ int err; -+ -+ spin_lock(&ubd_lock); -+ err = ubd_revalidate1(rdev); -+ spin_unlock(&ubd_lock); -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/ubd_user.c um/arch/um/drivers/ubd_user.c ---- orig/arch/um/drivers/ubd_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/ubd_user.c Thu Mar 6 18:08:55 2003 -@@ -0,0 +1,626 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 Ridgerun,Inc (glonnon@ridgerun.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "asm/types.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "ubd_user.h" -+#include "os.h" -+ -+#include -+#include -+#if __BYTE_ORDER == __BIG_ENDIAN -+# define ntohll(x) (x) -+# define htonll(x) (x) -+#elif __BYTE_ORDER == __LITTLE_ENDIAN -+# define ntohll(x) bswap_64(x) -+# define htonll(x) bswap_64(x) -+#else -+#error "__BYTE_ORDER not defined" -+#endif -+ -+#define PATH_LEN_V1 256 -+ -+struct cow_header_v1 { -+ int magic; -+ int version; -+ char backing_file[PATH_LEN_V1]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+#define PATH_LEN_V2 MAXPATHLEN -+ -+struct cow_header_v2 { -+ unsigned long magic; -+ unsigned long version; -+ char backing_file[PATH_LEN_V2]; -+ time_t mtime; -+ __u64 size; -+ int sectorsize; -+}; -+ -+union cow_header { -+ struct cow_header_v1 v1; -+ struct cow_header_v2 v2; -+}; -+ -+#define COW_MAGIC 0x4f4f4f4d /* MOOO */ -+#define COW_VERSION 2 -+ -+static void sizes(__u64 size, int sectorsize, int bitmap_offset, -+ unsigned long *bitmap_len_out, int *data_offset_out) -+{ -+ *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); -+ -+ *data_offset_out = bitmap_offset + *bitmap_len_out; -+ *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize; -+ *data_offset_out *= 
sectorsize; -+} -+ -+static int read_cow_header(int fd, int *magic_out, char **backing_file_out, -+ time_t *mtime_out, __u64 *size_out, -+ int *sectorsize_out, int *bitmap_offset_out) -+{ -+ union cow_header *header; -+ char *file; -+ int err, n; -+ unsigned long version, magic; -+ -+ header = um_kmalloc(sizeof(*header)); -+ if(header == NULL){ -+ printk("read_cow_header - Failed to allocate header\n"); -+ return(-ENOMEM); -+ } -+ err = -EINVAL; -+ n = read(fd, header, sizeof(*header)); -+ if(n < offsetof(typeof(header->v1), backing_file)){ -+ printk("read_cow_header - short header\n"); -+ goto out; -+ } -+ -+ magic = header->v1.magic; -+ if(magic == COW_MAGIC) { -+ version = header->v1.version; -+ } -+ else if(magic == ntohl(COW_MAGIC)){ -+ version = ntohl(header->v1.version); -+ } -+ else goto out; -+ -+ *magic_out = COW_MAGIC; -+ -+ if(version == 1){ -+ if(n < sizeof(header->v1)){ -+ printk("read_cow_header - failed to read V1 header\n"); -+ goto out; -+ } -+ *mtime_out = header->v1.mtime; -+ *size_out = header->v1.size; -+ *sectorsize_out = header->v1.sectorsize; -+ *bitmap_offset_out = sizeof(header->v1); -+ file = header->v1.backing_file; -+ } -+ else if(version == 2){ -+ if(n < sizeof(header->v2)){ -+ printk("read_cow_header - failed to read V2 header\n"); -+ goto out; -+ } -+ *mtime_out = ntohl(header->v2.mtime); -+ *size_out = ntohll(header->v2.size); -+ *sectorsize_out = ntohl(header->v2.sectorsize); -+ *bitmap_offset_out = sizeof(header->v2); -+ file = header->v2.backing_file; -+ } -+ else { -+ printk("read_cow_header - invalid COW version\n"); -+ goto out; -+ } -+ err = -ENOMEM; -+ *backing_file_out = uml_strdup(file); -+ if(*backing_file_out == NULL){ -+ printk("read_cow_header - failed to allocate backing file\n"); -+ goto out; -+ } -+ err = 0; -+ out: -+ kfree(header); -+ return(err); -+} -+ -+static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) -+{ -+ struct stat64 buf1, buf2; -+ -+ if(from_cmdline == NULL) return(1); -+ 
if(!strcmp(from_cmdline, from_cow)) return(1); -+ -+ if(stat64(from_cmdline, &buf1) < 0){ -+ printk("Couldn't stat '%s', errno = %d\n", from_cmdline, -+ errno); -+ return(1); -+ } -+ if(stat64(from_cow, &buf2) < 0){ -+ printk("Couldn't stat '%s', errno = %d\n", from_cow, errno); -+ return(1); -+ } -+ if((buf1.st_dev == buf2.st_dev) && (buf1.st_ino == buf2.st_ino)) -+ return(1); -+ -+ printk("Backing file mismatch - \"%s\" requested,\n" -+ "\"%s\" specified in COW header of \"%s\"\n", -+ from_cmdline, from_cow, cow); -+ return(0); -+} -+ -+static int backing_file_mismatch(char *file, __u64 size, time_t mtime) -+{ -+ struct stat64 buf; -+ long long actual; -+ int err; -+ -+ if(stat64(file, &buf) < 0){ -+ printk("Failed to stat backing file \"%s\", errno = %d\n", -+ file, errno); -+ return(-errno); -+ } -+ -+ err = os_file_size(file, &actual); -+ if(err){ -+ printk("Failed to get size of backing file \"%s\", " -+ "errno = %d\n", file, -err); -+ return(err); -+ } -+ -+ if(actual != size){ -+ printk("Size mismatch (%ld vs %ld) of COW header vs backing " -+ "file\n", size, actual); -+ return(-EINVAL); -+ } -+ if(buf.st_mtime != mtime){ -+ printk("mtime mismatch (%ld vs %ld) of COW header vs backing " -+ "file\n", mtime, buf.st_mtime); -+ return(-EINVAL); -+ } -+ return(0); -+} -+ -+int read_cow_bitmap(int fd, void *buf, int offset, int len) -+{ -+ int err; -+ -+ err = os_seek_file(fd, offset); -+ if(err != 0) return(-errno); -+ err = read(fd, buf, len); -+ if(err < 0) return(-errno); -+ return(0); -+} -+ -+static int absolutize(char *to, int size, char *from) -+{ -+ char save_cwd[256], *slash; -+ int remaining; -+ -+ if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { -+ printk("absolutize : unable to get cwd - errno = %d\n", errno); -+ return(-1); -+ } -+ slash = strrchr(from, '/'); -+ if(slash != NULL){ -+ *slash = '\0'; -+ if(chdir(from)){ -+ *slash = '/'; -+ printk("absolutize : Can't cd to '%s' - errno = %d\n", -+ from, errno); -+ return(-1); -+ } -+ *slash = '/'; -+ 
if(getcwd(to, size) == NULL){ -+ printk("absolutize : unable to get cwd of '%s' - " -+ "errno = %d\n", from, errno); -+ return(-1); -+ } -+ remaining = size - strlen(to); -+ if(strlen(slash) + 1 > remaining){ -+ printk("absolutize : unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcat(to, slash); -+ } -+ else { -+ if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ -+ printk("absolutize : unable to fit '%s' into %d " -+ "chars\n", from, size); -+ return(-1); -+ } -+ strcpy(to, save_cwd); -+ strcat(to, "/"); -+ strcat(to, from); -+ } -+ chdir(save_cwd); -+ return(0); -+} -+ -+static int write_cow_header(char *cow_file, int fd, char *backing_file, -+ int sectorsize, long long *size) -+{ -+ struct cow_header_v2 *header; -+ struct stat64 buf; -+ int err; -+ -+ err = os_seek_file(fd, 0); -+ if(err != 0){ -+ printk("write_cow_header - lseek failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ err = -ENOMEM; -+ header = um_kmalloc(sizeof(*header)); -+ if(header == NULL){ -+ printk("Failed to allocate COW V2 header\n"); -+ goto out; -+ } -+ header->magic = htonl(COW_MAGIC); -+ header->version = htonl(COW_VERSION); -+ -+ err = -EINVAL; -+ if(strlen(backing_file) > sizeof(header->backing_file) - 1){ -+ printk("Backing file name \"%s\" is too long - names are " -+ "limited to %d characters\n", backing_file, -+ sizeof(header->backing_file) - 1); -+ goto out_free; -+ } -+ -+ if(absolutize(header->backing_file, sizeof(header->backing_file), -+ backing_file)) -+ goto out_free; -+ -+ err = stat64(header->backing_file, &buf); -+ if(err < 0){ -+ printk("Stat of backing file '%s' failed, errno = %d\n", -+ header->backing_file, errno); -+ err = -errno; -+ goto out_free; -+ } -+ -+ err = os_file_size(header->backing_file, size); -+ if(err){ -+ printk("Couldn't get size of backing file '%s', errno = %d\n", -+ header->backing_file, -*size); -+ goto out_free; -+ } -+ -+ header->mtime = htonl(buf.st_mtime); -+ header->size = htonll(*size); -+ 
header->sectorsize = htonl(sectorsize); -+ -+ err = write(fd, header, sizeof(*header)); -+ if(err != sizeof(*header)){ -+ printk("Write of header to new COW file '%s' failed, " -+ "errno = %d\n", cow_file, errno); -+ goto out_free; -+ } -+ err = 0; -+ out_free: -+ kfree(header); -+ out: -+ return(err); -+} -+ -+int open_ubd_file(char *file, struct openflags *openflags, -+ char **backing_file_out, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out, -+ int *create_cow_out) -+{ -+ time_t mtime; -+ __u64 size; -+ char *backing_file; -+ int fd, err, sectorsize, magic, same, mode = 0644; -+ -+ if((fd = os_open_file(file, *openflags, mode)) < 0){ -+ if((fd == -ENOENT) && (create_cow_out != NULL)) -+ *create_cow_out = 1; -+ if(!openflags->w || -+ ((errno != EROFS) && (errno != EACCES))) return(-errno); -+ openflags->w = 0; -+ if((fd = os_open_file(file, *openflags, mode)) < 0) -+ return(fd); -+ } -+ -+ err = os_lock_file(fd, openflags->w); -+ if(err){ -+ printk("Failed to lock '%s', errno = %d\n", file, -err); -+ goto error; -+ } -+ -+ if(backing_file_out == NULL) return(fd); -+ -+ err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, -+ §orsize, bitmap_offset_out); -+ if(err && (*backing_file_out != NULL)){ -+ printk("Failed to read COW header from COW file \"%s\", " -+ "errno = %d\n", file, err); -+ goto error; -+ } -+ if(err) return(fd); -+ -+ if(backing_file_out == NULL) return(fd); -+ -+ same = same_backing_files(*backing_file_out, backing_file, file); -+ -+ if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){ -+ printk("Switching backing file to '%s'\n", *backing_file_out); -+ err = write_cow_header(file, fd, *backing_file_out, -+ sectorsize, &size); -+ if(err){ -+ printk("Switch failed, errno = %d\n", err); -+ return(err); -+ } -+ } -+ else { -+ *backing_file_out = backing_file; -+ err = backing_file_mismatch(*backing_file_out, size, mtime); -+ if(err) goto error; -+ } -+ -+ sizes(size, sectorsize, 
*bitmap_offset_out, bitmap_len_out, -+ data_offset_out); -+ -+ return(fd); -+ error: -+ os_close_file(fd); -+ return(err); -+} -+ -+int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, -+ int sectorsize, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out) -+{ -+ __u64 offset; -+ int err, fd; -+ long long size; -+ char zero = 0; -+ -+ flags.c = 1; -+ fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); -+ if(fd < 0){ -+ err = fd; -+ printk("Open of COW file '%s' failed, errno = %d\n", cow_file, -+ -err); -+ goto out; -+ } -+ -+ err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size); -+ if(err) goto out_close; -+ -+ sizes(size, sectorsize, sizeof(struct cow_header_v2), -+ bitmap_len_out, data_offset_out); -+ *bitmap_offset_out = sizeof(struct cow_header_v2); -+ -+ offset = *data_offset_out + size - sizeof(zero); -+ err = os_seek_file(fd, offset); -+ if(err != 0){ -+ printk("cow bitmap lseek failed : errno = %d\n", errno); -+ goto out_close; -+ } -+ -+ /* does not really matter how much we write it is just to set EOF -+ * this also sets the entire COW bitmap -+ * to zero without having to allocate it -+ */ -+ err = os_write_file(fd, &zero, sizeof(zero)); -+ if(err != sizeof(zero)){ -+ printk("Write of bitmap to new COW file '%s' failed, " -+ "errno = %d\n", cow_file, errno); -+ goto out_close; -+ } -+ -+ return(fd); -+ -+ out_close: -+ close(fd); -+ out: -+ return(err); -+} -+ -+int read_ubd_fs(int fd, void *buffer, int len) -+{ -+ int n; -+ -+ n = read(fd, buffer, len); -+ if(n < 0) return(-errno); -+ else return(n); -+} -+ -+int write_ubd_fs(int fd, char *buffer, int len) -+{ -+ int n; -+ -+ n = write(fd, buffer, len); -+ if(n < 0) return(-errno); -+ else return(n); -+} -+ -+void do_io(struct io_thread_req *req) -+{ -+ char *buf; -+ unsigned long len; -+ int n, nsectors, start, end, bit; -+ __u64 off; -+ -+ nsectors = req->length / req->sectorsize; -+ start = 0; -+ do { -+ bit = 
ubd_test_bit(start, (unsigned char *) &req->sector_mask); -+ end = start; -+ while((end < nsectors) && -+ (ubd_test_bit(end, (unsigned char *) -+ &req->sector_mask) == bit)) -+ end++; -+ -+ if(end != nsectors) -+ printk("end != nsectors\n"); -+ off = req->offset + req->offsets[bit] + -+ start * req->sectorsize; -+ len = (end - start) * req->sectorsize; -+ buf = &req->buffer[start * req->sectorsize]; -+ -+ if(os_seek_file(req->fds[bit], off) != 0){ -+ printk("do_io - lseek failed : errno = %d\n", errno); -+ req->error = 1; -+ return; -+ } -+ if(req->op == UBD_READ){ -+ n = 0; -+ do { -+ buf = &buf[n]; -+ len -= n; -+ n = read(req->fds[bit], buf, len); -+ if (n < 0) { -+ printk("do_io - read returned %d : " -+ "errno = %d fd = %d\n", n, -+ errno, req->fds[bit]); -+ req->error = 1; -+ return; -+ } -+ } while((n < len) && (n != 0)); -+ if (n < len) memset(&buf[n], 0, len - n); -+ } -+ else { -+ n = write(req->fds[bit], buf, len); -+ if(n != len){ -+ printk("do_io - write returned %d : " -+ "errno = %d fd = %d\n", n, -+ errno, req->fds[bit]); -+ req->error = 1; -+ return; -+ } -+ } -+ -+ start = end; -+ } while(start < nsectors); -+ -+ if(req->cow_offset != -1){ -+ if(os_seek_file(req->fds[1], req->cow_offset) != 0){ -+ printk("do_io - bitmap lseek failed : errno = %d\n", -+ errno); -+ req->error = 1; -+ return; -+ } -+ n = write(req->fds[1], &req->bitmap_words, -+ sizeof(req->bitmap_words)); -+ if(n != sizeof(req->bitmap_words)){ -+ printk("do_io - bitmap update returned %d : " -+ "errno = %d fd = %d\n", n, errno, req->fds[1]); -+ req->error = 1; -+ return; -+ } -+ } -+ req->error = 0; -+ return; -+} -+ -+/* Changed in start_io_thread, which is serialized by being called only -+ * from ubd_init, which is an initcall. 
-+ */ -+int kernel_fd = -1; -+ -+/* Only changed by the io thread */ -+int io_count = 0; -+ -+int io_thread(void *arg) -+{ -+ struct io_thread_req req; -+ int n; -+ -+ signal(SIGWINCH, SIG_IGN); -+ while(1){ -+ n = read(kernel_fd, &req, sizeof(req)); -+ if(n < 0) printk("io_thread - read returned %d, errno = %d\n", -+ n, errno); -+ else if(n < sizeof(req)){ -+ printk("io_thread - short read : length = %d\n", n); -+ continue; -+ } -+ io_count++; -+ do_io(&req); -+ n = write(kernel_fd, &req, sizeof(req)); -+ if(n != sizeof(req)) -+ printk("io_thread - write failed, errno = %d\n", -+ errno); -+ } -+} -+ -+int start_io_thread(unsigned long sp, int *fd_out) -+{ -+ int pid, fds[2], err; -+ -+ err = os_pipe(fds, 1, 1); -+ if(err){ -+ printk("start_io_thread - os_pipe failed, errno = %d\n", -err); -+ return(-1); -+ } -+ kernel_fd = fds[0]; -+ *fd_out = fds[1]; -+ -+ pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, -+ NULL); -+ if(pid < 0){ -+ printk("start_io_thread - clone failed : errno = %d\n", errno); -+ return(-errno); -+ } -+ return(pid); -+} -+ -+#ifdef notdef -+int start_io_thread(unsigned long sp, int *fd_out) -+{ -+ int pid; -+ -+ if((kernel_fd = get_pty()) < 0) return(-1); -+ raw(kernel_fd, 0); -+ if((*fd_out = open(ptsname(kernel_fd), O_RDWR)) < 0){ -+ printk("Couldn't open tty for IO\n"); -+ return(-1); -+ } -+ -+ pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, -+ NULL); -+ if(pid < 0){ -+ printk("start_io_thread - clone failed : errno = %d\n", errno); -+ return(-errno); -+ } -+ return(pid); -+} -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/xterm.c um/arch/um/drivers/xterm.c ---- orig/arch/um/drivers/xterm.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/xterm.c Mon Dec 30 20:49:22 2002 -@@ -0,0 +1,200 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "kern_util.h" -+#include "chan_user.h" -+#include "helper.h" -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+#include "xterm.h" -+ -+struct xterm_chan { -+ int pid; -+ int helper_pid; -+ char *title; -+ int device; -+ int raw; -+ struct termios tt; -+ unsigned long stack; -+ int direct_rcv; -+}; -+ -+void *xterm_init(char *str, int device, struct chan_opts *opts) -+{ -+ struct xterm_chan *data; -+ -+ if((data = malloc(sizeof(*data))) == NULL) return(NULL); -+ *data = ((struct xterm_chan) { .pid = -1, -+ .helper_pid = -1, -+ .device = device, -+ .title = opts->xterm_title, -+ .raw = opts->raw, -+ .stack = opts->tramp_stack, -+ .direct_rcv = !opts->in_kernel } ); -+ return(data); -+} -+ -+/* Only changed by xterm_setup, which is a setup */ -+static char *terminal_emulator = "xterm"; -+static char *title_switch = "-T"; -+static char *exec_switch = "-e"; -+ -+static int __init xterm_setup(char *line, int *add) -+{ -+ *add = 0; -+ terminal_emulator = line; -+ -+ line = strchr(line, ','); -+ if(line == NULL) return(0); -+ *line++ = '\0'; -+ if(*line) title_switch = line; -+ -+ line = strchr(line, ','); -+ if(line == NULL) return(0); -+ *line++ = '\0'; -+ if(*line) exec_switch = line; -+ -+ return(0); -+} -+ -+__uml_setup("xterm=", xterm_setup, -+"xterm=,,<exec switch>\n" -+" Specifies an alternate terminal emulator to use for the debugger,\n" -+" consoles, and serial lines 
when they are attached to the xterm channel.\n" -+" The values are the terminal emulator binary, the switch it uses to set\n" -+" its title, and the switch it uses to execute a subprocess,\n" -+" respectively. The title switch must have the form '<switch> title',\n" -+" not '<switch>=title'. Similarly, the exec switch must have the form\n" -+" '<switch> command arg1 arg2 ...'.\n" -+" The default values are 'xterm=xterm,-T,-e'. Values for gnome-terminal\n" -+" are 'xterm=gnome-terminal,-t,-x'.\n\n" -+); -+ -+int xterm_open(int input, int output, int primary, void *d, char **dev_out) -+{ -+ struct xterm_chan *data = d; -+ unsigned long stack; -+ int pid, fd, new, err; -+ char title[256], file[] = "/tmp/xterm-pipeXXXXXX"; -+ char *argv[] = { terminal_emulator, title_switch, title, exec_switch, -+ "/usr/lib/uml/port-helper", "-uml-socket", -+ file, NULL }; -+ -+ if(access(argv[4], X_OK)) -+ argv[4] = "port-helper"; -+ -+ fd = mkstemp(file); -+ if(fd < 0){ -+ printk("xterm_open : mkstemp failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(unlink(file)){ -+ printk("xterm_open : unlink failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ close(fd); -+ -+ fd = create_unix_socket(file, sizeof(file)); -+ if(fd < 0){ -+ printk("xterm_open : create_unix_socket failed, errno = %d\n", -+ -fd); -+ return(-fd); -+ } -+ -+ sprintf(title, data->title, data->device); -+ stack = data->stack; -+ pid = run_helper(NULL, NULL, argv, &stack); -+ if(pid < 0){ -+ printk("xterm_open : run_helper failed, errno = %d\n", -pid); -+ return(pid); -+ } -+ -+ if(data->stack == 0) free_stack(stack, 0); -+ -+ if(data->direct_rcv) -+ new = os_rcv_fd(fd, &data->helper_pid); -+ else { -+ if((err = os_set_fd_block(fd, 0)) != 0){ -+ printk("xterm_open : failed to set descriptor " -+ "non-blocking, errno = %d\n", err); -+ return(err); -+ } -+ new = xterm_fd(fd, &data->helper_pid); -+ } -+ if(new < 0){ -+ printk("xterm_open : os_rcv_fd failed, errno = %d\n", -new); -+ goto out; -+ } -+ -+ 
tcgetattr(new, &data->tt); -+ if(data->raw) raw(new, 0); -+ -+ data->pid = pid; -+ *dev_out = NULL; -+ out: -+ unlink(file); -+ return(new); -+} -+ -+void xterm_close(int fd, void *d) -+{ -+ struct xterm_chan *data = d; -+ -+ if(data->pid != -1) -+ os_kill_process(data->pid, 1); -+ data->pid = -1; -+ if(data->helper_pid != -1) -+ os_kill_process(data->helper_pid, 0); -+ data->helper_pid = -1; -+ close(fd); -+} -+ -+void xterm_free(void *d) -+{ -+ free(d); -+} -+ -+int xterm_console_write(int fd, const char *buf, int n, void *d) -+{ -+ struct xterm_chan *data = d; -+ -+ return(generic_console_write(fd, buf, n, &data->tt)); -+} -+ -+struct chan_ops xterm_ops = { -+ .type = "xterm", -+ .init = xterm_init, -+ .open = xterm_open, -+ .close = xterm_close, -+ .read = generic_read, -+ .write = generic_write, -+ .console_write = xterm_console_write, -+ .window_size = generic_window_size, -+ .free = xterm_free, -+ .winch = 1, -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/xterm.h um/arch/um/drivers/xterm.h ---- orig/arch/um/drivers/xterm.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/xterm.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __XTERM_H__ -+#define __XTERM_H__ -+ -+extern int xterm_fd(int socket, int *pid_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/drivers/xterm_kern.c um/arch/um/drivers/xterm_kern.c ---- orig/arch/um/drivers/xterm_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/drivers/xterm_kern.c Tue Dec 17 17:31:20 2002 -@@ -0,0 +1,79 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/errno.h" -+#include "linux/slab.h" -+#include "asm/semaphore.h" -+#include "asm/irq.h" -+#include "irq_user.h" -+#include "kern_util.h" -+#include "os.h" -+#include "xterm.h" -+ -+struct xterm_wait { -+ struct semaphore sem; -+ int fd; -+ int pid; -+ int new_fd; -+}; -+ -+static void xterm_interrupt(int irq, void *data, struct pt_regs *regs) -+{ -+ struct xterm_wait *xterm = data; -+ int fd; -+ -+ fd = os_rcv_fd(xterm->fd, &xterm->pid); -+ if(fd == -EAGAIN) -+ return; -+ -+ xterm->new_fd = fd; -+ up(&xterm->sem); -+} -+ -+int xterm_fd(int socket, int *pid_out) -+{ -+ struct xterm_wait *data; -+ int err, ret; -+ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL){ -+ printk(KERN_ERR "xterm_fd : failed to allocate xterm_wait\n"); -+ return(-ENOMEM); -+ } -+ *data = ((struct xterm_wait) -+ { .sem = __SEMAPHORE_INITIALIZER(data->sem, 0), -+ .fd = socket, -+ .pid = -1, -+ .new_fd = -1 }); -+ -+ err = um_request_irq(XTERM_IRQ, socket, IRQ_READ, xterm_interrupt, -+ SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, -+ "xterm", data); -+ if(err){ -+ printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " -+ "err = %d\n", err); -+ return(err); -+ } -+ down(&data->sem); -+ -+ free_irq(XTERM_IRQ, data); -+ -+ ret = data->new_fd; -+ *pid_out = data->pid; -+ kfree(data); -+ -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/dyn_link.ld.in um/arch/um/dyn_link.ld.in ---- orig/arch/um/dyn_link.ld.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/dyn_link.ld.in Fri Jan 17 23:37:27 2003 -@@ -0,0 +1,172 @@ -+OUTPUT_FORMAT("ELF_FORMAT") -+OUTPUT_ARCH(ELF_ARCH) -+ENTRY(_start) -+SEARCH_DIR("/usr/local/i686-pc-linux-gnu/lib"); SEARCH_DIR("/usr/local/lib"); SEARCH_DIR("/lib"); SEARCH_DIR("/usr/lib"); -+/* Do we need any of these for elf? -+ __DYNAMIC = 0; */ -+SECTIONS -+{ -+ . = START() + SIZEOF_HEADERS; -+ .interp : { *(.interp) } -+ . = ALIGN(4096); -+ __binary_start = .; -+ . = ALIGN(4096); /* Init code and data */ -+ _stext = .; -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ -+ . 
= ALIGN(4096); -+ -+ /* Read-only sections, merged into text segment: */ -+ .hash : { *(.hash) } -+ .dynsym : { *(.dynsym) } -+ .dynstr : { *(.dynstr) } -+ .gnu.version : { *(.gnu.version) } -+ .gnu.version_d : { *(.gnu.version_d) } -+ .gnu.version_r : { *(.gnu.version_r) } -+ .rel.init : { *(.rel.init) } -+ .rela.init : { *(.rela.init) } -+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) } -+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) } -+ .rel.fini : { *(.rel.fini) } -+ .rela.fini : { *(.rela.fini) } -+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) } -+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) } -+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) } -+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) } -+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) } -+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) } -+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) } -+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) } -+ .rel.ctors : { *(.rel.ctors) } -+ .rela.ctors : { *(.rela.ctors) } -+ .rel.dtors : { *(.rel.dtors) } -+ .rela.dtors : { *(.rela.dtors) } -+ .rel.got : { *(.rel.got) } -+ .rela.got : { *(.rela.got) } -+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) } -+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) } -+ .rel.plt : { *(.rel.plt) } -+ .rela.plt : { *(.rela.plt) } -+ .init : { -+ KEEP (*(.init)) -+ } =0x90909090 -+ .plt : { *(.plt) } -+ .text : { -+ *(.text .stub .text.* .gnu.linkonce.t.*) -+ /* .gnu.warning sections are handled specially by elf32.em. */ -+ *(.gnu.warning) -+ } =0x90909090 -+ .fini : { -+ KEEP (*(.fini)) -+ } =0x90909090 -+ -+ PROVIDE (__etext = .); -+ PROVIDE (_etext = .); -+ PROVIDE (etext = .); -+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } -+ .rodata1 : { *(.rodata1) } -+ .eh_frame_hdr : { *(.eh_frame_hdr) } -+ -+ -+ . 
= ALIGN(4096); -+ PROVIDE (_sdata = .); -+ -+include(`arch/um/common.ld.in') -+ -+ /* Ensure the __preinit_array_start label is properly aligned. We -+ could instead move the label definition inside the section, but -+ the linker would then create the section even if it turns out to -+ be empty, which isn't pretty. */ -+ . = ALIGN(32 / 8); -+ .preinit_array : { *(.preinit_array) } -+ .init_array : { *(.init_array) } -+ .fini_array : { *(.fini_array) } -+ .data : { -+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ -+ *(.data.init_task) -+ *(.data .data.* .gnu.linkonce.d.*) -+ SORT(CONSTRUCTORS) -+ } -+ .data1 : { *(.data1) } -+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) } -+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } -+ .eh_frame : { KEEP (*(.eh_frame)) } -+ .gcc_except_table : { *(.gcc_except_table) } -+ .dynamic : { *(.dynamic) } -+ .ctors : { -+ /* gcc uses crtbegin.o to find the start of -+ the constructors, so we make sure it is -+ first. Because this is a wildcard, it -+ doesn't matter if the user does not -+ actually link against crtbegin.o; the -+ linker won't look for a file to match a -+ wildcard. The wildcard also means that it -+ doesn't matter which directory crtbegin.o -+ is in. */ -+ KEEP (*crtbegin.o(.ctors)) -+ /* We don't want to include the .ctor section from -+ from the crtend.o file until after the sorted ctors. -+ The .ctor section from the crtend file contains the -+ end of ctors marker and it must be last */ -+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .ctors)) -+ KEEP (*(SORT(.ctors.*))) -+ KEEP (*(.ctors)) -+ } -+ .dtors : { -+ KEEP (*crtbegin.o(.dtors)) -+ KEEP (*(EXCLUDE_FILE (*crtend.o ) .dtors)) -+ KEEP (*(SORT(.dtors.*))) -+ KEEP (*(.dtors)) -+ } -+ .jcr : { KEEP (*(.jcr)) } -+ .got : { *(.got.plt) *(.got) } -+ _edata = .; -+ PROVIDE (edata = .); -+ __bss_start = .; -+ .bss : { -+ *(.dynbss) -+ *(.bss .bss.* .gnu.linkonce.b.*) -+ *(COMMON) -+ /* Align here to ensure that the .bss section occupies space up to -+ _end. 
Align after .bss to ensure correct alignment even if the -+ .bss section disappears because there are no input sections. */ -+ . = ALIGN(32 / 8); -+ . = ALIGN(32 / 8); -+ } -+ _end = .; -+ PROVIDE (end = .); -+ /* Stabs debugging sections. */ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+ /* DWARF debug sections. -+ Symbols in the DWARF debugging sections are relative to the beginning -+ of the section so we begin them at 0. */ -+ /* DWARF 1 */ -+ .debug 0 : { *(.debug) } -+ .line 0 : { *(.line) } -+ /* GNU DWARF 1 extensions */ -+ .debug_srcinfo 0 : { *(.debug_srcinfo) } -+ .debug_sfnames 0 : { *(.debug_sfnames) } -+ /* DWARF 1.1 and DWARF 2 */ -+ .debug_aranges 0 : { *(.debug_aranges) } -+ .debug_pubnames 0 : { *(.debug_pubnames) } -+ /* DWARF 2 */ -+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } -+ .debug_abbrev 0 : { *(.debug_abbrev) } -+ .debug_line 0 : { *(.debug_line) } -+ .debug_frame 0 : { *(.debug_frame) } -+ .debug_str 0 : { *(.debug_str) } -+ .debug_loc 0 : { *(.debug_loc) } -+ .debug_macinfo 0 : { *(.debug_macinfo) } -+ /* SGI/MIPS DWARF 2 extensions */ -+ .debug_weaknames 0 : { *(.debug_weaknames) } -+ .debug_funcnames 0 : { *(.debug_funcnames) } -+ .debug_typenames 0 : { *(.debug_typenames) } -+ .debug_varnames 0 : { *(.debug_varnames) } -+} -diff -Naur -X ../exclude-files orig/arch/um/fs/Makefile um/arch/um/fs/Makefile ---- orig/arch/um/fs/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/fs/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,23 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := built-in.o -+ -+subdir-y = -+subdir-m = -+ -+subdir-$(CONFIG_HOSTFS) += hostfs -+subdir-$(CONFIG_HPPFS) += hppfs -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+obj-m += $(join 
$(subdir-m),$(subdir-m:%=/%.o)) -+ -+include $(TOPDIR)/Rules.make -+ -+dep: -+ -+clean: -+ -+archmrproper: -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/Makefile um/arch/um/fs/hostfs/Makefile ---- orig/arch/um/fs/hostfs/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/fs/hostfs/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,24 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino -+# to __st_ino. It stayed in the same place, so as long as the correct name -+# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa. -+ -+STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \ -+ echo __)st_ino -+ -+USER_CFLAGS := $(USER_CFLAGS) -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD) -+ -+O_TARGET := hostfs.o -+obj-y = hostfs_kern.o hostfs_user.o -+obj-m = $(O_TARGET) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/hostfs.h um/arch/um/fs/hostfs/hostfs.h ---- orig/arch/um/fs/hostfs/hostfs.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/fs/hostfs/hostfs.h Mon Feb 24 23:00:47 2003 -@@ -0,0 +1,69 @@ -+#ifndef __UM_FS_HOSTFS -+#define __UM_FS_HOSTFS -+ -+#include "os.h" -+ -+/* These are exactly the same definitions as in fs.h, but the names are -+ * changed so that this file can be included in both kernel and user files. 
-+ */ -+ -+#define HOSTFS_ATTR_MODE 1 -+#define HOSTFS_ATTR_UID 2 -+#define HOSTFS_ATTR_GID 4 -+#define HOSTFS_ATTR_SIZE 8 -+#define HOSTFS_ATTR_ATIME 16 -+#define HOSTFS_ATTR_MTIME 32 -+#define HOSTFS_ATTR_CTIME 64 -+#define HOSTFS_ATTR_ATIME_SET 128 -+#define HOSTFS_ATTR_MTIME_SET 256 -+#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ -+#define HOSTFS_ATTR_ATTR_FLAG 1024 -+ -+struct hostfs_iattr { -+ unsigned int ia_valid; -+ mode_t ia_mode; -+ uid_t ia_uid; -+ gid_t ia_gid; -+ loff_t ia_size; -+ time_t ia_atime; -+ time_t ia_mtime; -+ time_t ia_ctime; -+ unsigned int ia_attr_flags; -+}; -+ -+extern int stat_file(const char *path, int *dev_out, -+ unsigned long long *inode_out, int *mode_out, -+ int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, unsigned long *atime_out, -+ unsigned long *mtime_out, unsigned long *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out); -+extern int access_file(char *path, int r, int w, int x); -+extern int open_file(char *path, int r, int w); -+extern int file_type(const char *path, int *rdev); -+extern void *open_dir(char *path, int *err_out); -+extern char *read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out); -+extern void close_file(void *stream); -+extern void close_dir(void *stream); -+extern int read_file(int fd, unsigned long long *offset, char *buf, int len); -+extern int write_file(int fd, unsigned long long *offset, const char *buf, -+ int len); -+extern int lseek_file(int fd, long long offset, int whence); -+extern int file_create(char *name, int ur, int uw, int ux, int gr, -+ int gw, int gx, int or, int ow, int ox); -+extern int set_attr(const char *file, struct hostfs_iattr *attrs); -+extern int make_symlink(const char *from, const char *to); -+extern int unlink_file(const char *file); -+extern int do_mkdir(const char *file, int mode); -+extern int do_rmdir(const char *file); -+extern int do_mknod(const char *file, int mode, 
int dev); -+extern int link_file(const char *from, const char *to); -+extern int do_readlink(char *file, char *buf, int size); -+extern int rename_file(char *from, char *to); -+extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/hostfs_kern.c um/arch/um/fs/hostfs/hostfs_kern.c ---- orig/arch/um/fs/hostfs/hostfs_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/fs/hostfs/hostfs_kern.c Sun Apr 13 21:29:33 2003 -@@ -0,0 +1,870 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/stddef.h> -+#include <linux/fs.h> -+#include <linux/version.h> -+#include <linux/module.h> -+#include <linux/init.h> -+#include <linux/slab.h> -+#include <linux/pagemap.h> -+#include <linux/blkdev.h> -+#include <asm/uaccess.h> -+#include "hostfs.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "user_util.h" -+#include "2_5compat.h" -+ -+#define file_hostfs_i(file) (&(file)->f_dentry->d_inode->u.hostfs_i) -+ -+int hostfs_d_delete(struct dentry *dentry) -+{ -+ return(1); -+} -+ -+struct dentry_operations hostfs_dentry_ops = { -+ .d_delete = hostfs_d_delete, -+}; -+ -+/* Not changed */ -+static char *root_ino = "/"; -+ -+#define HOSTFS_SUPER_MAGIC 0x00c0ffee -+ -+static struct inode_operations hostfs_iops; -+static struct inode_operations hostfs_dir_iops; -+static struct address_space_operations hostfs_link_aops; -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *root, *name; -+ int len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ root = parent->d_inode->u.hostfs_i.host_filename; -+ len += strlen(root); -+ name = 
kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ len -= parent->d_name.len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], parent->d_name.name, -+ parent->d_name.len); -+ parent = parent->d_parent; -+ } -+ strncpy(name, root, strlen(root)); -+ return(name); -+} -+ -+static char *inode_name(struct inode *ino, int extra) -+{ -+ struct dentry *dentry; -+ -+ dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); -+ return(dentry_name(dentry, extra)); -+} -+ -+static int read_name(struct inode *ino, char *name) -+{ -+ /* The non-int inode fields are copied into ints by stat_file and -+ * then copied into the inode because passing the actual pointers -+ * in and having them treated as int * breaks on big-endian machines -+ */ -+ int err; -+ int i_dev, i_mode, i_nlink, i_blksize; -+ unsigned long long i_size; -+ unsigned long long i_ino; -+ unsigned long long i_blocks; -+ err = stat_file(name, &i_dev, &i_ino, &i_mode, &i_nlink, -+ &ino->i_uid, &ino->i_gid, &i_size, &ino->i_atime, -+ &ino->i_mtime, &ino->i_ctime, &i_blksize, &i_blocks); -+ if(err) return(err); -+ ino->i_ino = i_ino; -+ ino->i_dev = i_dev; -+ ino->i_mode = i_mode; -+ ino->i_nlink = i_nlink; -+ ino->i_size = i_size; -+ ino->i_blksize = i_blksize; -+ ino->i_blocks = i_blocks; -+ if(kdev_same(ino->i_sb->s_dev, ROOT_DEV) && (ino->i_uid == getuid())) -+ ino->i_uid = 0; -+ return(0); -+} -+ -+static char *follow_link(char *link) -+{ -+ int len, n; -+ char *name, *resolved, *end; -+ -+ len = 64; -+ while(1){ -+ n = -ENOMEM; -+ name = kmalloc(len, GFP_KERNEL); -+ if(name == NULL) -+ goto out; -+ -+ n = do_readlink(link, name, len); -+ if(n < len) -+ break; -+ len *= 2; -+ kfree(name); -+ } -+ if(n < 0) -+ goto out_free; -+ -+ if(*name == '/') -+ return(name); -+ -+ end = strrchr(link, '/'); -+ if(end == NULL) -+ return(name); -+ -+ *(end + 1) = '\0'; -+ len = strlen(link) + strlen(name) + 1; -+ 
-+ resolved = kmalloc(len, GFP_KERNEL); -+ if(resolved == NULL){ -+ n = -ENOMEM; -+ goto out_free; -+ } -+ -+ sprintf(resolved, "%s%s", link, name); -+ kfree(name); -+ kfree(link); -+ return(resolved); -+ -+ out_free: -+ kfree(name); -+ out: -+ return(ERR_PTR(n)); -+} -+ -+static int read_inode(struct inode *ino) -+{ -+ char *name; -+ int err; -+ -+ err = -ENOMEM; -+ name = inode_name(ino, 0); -+ if(name == NULL) -+ goto out; -+ -+ if(file_type(name, NULL) == OS_TYPE_SYMLINK){ -+ name = follow_link(name); -+ if(IS_ERR(name)){ -+ err = PTR_ERR(name); -+ goto out; -+ } -+ } -+ -+ err = read_name(ino, name); -+ kfree(name); -+ out: -+ return(err); -+} -+ -+void hostfs_delete_inode(struct inode *ino) -+{ -+ if(ino->u.hostfs_i.host_filename) -+ kfree(ino->u.hostfs_i.host_filename); -+ ino->u.hostfs_i.host_filename = NULL; -+ -+ if(ino->u.hostfs_i.fd != -1) -+ close_file(&ino->u.hostfs_i.fd); -+ -+ ino->u.hostfs_i.mode = 0; -+ clear_inode(ino); -+} -+ -+int hostfs_statfs(struct super_block *sb, struct statfs *sf) -+{ -+ /* do_statfs uses struct statfs64 internally, but the linux kernel -+ * struct statfs still has 32-bit versions for most of these fields, -+ * so we convert them here -+ */ -+ int err; -+ long long f_blocks; -+ long long f_bfree; -+ long long f_bavail; -+ long long f_files; -+ long long f_ffree; -+ -+ err = do_statfs(sb->s_root->d_inode->u.hostfs_i.host_filename, -+ &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, -+ &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), -+ &sf->f_namelen, sf->f_spare); -+ if(err) return(err); -+ sf->f_blocks = f_blocks; -+ sf->f_bfree = f_bfree; -+ sf->f_bavail = f_bavail; -+ sf->f_files = f_files; -+ sf->f_ffree = f_ffree; -+ sf->f_type = HOSTFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct super_operations hostfs_sbops = { -+ .put_inode = force_delete, -+ .delete_inode = hostfs_delete_inode, -+ .statfs = hostfs_statfs, -+}; -+ -+int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ void *dir; -+ 
char *name; -+ unsigned long long next, ino; -+ int error, len; -+ -+ name = dentry_name(file->f_dentry, 0); -+ if(name == NULL) return(-ENOMEM); -+ dir = open_dir(name, &error); -+ kfree(name); -+ if(dir == NULL) return(-error); -+ next = file->f_pos; -+ while((name = read_dir(dir, &next, &ino, &len)) != NULL){ -+ error = (*filldir)(ent, name, len, file->f_pos, -+ ino, DT_UNKNOWN); -+ if(error) break; -+ file->f_pos = next; -+ } -+ close_dir(dir); -+ return(0); -+} -+ -+int hostfs_file_open(struct inode *ino, struct file *file) -+{ -+ char *name; -+ int mode = 0, r = 0, w = 0, fd; -+ -+ mode = file->f_mode & (FMODE_READ | FMODE_WRITE); -+ if((mode & ino->u.hostfs_i.mode) == mode) -+ return(0); -+ -+ /* The file may already have been opened, but with the wrong access, -+ * so this resets things and reopens the file with the new access. -+ */ -+ if(ino->u.hostfs_i.fd != -1){ -+ close_file(&ino->u.hostfs_i.fd); -+ ino->u.hostfs_i.fd = -1; -+ } -+ -+ ino->u.hostfs_i.mode |= mode; -+ if(ino->u.hostfs_i.mode & FMODE_READ) -+ r = 1; -+ if(ino->u.hostfs_i.mode & FMODE_WRITE) -+ w = 1; -+ if(w) -+ r = 1; -+ -+ name = dentry_name(file->f_dentry, 0); -+ if(name == NULL) -+ return(-ENOMEM); -+ -+ fd = open_file(name, r, w); -+ kfree(name); -+ if(fd < 0) return(fd); -+ file_hostfs_i(file)->fd = fd; -+ -+ return(0); -+} -+ -+int hostfs_dir_open(struct inode *ino, struct file *file) -+{ -+ return(0); -+} -+ -+int hostfs_dir_release(struct inode *ino, struct file *file) -+{ -+ return(0); -+} -+ -+int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ return(0); -+} -+ -+static struct file_operations hostfs_file_fops = { -+ .owner = NULL, -+ .read = generic_file_read, -+ .write = generic_file_write, -+ .mmap = generic_file_mmap, -+ .open = hostfs_file_open, -+ .release = NULL, -+ .fsync = hostfs_fsync, -+}; -+ -+static struct file_operations hostfs_dir_fops = { -+ .owner = NULL, -+ .readdir = hostfs_readdir, -+ .open = hostfs_dir_open, -+ .release = 
hostfs_dir_release, -+ .fsync = hostfs_fsync, -+}; -+ -+int hostfs_writepage(struct page *page) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ char *buffer; -+ unsigned long long base; -+ int count = PAGE_CACHE_SIZE; -+ int end_index = inode->i_size >> PAGE_CACHE_SHIFT; -+ int err; -+ -+ if (page->index >= end_index) -+ count = inode->i_size & (PAGE_CACHE_SIZE-1); -+ -+ buffer = kmap(page); -+ base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; -+ -+ err = write_file(inode->u.hostfs_i.fd, &base, buffer, count); -+ if(err != count){ -+ ClearPageUptodate(page); -+ goto out; -+ } -+ -+ if (base > inode->i_size) -+ inode->i_size = base; -+ -+ if (PageError(page)) -+ ClearPageError(page); -+ err = 0; -+ -+ out: -+ kunmap(page); -+ -+ UnlockPage(page); -+ return err; -+} -+ -+int hostfs_readpage(struct file *file, struct page *page) -+{ -+ char *buffer; -+ long long start; -+ int err = 0; -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ err = read_file(file_hostfs_i(file)->fd, &start, buffer, -+ PAGE_CACHE_SIZE); -+ if(err < 0) goto out; -+ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) ClearPageError(page); -+ err = 0; -+ out: -+ kunmap(page); -+ UnlockPage(page); -+ return(err); -+} -+ -+int hostfs_prepare_write(struct file *file, struct page *page, -+ unsigned int from, unsigned int to) -+{ -+ char *buffer; -+ long long start, tmp; -+ int err; -+ -+ start = (long long) page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ if(from != 0){ -+ tmp = start; -+ err = read_file(file_hostfs_i(file)->fd, &tmp, buffer, -+ from); -+ if(err < 0) goto out; -+ } -+ if(to != PAGE_CACHE_SIZE){ -+ start += to; -+ err = read_file(file_hostfs_i(file)->fd, &start, buffer + to, -+ PAGE_CACHE_SIZE - to); -+ if(err < 0) goto out; -+ } -+ err = 0; -+ out: -+ kunmap(page); -+ return(err); -+} -+ -+int hostfs_commit_write(struct file *file, struct page *page, 
unsigned from, -+ unsigned to) -+{ -+ struct address_space *mapping = page->mapping; -+ struct inode *inode = mapping->host; -+ char *buffer; -+ long long start; -+ int err = 0; -+ -+ start = (long long) (page->index << PAGE_CACHE_SHIFT) + from; -+ buffer = kmap(page); -+ err = write_file(file_hostfs_i(file)->fd, &start, buffer + from, -+ to - from); -+ if(err > 0) err = 0; -+ if(!err && (start > inode->i_size)) -+ inode->i_size = start; -+ -+ kunmap(page); -+ return(err); -+} -+ -+static struct address_space_operations hostfs_aops = { -+ .writepage = hostfs_writepage, -+ .readpage = hostfs_readpage, -+ .prepare_write = hostfs_prepare_write, -+ .commit_write = hostfs_commit_write -+}; -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error) -+{ -+ struct inode *inode; -+ char *name; -+ int type, err = -ENOMEM, rdev; -+ -+ inode = new_inode(sb); -+ if(inode == NULL) -+ goto out; -+ -+ inode->u.hostfs_i.host_filename = NULL; -+ inode->u.hostfs_i.fd = -1; -+ inode->u.hostfs_i.mode = 0; -+ insert_inode_hash(inode); -+ if(dentry){ -+ name = dentry_name(dentry, 0); -+ if(name == NULL){ -+ err = -ENOMEM; -+ goto out_put; -+ } -+ type = file_type(name, &rdev); -+ kfree(name); -+ } -+ else type = OS_TYPE_DIR; -+ inode->i_sb = sb; -+ -+ err = 0; -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_op = &page_symlink_inode_operations; -+ else if(type == OS_TYPE_DIR) -+ inode->i_op = &hostfs_dir_iops; -+ else inode->i_op = &hostfs_iops; -+ -+ if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; -+ else inode->i_fop = &hostfs_file_fops; -+ -+ if(type == OS_TYPE_SYMLINK) -+ inode->i_mapping->a_ops = &hostfs_link_aops; -+ else inode->i_mapping->a_ops = &hostfs_aops; -+ -+ switch (type) { -+ case OS_TYPE_CHARDEV: -+ init_special_inode(inode, S_IFCHR, rdev); -+ break; -+ case OS_TYPE_BLOCKDEV: -+ init_special_inode(inode, S_IFBLK, rdev); -+ break; -+ case OS_TYPE_FIFO: -+ init_special_inode(inode, S_IFIFO, 0); -+ break; -+ case OS_TYPE_SOCK: -+ 
init_special_inode(inode, S_IFSOCK, 0); -+ break; -+ } -+ -+ if(error) *error = err; -+ return(inode); -+ out_put: -+ iput(inode); -+ out: -+ if(error) *error = err; -+ return(NULL); -+} -+ -+int hostfs_create(struct inode *dir, struct dentry *dentry, int mode) -+{ -+ struct inode *inode; -+ char *name; -+ int error; -+ -+ inode = get_inode(dir->i_sb, dentry, &error); -+ if(error) return(error); -+ name = dentry_name(dentry, 0); -+ if(name == NULL){ -+ iput(inode); -+ return(-ENOMEM); -+ } -+ error = file_create(name, -+ mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, -+ mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, -+ mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); -+ if(!error) error = read_name(inode, name); -+ kfree(name); -+ if(error){ -+ iput(inode); -+ return(error); -+ } -+ d_instantiate(dentry, inode); -+ return(0); -+} -+ -+struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry) -+{ -+ struct inode *inode; -+ char *name; -+ int error; -+ -+ inode = get_inode(ino->i_sb, dentry, &error); -+ if(error != 0) return(ERR_PTR(error)); -+ name = dentry_name(dentry, 0); -+ if(name == NULL) return(ERR_PTR(-ENOMEM)); -+ error = read_name(inode, name); -+ kfree(name); -+ if(error){ -+ iput(inode); -+ if(error == -ENOENT) inode = NULL; -+ else return(ERR_PTR(error)); -+ } -+ d_add(dentry, inode); -+ dentry->d_op = &hostfs_dentry_ops; -+ return(NULL); -+} -+ -+static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int len; -+ -+ file = inode_name(ino, dentry->d_name.len + 1); -+ if(file == NULL) return(NULL); -+ strcat(file, "/"); -+ len = strlen(file); -+ strncat(file, dentry->d_name.name, dentry->d_name.len); -+ file[len + dentry->d_name.len] = '\0'; -+ return(file); -+} -+ -+int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) -+{ -+ char *from_name, *to_name; -+ int err; -+ -+ if((from_name = inode_dentry_name(ino, from)) == NULL) -+ return(-ENOMEM); -+ to_name = dentry_name(to, 0); -+ 
if(to_name == NULL){ -+ kfree(from_name); -+ return(-ENOMEM); -+ } -+ err = link_file(to_name, from_name); -+ kfree(from_name); -+ kfree(to_name); -+ return(err); -+} -+ -+int hostfs_unlink(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ err = unlink_file(file); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ err = make_symlink(file, to); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ err = do_mkdir(file, mode); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_rmdir(struct inode *ino, struct dentry *dentry) -+{ -+ char *file; -+ int err; -+ -+ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); -+ err = do_rmdir(file); -+ kfree(file); -+ return(err); -+} -+ -+int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev) -+{ -+ struct inode *inode; -+ char *name; -+ int error; -+ -+ inode = get_inode(dir->i_sb, dentry, &error); -+ if(error) return(error); -+ name = dentry_name(dentry, 0); -+ if(name == NULL){ -+ iput(inode); -+ return(-ENOMEM); -+ } -+ init_special_inode(inode, mode, dev); -+ error = do_mknod(name, mode, dev); -+ if(!error) error = read_name(inode, name); -+ kfree(name); -+ if(error){ -+ iput(inode); -+ return(error); -+ } -+ d_instantiate(dentry, inode); -+ return(0); -+} -+ -+int hostfs_rename(struct inode *from_ino, struct dentry *from, -+ struct inode *to_ino, struct dentry *to) -+{ -+ char *from_name, *to_name; -+ int err; -+ -+ if((from_name = inode_dentry_name(from_ino, from)) == NULL) -+ return(-ENOMEM); -+ if((to_name = inode_dentry_name(to_ino, to)) == NULL){ 
-+ kfree(from_name); -+ return(-ENOMEM); -+ } -+ err = rename_file(from_name, to_name); -+ kfree(from_name); -+ kfree(to_name); -+ return(err); -+} -+ -+void hostfs_truncate(struct inode *ino) -+{ -+ not_implemented(); -+} -+ -+int hostfs_permission(struct inode *ino, int desired) -+{ -+ char *name; -+ int r = 0, w = 0, x = 0, err; -+ -+ if(desired & MAY_READ) r = 1; -+ if(desired & MAY_WRITE) w = 1; -+ if(desired & MAY_EXEC) x = 1; -+ name = inode_name(ino, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = access_file(name, r, w, x); -+ kfree(name); -+ if(!err) err = vfs_permission(ino, desired); -+ return(err); -+} -+ -+int hostfs_setattr(struct dentry *dentry, struct iattr *attr) -+{ -+ struct hostfs_iattr attrs; -+ char *name; -+ int err; -+ -+ attrs.ia_valid = 0; -+ if(attr->ia_valid & ATTR_MODE){ -+ attrs.ia_valid |= HOSTFS_ATTR_MODE; -+ attrs.ia_mode = attr->ia_mode; -+ } -+ if(attr->ia_valid & ATTR_UID){ -+ if(kdev_same(dentry->d_inode->i_sb->s_dev, ROOT_DEV) && -+ (attr->ia_uid == 0)) -+ attr->ia_uid = getuid(); -+ attrs.ia_valid |= HOSTFS_ATTR_UID; -+ attrs.ia_uid = attr->ia_uid; -+ } -+ if(attr->ia_valid & ATTR_GID){ -+ if(kdev_same(dentry->d_inode->i_sb->s_dev, ROOT_DEV) && -+ (attr->ia_gid == 0)) -+ attr->ia_gid = getuid(); -+ attrs.ia_valid |= HOSTFS_ATTR_GID; -+ attrs.ia_gid = attr->ia_gid; -+ } -+ if(attr->ia_valid & ATTR_SIZE){ -+ attrs.ia_valid |= HOSTFS_ATTR_SIZE; -+ attrs.ia_size = attr->ia_size; -+ } -+ if(attr->ia_valid & ATTR_ATIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_ATIME; -+ attrs.ia_atime = attr->ia_atime; -+ } -+ if(attr->ia_valid & ATTR_MTIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_MTIME; -+ attrs.ia_mtime = attr->ia_mtime; -+ } -+ if(attr->ia_valid & ATTR_CTIME){ -+ attrs.ia_valid |= HOSTFS_ATTR_CTIME; -+ attrs.ia_ctime = attr->ia_ctime; -+ } -+ if(attr->ia_valid & ATTR_ATIME_SET){ -+ attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; -+ } -+ if(attr->ia_valid & ATTR_MTIME_SET){ -+ attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; -+ } -+ name = 
dentry_name(dentry, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = set_attr(name, &attrs); -+ kfree(name); -+ if(err) -+ return(err); -+ -+ return(inode_setattr(dentry->d_inode, attr)); -+} -+ -+int hostfs_getattr(struct dentry *dentry, struct iattr *attr) -+{ -+ not_implemented(); -+ return(-EINVAL); -+} -+ -+static struct inode_operations hostfs_iops = { -+ .create = hostfs_create, -+ .link = hostfs_link, -+ .unlink = hostfs_unlink, -+ .symlink = hostfs_symlink, -+ .mkdir = hostfs_mkdir, -+ .rmdir = hostfs_rmdir, -+ .mknod = hostfs_mknod, -+ .rename = hostfs_rename, -+ .truncate = hostfs_truncate, -+ .permission = hostfs_permission, -+ .setattr = hostfs_setattr, -+ .getattr = hostfs_getattr, -+}; -+ -+static struct inode_operations hostfs_dir_iops = { -+ .create = hostfs_create, -+ .lookup = hostfs_lookup, -+ .link = hostfs_link, -+ .unlink = hostfs_unlink, -+ .symlink = hostfs_symlink, -+ .mkdir = hostfs_mkdir, -+ .rmdir = hostfs_rmdir, -+ .mknod = hostfs_mknod, -+ .rename = hostfs_rename, -+ .truncate = hostfs_truncate, -+ .permission = hostfs_permission, -+ .setattr = hostfs_setattr, -+ .getattr = hostfs_getattr, -+}; -+ -+int hostfs_link_readpage(struct file *file, struct page *page) -+{ -+ char *buffer, *name; -+ long long start; -+ int err; -+ -+ start = page->index << PAGE_CACHE_SHIFT; -+ buffer = kmap(page); -+ name = inode_name(page->mapping->host, 0); -+ if(name == NULL) return(-ENOMEM); -+ err = do_readlink(name, buffer, PAGE_CACHE_SIZE); -+ kfree(name); -+ if(err == PAGE_CACHE_SIZE) -+ err = -E2BIG; -+ else if(err > 0){ -+ flush_dcache_page(page); -+ SetPageUptodate(page); -+ if (PageError(page)) ClearPageError(page); -+ err = 0; -+ } -+ kunmap(page); -+ UnlockPage(page); -+ return(err); -+} -+ -+static struct address_space_operations hostfs_link_aops = { -+ .readpage = hostfs_link_readpage, -+}; -+ -+static struct super_block *hostfs_read_super_common(struct super_block *sb, -+ char *data) -+{ -+ struct inode *root_inode; -+ char *name; -+ -+ 
sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = HOSTFS_SUPER_MAGIC; -+ sb->s_op = &hostfs_sbops; -+ if((data == NULL) || (*((char *) data) == '\0')) data = root_ino; -+ name = kmalloc(strlen(data) + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ strcpy(name, data); -+ root_inode = get_inode(sb, NULL, NULL); -+ if(root_inode == NULL) -+ goto out_free; -+ -+ root_inode->u.hostfs_i.host_filename = name; -+ sb->s_root = d_alloc_root(root_inode); -+ if(read_inode(root_inode)) -+ goto out_put; -+ return(sb); -+ -+ out_free: -+ kfree(name); -+ out_put: -+ iput(root_inode); -+ return(NULL); -+} -+ -+struct super_block *hostfs_read_super(struct super_block *sb, void *data, -+ int silent) -+{ -+ return(hostfs_read_super_common(sb, data)); -+} -+ -+DECLARE_FSTYPE(hostfs_type, "hostfs", hostfs_read_super, 0); -+ -+static int __init init_hostfs(void) -+{ -+ return(register_filesystem(&hostfs_type)); -+} -+ -+static void __exit exit_hostfs(void) -+{ -+ unregister_filesystem(&hostfs_type); -+} -+ -+module_init(init_hostfs) -+module_exit(exit_hostfs) -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/fs/hostfs/hostfs_user.c um/arch/um/fs/hostfs/hostfs_user.c ---- orig/arch/um/fs/hostfs/hostfs_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/fs/hostfs/hostfs_user.c Fri Jan 31 21:48:30 2003 -@@ -0,0 +1,341 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <fcntl.h> -+#include <dirent.h> -+#include <errno.h> -+#include <utime.h> -+#include <string.h> -+#include <sys/stat.h> -+#include <sys/time.h> -+#include <sys/vfs.h> -+#include "hostfs.h" -+#include "kern_util.h" -+#include "user.h" -+ -+int stat_file(const char *path, int *dev_out, unsigned long long *inode_out, -+ int *mode_out, int *nlink_out, int *uid_out, int *gid_out, -+ unsigned long long *size_out, unsigned long *atime_out, -+ unsigned long *mtime_out, unsigned long *ctime_out, -+ int *blksize_out, unsigned long long *blocks_out) -+{ -+ struct stat64 buf; -+ -+ if(lstat64(path, &buf) < 0) -+ return(-errno); -+ if(dev_out != NULL) *dev_out = buf.st_dev; -+ -+ /* See the Makefile for why STAT64_INO_FIELD is passed in -+ * by the build -+ */ -+ if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD; -+ if(mode_out != NULL) *mode_out = buf.st_mode; -+ if(nlink_out != NULL) *nlink_out = buf.st_nlink; -+ if(uid_out != NULL) *uid_out = buf.st_uid; -+ if(gid_out != NULL) *gid_out = buf.st_gid; -+ if(size_out != NULL) *size_out = buf.st_size; -+ if(atime_out != NULL) *atime_out = buf.st_atime; -+ if(mtime_out != NULL) *mtime_out = buf.st_mtime; -+ if(ctime_out != NULL) *ctime_out = buf.st_ctime; -+ if(blksize_out != NULL) *blksize_out = buf.st_blksize; -+ if(blocks_out != NULL) *blocks_out = buf.st_blocks; -+ return(0); -+} -+ -+int file_type(const char *path, int *rdev) -+{ -+ struct stat64 buf; -+ -+ if(lstat64(path, &buf) 
< 0) -+ return(-errno); -+ if(rdev != NULL) -+ *rdev = buf.st_rdev; -+ -+ if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); -+ else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); -+ else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); -+ else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); -+ else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); -+ else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); -+ else return(OS_TYPE_FILE); -+} -+ -+int access_file(char *path, int r, int w, int x) -+{ -+ int mode = 0; -+ -+ if(r) mode = R_OK; -+ if(w) mode |= W_OK; -+ if(x) mode |= X_OK; -+ if(access(path, mode) != 0) return(-errno); -+ else return(0); -+} -+ -+int open_file(char *path, int r, int w) -+{ -+ int mode = 0, fd; -+ -+ if(r && !w) mode = O_RDONLY; -+ else if(!r && w) mode = O_WRONLY; -+ else if(r && w) mode = O_RDWR; -+ else panic("Impossible mode in open_file"); -+ fd = open64(path, mode); -+ if(fd < 0) return(-errno); -+ else return(fd); -+} -+ -+void *open_dir(char *path, int *err_out) -+{ -+ DIR *dir; -+ -+ dir = opendir(path); -+ *err_out = errno; -+ if(dir == NULL) return(NULL); -+ return(dir); -+} -+ -+char *read_dir(void *stream, unsigned long long *pos, -+ unsigned long long *ino_out, int *len_out) -+{ -+ DIR *dir = stream; -+ struct dirent *ent; -+ -+ seekdir(dir, *pos); -+ ent = readdir(dir); -+ if(ent == NULL) return(NULL); -+ *len_out = strlen(ent->d_name); -+ *ino_out = ent->d_ino; -+ *pos = telldir(dir); -+ return(ent->d_name); -+} -+ -+int read_file(int fd, unsigned long long *offset, char *buf, int len) -+{ -+ int n; -+ -+ n = pread64(fd, buf, len, *offset); -+ if(n < 0) return(-errno); -+ *offset += n; -+ return(n); -+} -+ -+int write_file(int fd, unsigned long long *offset, const char *buf, int len) -+{ -+ int n; -+ -+ n = pwrite64(fd, buf, len, *offset); -+ if(n < 0) return(-errno); -+ *offset += n; -+ return(n); -+} -+ -+int lseek_file(int fd, long long offset, int whence) -+{ -+ int ret; -+ -+ ret = lseek64(fd, offset, whence); -+ if(ret < 
0) return(-errno); -+ return(0); -+} -+ -+void close_file(void *stream) -+{ -+ close(*((int *) stream)); -+} -+ -+void close_dir(void *stream) -+{ -+ closedir(stream); -+} -+ -+int file_create(char *name, int ur, int uw, int ux, int gr, -+ int gw, int gx, int or, int ow, int ox) -+{ -+ int mode, fd; -+ -+ mode = 0; -+ mode |= ur ? S_IRUSR : 0; -+ mode |= uw ? S_IWUSR : 0; -+ mode |= ux ? S_IXUSR : 0; -+ mode |= gr ? S_IRGRP : 0; -+ mode |= gw ? S_IWGRP : 0; -+ mode |= gx ? S_IXGRP : 0; -+ mode |= or ? S_IROTH : 0; -+ mode |= ow ? S_IWOTH : 0; -+ mode |= ox ? S_IXOTH : 0; -+ fd = open64(name, O_CREAT, mode); -+ if(fd < 0) return(-errno); -+ close(fd); -+ return(0); -+} -+ -+int set_attr(const char *file, struct hostfs_iattr *attrs) -+{ -+ struct utimbuf buf; -+ int err, ma; -+ -+ if(attrs->ia_valid & HOSTFS_ATTR_MODE){ -+ if(chmod(file, attrs->ia_mode) != 0) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_UID){ -+ if(chown(file, attrs->ia_uid, -1)) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_GID){ -+ if(chown(file, -1, attrs->ia_gid)) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ -+ if(truncate(file, attrs->ia_size)) return(-errno); -+ } -+ ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; -+ if((attrs->ia_valid & ma) == ma){ -+ buf.actime = attrs->ia_atime; -+ buf.modtime = attrs->ia_mtime; -+ if(utime(file, &buf) != 0) return(-errno); -+ } -+ else { -+ if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, NULL, &buf.modtime, NULL, -+ NULL, NULL); -+ if(err != 0) return(err); -+ buf.actime = attrs->ia_atime; -+ if(utime(file, &buf) != 0) return(-errno); -+ } -+ if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, &buf.actime, NULL, NULL, -+ NULL, NULL); -+ if(err != 0) return(err); -+ buf.modtime = attrs->ia_mtime; -+ if(utime(file, &buf) != 0) return(-errno); -+ } -+ } -+ if(attrs->ia_valid & 
HOSTFS_ATTR_CTIME) ; -+ if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ -+ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, -+ NULL, NULL, &attrs->ia_atime, &attrs->ia_mtime, -+ NULL, NULL, NULL); -+ if(err != 0) return(err); -+ } -+ return(0); -+} -+ -+int make_symlink(const char *from, const char *to) -+{ -+ int err; -+ -+ err = symlink(to, from); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int unlink_file(const char *file) -+{ -+ int err; -+ -+ err = unlink(file); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_mkdir(const char *file, int mode) -+{ -+ int err; -+ -+ err = mkdir(file, mode); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_rmdir(const char *file) -+{ -+ int err; -+ -+ err = rmdir(file); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_mknod(const char *file, int mode, int dev) -+{ -+ int err; -+ -+ err = mknod(file, mode, dev); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int link_file(const char *to, const char *from) -+{ -+ int err; -+ -+ err = link(to, from); -+ if(err) return(-errno); -+ return(0); -+} -+ -+int do_readlink(char *file, char *buf, int size) -+{ -+ int n; -+ -+ n = readlink(file, buf, size); -+ if(n < 0) -+ return(-errno); -+ if(n < size) -+ buf[n] = '\0'; -+ return(n); -+} -+ -+int rename_file(char *from, char *to) -+{ -+ int err; -+ -+ err = rename(from, to); -+ if(err < 0) return(-errno); -+ return(0); -+} -+ -+int do_statfs(char *root, long *bsize_out, long long *blocks_out, -+ long long *bfree_out, long long *bavail_out, -+ long long *files_out, long long *ffree_out, -+ void *fsid_out, int fsid_size, long *namelen_out, -+ long *spare_out) -+{ -+ struct statfs64 buf; -+ int err; -+ -+ err = statfs64(root, &buf); -+ if(err < 0) return(-errno); -+ *bsize_out = buf.f_bsize; -+ *blocks_out = buf.f_blocks; -+ *bfree_out = buf.f_bfree; -+ *bavail_out = buf.f_bavail; -+ *files_out = buf.f_files; -+ *ffree_out = buf.f_ffree; -+ memcpy(fsid_out, &buf.f_fsid, -+ sizeof(buf.f_fsid) 
> fsid_size ? fsid_size : -+ sizeof(buf.f_fsid)); -+ *namelen_out = buf.f_namelen; -+ spare_out[0] = buf.f_spare[0]; -+ spare_out[1] = buf.f_spare[1]; -+ spare_out[2] = buf.f_spare[2]; -+ spare_out[3] = buf.f_spare[3]; -+ spare_out[4] = buf.f_spare[4]; -+ spare_out[5] = buf.f_spare[5]; -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/fs/hppfs/Makefile um/arch/um/fs/hppfs/Makefile ---- orig/arch/um/fs/hppfs/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/fs/hppfs/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,10 @@ -+O_TARGET := hppfs.o -+obj-y = hppfs_kern.o #hppfs_user.o -+obj-m = $(O_TARGET) -+ -+CFLAGS_hppfs_kern.o := $(CFLAGS) -+#CFLAGS_hppfs_user.o := $(USER_CFLAGS) -+ -+override CFLAGS = -+ -+include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/arch/um/fs/hppfs/hppfs_kern.c um/arch/um/fs/hppfs/hppfs_kern.c ---- orig/arch/um/fs/hppfs/hppfs_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/fs/hppfs/hppfs_kern.c Thu Feb 27 13:14:26 2003 -@@ -0,0 +1,725 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <linux/fs.h> -+#include <linux/module.h> -+#include <linux/slab.h> -+#include <linux/list.h> -+#include <linux/kernel.h> -+#include <linux/ctype.h> -+#include <asm/uaccess.h> -+#include "os.h" -+ -+struct hppfs_data { -+ struct list_head list; -+ char contents[PAGE_SIZE - sizeof(struct list_head)]; -+}; -+ -+struct hppfs_private { -+ struct file proc_file; -+ int host_fd; -+ loff_t len; -+ struct hppfs_data *contents; -+}; -+ -+#define HPPFS_SUPER_MAGIC 0xb00000ee -+ -+static struct super_operations 
hppfs_sbops; -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error); -+ -+static int is_pid(struct dentry *dentry) -+{ -+ struct super_block *sb; -+ int i; -+ -+ sb = dentry->d_sb; -+ if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) -+ return(0); -+ -+ for(i = 0; i < dentry->d_name.len; i++){ -+ if(!isdigit(dentry->d_name.name[i])) -+ return(0); -+ } -+ return(1); -+} -+ -+static char *dentry_name(struct dentry *dentry, int extra) -+{ -+ struct dentry *parent; -+ char *root, *name; -+ const char *seg_name; -+ int len, seg_len; -+ -+ len = 0; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)) -+ len += strlen("pid") + 1; -+ else len += parent->d_name.len + 1; -+ parent = parent->d_parent; -+ } -+ -+ root = "proc"; -+ len += strlen(root); -+ name = kmalloc(len + extra + 1, GFP_KERNEL); -+ if(name == NULL) return(NULL); -+ -+ name[len] = '\0'; -+ parent = dentry; -+ while(parent->d_parent != parent){ -+ if(is_pid(parent)){ -+ seg_name = "pid"; -+ seg_len = strlen("pid"); -+ } -+ else { -+ seg_name = parent->d_name.name; -+ seg_len = parent->d_name.len; -+ } -+ -+ len -= seg_len + 1; -+ name[len] = '/'; -+ strncpy(&name[len + 1], seg_name, seg_len); -+ parent = parent->d_parent; -+ } -+ strncpy(name, root, strlen(root)); -+ return(name); -+} -+ -+struct dentry_operations hppfs_dentry_ops = { -+}; -+ -+static int file_removed(struct dentry *dentry, const char *file) -+{ -+ char *host_file; -+ int extra, fd; -+ -+ extra = 0; -+ if(file != NULL) extra += strlen(file) + 1; -+ -+ host_file = dentry_name(dentry, extra + strlen("/remove")); -+ if(host_file == NULL){ -+ printk("file_removed : allocation failed\n"); -+ return(-ENOMEM); -+ } -+ -+ if(file != NULL){ -+ strcat(host_file, "/"); -+ strcat(host_file, file); -+ } -+ strcat(host_file, "/remove"); -+ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ kfree(host_file); -+ if(fd > 0){ -+ os_close_file(fd); -+ return(1); -+ 
} -+ return(0); -+} -+ -+static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry) -+{ -+ struct dentry *proc_dentry; -+ struct inode *inode; -+ int err, deleted; -+ -+ deleted = file_removed(dentry, NULL); -+ if(deleted < 0) -+ return(ERR_PTR(deleted)); -+ else if(deleted) -+ return(ERR_PTR(-ENOENT)); -+ -+ proc_dentry = lookup_hash(&dentry->d_name, ino->u.hppfs_i.proc_dentry); -+ if(IS_ERR(proc_dentry)) -+ return(proc_dentry); -+ -+ inode = get_inode(ino->i_sb, proc_dentry, &err); -+ if(err != 0) -+ return(ERR_PTR(err)); -+ -+ d_add(dentry, inode); -+ dentry->d_op = &hppfs_dentry_ops; -+ return(NULL); -+} -+ -+static struct inode_operations hppfs_file_iops = { -+}; -+ -+static struct inode_operations hppfs_dir_iops = { -+ .lookup = hppfs_lookup, -+}; -+ -+static ssize_t read_proc(struct file *file, char *buf, ssize_t count, -+ loff_t *ppos, int is_user) -+{ -+ ssize_t (*read)(struct file *, char *, size_t, loff_t *); -+ ssize_t n; -+ -+ read = file->f_dentry->d_inode->i_fop->read; -+ -+ if(!is_user) -+ set_fs(KERNEL_DS); -+ -+ n = (*read)(file, buf, count, &file->f_pos); -+ -+ if(!is_user) -+ set_fs(USER_DS); -+ -+ if(ppos) *ppos = file->f_pos; -+ return(n); -+} -+ -+static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) -+{ -+ ssize_t n; -+ int cur, err; -+ char *new_buf; -+ -+ n = -ENOMEM; -+ new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); -+ if(new_buf == NULL){ -+ printk("hppfs_read_file : kmalloc failed\n"); -+ goto out; -+ } -+ n = 0; -+ while(count > 0){ -+ cur = min_t(ssize_t, count, PAGE_SIZE); -+ err = os_read_file(fd, new_buf, cur); -+ if(err < 0){ -+ printk("hppfs_read : read failed, errno = %d\n", -+ count); -+ n = err; -+ goto out_free; -+ } -+ else if(err == 0) -+ break; -+ -+ if(copy_to_user(buf, new_buf, err)){ -+ n = -EFAULT; -+ goto out_free; -+ } -+ n += err; -+ count -= err; -+ } -+ out_free: -+ kfree(new_buf); -+ out: -+ return(n); -+} -+ -+static ssize_t hppfs_read(struct file *file, char *buf, size_t count, -+ loff_t 
*ppos) -+{ -+ struct hppfs_private *hppfs = file->private_data; -+ struct hppfs_data *data; -+ loff_t off; -+ int err; -+ -+ if(hppfs->contents != NULL){ -+ if(*ppos >= hppfs->len) return(0); -+ -+ data = hppfs->contents; -+ off = *ppos; -+ while(off >= sizeof(data->contents)){ -+ data = list_entry(data->list.next, struct hppfs_data, -+ list); -+ off -= sizeof(data->contents); -+ } -+ -+ if(off + count > hppfs->len) -+ count = hppfs->len - off; -+ copy_to_user(buf, &data->contents[off], count); -+ *ppos += count; -+ } -+ else if(hppfs->host_fd != -1){ -+ err = os_seek_file(hppfs->host_fd, *ppos); -+ if(err){ -+ printk("hppfs_read : seek failed, errno = %d\n", err); -+ return(err); -+ } -+ count = hppfs_read_file(hppfs->host_fd, buf, count); -+ if(count > 0) -+ *ppos += count; -+ } -+ else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1); -+ -+ return(count); -+} -+ -+static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, -+ loff_t *ppos) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ ssize_t (*write)(struct file *, const char *, size_t, loff_t *); -+ int err; -+ -+ write = proc_file->f_dentry->d_inode->i_fop->write; -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*write)(proc_file, buf, len, &proc_file->f_pos); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int open_host_sock(char *host_file, int *filter_out) -+{ -+ char *end; -+ int fd; -+ -+ end = &host_file[strlen(host_file)]; -+ strcpy(end, "/rw"); -+ *filter_out = 1; -+ fd = os_connect_socket(host_file); -+ if(fd > 0) -+ return(fd); -+ -+ strcpy(end, "/r"); -+ *filter_out = 0; -+ fd = os_connect_socket(host_file); -+ return(fd); -+} -+ -+static void free_contents(struct hppfs_data *head) -+{ -+ struct hppfs_data *data; -+ struct list_head *ele, *next; -+ -+ if(head == NULL) return; -+ -+ list_for_each_safe(ele, next, &head->list){ -+ data = list_entry(ele, struct hppfs_data, list); -+ kfree(data); -+ } 
-+ kfree(head); -+} -+ -+static struct hppfs_data *hppfs_get_data(int fd, int filter, -+ struct file *proc_file, -+ struct file *hppfs_file, -+ loff_t *size_out) -+{ -+ struct hppfs_data *data, *new, *head; -+ int n, err; -+ -+ err = -ENOMEM; -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL){ -+ printk("hppfs_get_data : head allocation failed\n"); -+ goto failed; -+ } -+ -+ INIT_LIST_HEAD(&data->list); -+ -+ head = data; -+ *size_out = 0; -+ -+ if(filter){ -+ while((n = read_proc(proc_file, data->contents, -+ sizeof(data->contents), NULL, 0)) > 0) -+ os_write_file(fd, data->contents, n); -+ err = os_shutdown_socket(fd, 0, 1); -+ if(err){ -+ printk("hppfs_get_data : failed to shut down " -+ "socket\n"); -+ goto failed_free; -+ } -+ } -+ while(1){ -+ n = os_read_file(fd, data->contents, sizeof(data->contents)); -+ if(n < 0){ -+ err = n; -+ printk("hppfs_get_data : read failed, errno = %d\n", -+ err); -+ goto failed_free; -+ } -+ else if(n == 0) -+ break; -+ -+ *size_out += n; -+ -+ if(n < sizeof(data->contents)) -+ break; -+ -+ new = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(new == 0){ -+ printk("hppfs_get_data : data allocation failed\n"); -+ err = -ENOMEM; -+ goto failed_free; -+ } -+ -+ INIT_LIST_HEAD(&new->list); -+ list_add(&new->list, &data->list); -+ data = new; -+ } -+ return(head); -+ -+ failed_free: -+ free_contents(head); -+ failed: -+ return(ERR_PTR(err)); -+} -+ -+static struct hppfs_private *hppfs_data(void) -+{ -+ struct hppfs_private *data; -+ -+ data = kmalloc(sizeof(*data), GFP_KERNEL); -+ if(data == NULL) -+ return(data); -+ -+ *data = ((struct hppfs_private ) { .host_fd = -1, -+ .len = -1, -+ .contents = NULL } ); -+ return(data); -+} -+ -+static int hppfs_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ char *host_file; -+ int err, fd, type, filter; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ host_file = 
dentry_name(file->f_dentry, strlen("/rw")); -+ if(host_file == NULL) -+ goto out_free2; -+ -+ proc_dentry = inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&data->proc_file, proc_dentry, file->f_mode); -+ if(err) -+ goto out_free1; -+ -+ type = os_file_type(host_file); -+ if(type == OS_TYPE_FILE){ -+ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); -+ if(fd >= 0) -+ data->host_fd = fd; -+ else printk("hppfs_open : failed to open '%s', errno = %d\n", -+ host_file, -fd); -+ -+ data->contents = NULL; -+ } -+ else if(type == OS_TYPE_DIR){ -+ fd = open_host_sock(host_file, &filter); -+ if(fd > 0){ -+ data->contents = hppfs_get_data(fd, filter, -+ &data->proc_file, -+ file, &data->len); -+ if(!IS_ERR(data->contents)) -+ data->host_fd = fd; -+ } -+ else printk("hppfs_open : failed to open a socket in " -+ "'%s', errno = %d\n", host_file, -fd); -+ } -+ kfree(host_file); -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free1: -+ kfree(host_file); -+ out_free2: -+ free_contents(data->contents); -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static int hppfs_dir_open(struct inode *inode, struct file *file) -+{ -+ struct hppfs_private *data; -+ struct dentry *proc_dentry; -+ int err; -+ -+ err = -ENOMEM; -+ data = hppfs_data(); -+ if(data == NULL) -+ goto out; -+ -+ proc_dentry = inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&data->proc_file, proc_dentry, file->f_mode); -+ if(err) -+ goto out_free; -+ -+ file->private_data = data; -+ return(0); -+ -+ out_free: -+ kfree(data); -+ out: -+ return(err); -+} -+ -+static loff_t hppfs_llseek(struct file *file, loff_t off, int where) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ loff_t (*llseek)(struct file *, loff_t, int); -+ loff_t ret; -+ -+ llseek = proc_file->f_dentry->d_inode->i_fop->llseek; -+ if(llseek != NULL){ -+ ret = (*llseek)(proc_file, off, where); -+ if(ret < 0) -+ return(ret); -+ } -+ -+ return(default_llseek(file, off, 
where)); -+} -+ -+struct hppfs_dirent { -+ void *vfs_dirent; -+ filldir_t filldir; -+ struct dentry *dentry; -+}; -+ -+static int hppfs_filldir(void *d, const char *name, int size, -+ loff_t offset, ino_t inode, unsigned int type) -+{ -+ struct hppfs_dirent *dirent = d; -+ -+ if(file_removed(dirent->dentry, name)) -+ return(0); -+ -+ return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, -+ inode, type)); -+} -+ -+static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) -+{ -+ struct hppfs_private *data = file->private_data; -+ struct file *proc_file = &data->proc_file; -+ int (*readdir)(struct file *, void *, filldir_t); -+ struct hppfs_dirent dirent = ((struct hppfs_dirent) -+ { .vfs_dirent = ent, -+ .filldir = filldir, -+ .dentry = file->f_dentry } ); -+ int err; -+ -+ readdir = proc_file->f_dentry->d_inode->i_fop->readdir; -+ -+ proc_file->f_pos = file->f_pos; -+ err = (*readdir)(proc_file, &dirent, hppfs_filldir); -+ file->f_pos = proc_file->f_pos; -+ -+ return(err); -+} -+ -+static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) -+{ -+ return(0); -+} -+ -+static struct file_operations hppfs_file_fops = { -+ .owner = NULL, -+ .llseek = hppfs_llseek, -+ .read = hppfs_read, -+ .write = hppfs_write, -+ .open = hppfs_open, -+}; -+ -+static struct file_operations hppfs_dir_fops = { -+ .owner = NULL, -+ .readdir = hppfs_readdir, -+ .open = hppfs_dir_open, -+ .fsync = hppfs_fsync, -+}; -+ -+static int hppfs_statfs(struct super_block *sb, struct statfs *sf) -+{ -+ sf->f_blocks = 0; -+ sf->f_bfree = 0; -+ sf->f_bavail = 0; -+ sf->f_files = 0; -+ sf->f_ffree = 0; -+ sf->f_type = HPPFS_SUPER_MAGIC; -+ return(0); -+} -+ -+static struct super_operations hppfs_sbops = { -+ .put_inode = force_delete, -+ .delete_inode = NULL, -+ .statfs = hppfs_statfs, -+}; -+ -+static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*readlink)(struct 
dentry *, char *, int); -+ int err, n; -+ -+ proc_dentry = dentry->d_inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&proc_file, proc_dentry, FMODE_READ); -+ if(err) -+ return(err); -+ -+ readlink = proc_dentry->d_inode->i_op->readlink; -+ n = (*readlink)(proc_dentry, buffer, buflen); -+ -+ if(proc_file.f_op->release) -+ (*proc_file.f_op->release)(proc_dentry->d_inode, &proc_file); -+ -+ return(n); -+} -+ -+static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) -+{ -+ struct file proc_file; -+ struct dentry *proc_dentry; -+ int (*follow_link)(struct dentry *, struct nameidata *); -+ int err, n; -+ -+ proc_dentry = dentry->d_inode->u.hppfs_i.proc_dentry; -+ err = init_private_file(&proc_file, proc_dentry, FMODE_READ); -+ if(err) -+ return(err); -+ -+ follow_link = proc_dentry->d_inode->i_op->follow_link; -+ n = (*follow_link)(proc_dentry, nd); -+ -+ if(proc_file.f_op->release) -+ (*proc_file.f_op->release)(proc_dentry->d_inode, &proc_file); -+ -+ return(n); -+} -+ -+static struct inode_operations hppfs_link_iops = { -+ .readlink = hppfs_readlink, -+ .follow_link = hppfs_follow_link, -+}; -+ -+static void read_inode(struct inode *ino) -+{ -+ struct inode *proc_ino; -+ -+ proc_ino = ino->u.hppfs_i.proc_dentry->d_inode; -+ ino->i_uid = proc_ino->i_uid; -+ ino->i_gid = proc_ino->i_gid; -+ ino->i_atime = proc_ino->i_atime; -+ ino->i_mtime = proc_ino->i_mtime; -+ ino->i_ctime = proc_ino->i_ctime; -+ ino->i_ino = proc_ino->i_ino; -+ ino->i_dev = proc_ino->i_dev; -+ ino->i_mode = proc_ino->i_mode; -+ ino->i_nlink = proc_ino->i_nlink; -+ ino->i_size = proc_ino->i_size; -+ ino->i_blksize = proc_ino->i_blksize; -+ ino->i_blocks = proc_ino->i_blocks; -+} -+ -+static struct inode *get_inode(struct super_block *sb, struct dentry *dentry, -+ int *error) -+{ -+ struct inode *inode; -+ int err = -ENOMEM; -+ -+ inode = new_inode(sb); -+ if(inode == NULL) -+ goto out; -+ -+ insert_inode_hash(inode); -+ if(S_ISDIR(dentry->d_inode->i_mode)){ -+ inode->i_op = 
&hppfs_dir_iops; -+ inode->i_fop = &hppfs_dir_fops; -+ } -+ else if(S_ISLNK(dentry->d_inode->i_mode)){ -+ inode->i_op = &hppfs_link_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ else { -+ inode->i_op = &hppfs_file_iops; -+ inode->i_fop = &hppfs_file_fops; -+ } -+ -+ inode->i_sb = sb; -+ inode->u.hppfs_i.proc_dentry = dentry; -+ -+ read_inode(inode); -+ err = 0; -+ -+ if(error) *error = err; -+ return(inode); -+ out: -+ if(error) *error = err; -+ return(NULL); -+} -+ -+static struct super_block *hppfs_read_super(struct super_block *sb, void *d, -+ int silent) -+{ -+ struct inode *root_inode; -+ struct file_system_type *procfs; -+ struct super_block *proc_sb; -+ -+ procfs = get_fs_type("proc"); -+ if(procfs == NULL) -+ goto out; -+ -+ if(list_empty(&procfs->fs_supers)) -+ goto out; -+ -+ proc_sb = list_entry(procfs->fs_supers.next, struct super_block, -+ s_instances); -+ -+ sb->s_blocksize = 1024; -+ sb->s_blocksize_bits = 10; -+ sb->s_magic = HPPFS_SUPER_MAGIC; -+ sb->s_op = &hppfs_sbops; -+ -+ dget(proc_sb->s_root); -+ root_inode = get_inode(sb, proc_sb->s_root, NULL); -+ if(root_inode == NULL) -+ goto out_dput; -+ -+ sb->s_root = d_alloc_root(root_inode); -+ if(sb->s_root == NULL) -+ goto out_put; -+ -+ return(sb); -+ -+ out_put: -+ iput(root_inode); -+ out_dput: -+ dput(proc_sb->s_root); -+ out: -+ return(NULL); -+} -+ -+DECLARE_FSTYPE(hppfs_type, "hppfs", hppfs_read_super, 0); -+ -+static int __init init_hppfs(void) -+{ -+ return(register_filesystem(&hppfs_type)); -+} -+ -+static void __exit exit_hppfs(void) -+{ -+ unregister_filesystem(&hppfs_type); -+} -+ -+module_init(init_hppfs) -+module_exit(exit_hppfs) -+MODULE_LICENSE("GPL"); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/2_5compat.h um/arch/um/include/2_5compat.h ---- orig/arch/um/include/2_5compat.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/2_5compat.h Thu Feb 27 20:15:19 2003 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __2_5_COMPAT_H__ -+#define __2_5_COMPAT_H__ -+ -+#include "linux/version.h" -+ -+#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \ -+ name : dev_name, \ -+ write : write_proc, \ -+ read : NULL, \ -+ device : device_proc, \ -+ unblank : NULL, \ -+ setup : setup_proc, \ -+ flags : f, \ -+ index : -1, \ -+ cflag : 0, \ -+ next : NULL \ -+} -+ -+#define INIT_ELV(queue, elv) elevator_init(elv, ELV_NOOP) -+ -+#define ELV_NOOP ELEVATOR_NOOP -+ -+#define INIT_HARDSECT(arr, maj, sizes) arr[maj] = sizes -+ -+#define IS_WRITE(req) ((req)->cmd == WRITE) -+ -+#define SET_PRI(task) \ -+ do { (task)->nice = 20; (task)->counter = -100; } while(0); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/Makefile um/arch/um/include/Makefile ---- orig/arch/um/include/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,7 @@ -+all : sc.h -+ -+sc.h : ../util/mk_sc -+ ../util/mk_sc > $@ -+ -+../util/mk_sc : -+ $(MAKE) -C ../util mk_sc -diff -Naur -X ../exclude-files orig/arch/um/include/chan_kern.h um/arch/um/include/chan_kern.h ---- orig/arch/um/include/chan_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/chan_kern.h Fri Nov 15 13:32:35 2002 -@@ -0,0 +1,56 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHAN_KERN_H__ -+#define __CHAN_KERN_H__ -+ -+#include "linux/tty.h" -+#include "linux/list.h" -+#include "chan_user.h" -+ -+struct chan { -+ struct list_head list; -+ char *dev; -+ unsigned int primary:1; -+ unsigned int input:1; -+ unsigned int output:1; -+ unsigned int opened:1; -+ int fd; -+ enum chan_init_pri pri; -+ struct chan_ops *ops; -+ void *data; -+}; -+ -+extern void chan_interrupt(struct list_head *chans, struct tq_struct *task, -+ struct tty_struct *tty, int irq, void *dev); -+extern int parse_chan_pair(char *str, struct list_head *chans, int pri, -+ int device, struct chan_opts *opts); -+extern int open_chan(struct list_head *chans); -+extern int write_chan(struct list_head *chans, const char *buf, int len, -+ int write_irq); -+extern int console_write_chan(struct list_head *chans, const char *buf, -+ int len); -+extern void close_chan(struct list_head *chans); -+extern void chan_enable_winch(struct list_head *chans, void *line); -+extern void enable_chan(struct list_head *chans, void *data); -+extern int chan_window_size(struct list_head *chans, -+ unsigned short *rows_out, -+ unsigned short *cols_out); -+extern int 
chan_out_fd(struct list_head *chans); -+extern int chan_config_string(struct list_head *chans, char *str, int size, -+ char **error_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/chan_user.h um/arch/um/include/chan_user.h ---- orig/arch/um/include/chan_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/chan_user.h Wed Nov 6 16:44:00 2002 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHAN_USER_H__ -+#define __CHAN_USER_H__ -+ -+#include "init.h" -+ -+struct chan_opts { -+ void (*announce)(char *dev_name, int dev); -+ char *xterm_title; -+ int raw; -+ unsigned long tramp_stack; -+ int in_kernel; -+}; -+ -+enum chan_init_pri { INIT_STATIC, INIT_ALL, INIT_ONE }; -+ -+struct chan_ops { -+ char *type; -+ void *(*init)(char *, int, struct chan_opts *); -+ int (*open)(int, int, int, void *, char **); -+ void (*close)(int, void *); -+ int (*read)(int, char *, void *); -+ int (*write)(int, const char *, int, void *); -+ int (*console_write)(int, const char *, int, void *); -+ int (*window_size)(int, void *, unsigned short *, unsigned short *); -+ void (*free)(void *); -+ int winch; -+}; -+ -+extern struct chan_ops fd_ops, null_ops, port_ops, pts_ops, pty_ops, tty_ops, -+ xterm_ops; -+ -+extern void generic_close(int fd, void *unused); -+extern int generic_read(int fd, char *c_out, void *unused); -+extern int generic_write(int fd, const char *buf, int n, void *unused); -+extern int generic_console_write(int fd, const char *buf, int n, void *state); -+extern int 
generic_window_size(int fd, void *unused, unsigned short *rows_out, -+ unsigned short *cols_out); -+extern void generic_free(void *data); -+ -+extern void register_winch(int fd, void *device_data); -+extern void register_winch_irq(int fd, int tty_fd, int pid, void *line); -+ -+#define __channel_help(fn, prefix) \ -+__uml_help(fn, prefix "[0-9]*=<channel description>\n" \ -+" Attach a console or serial line to a host channel. See\n" \ -+" http://user-mode-linux.sourceforge.net/input.html for a complete\n" \ -+" description of this switch.\n\n" \ -+); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/choose-mode.h um/arch/um/include/choose-mode.h ---- orig/arch/um/include/choose-mode.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/choose-mode.h Fri Jan 17 13:23:32 2003 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __CHOOSE_MODE_H__ -+#define __CHOOSE_MODE_H__ -+ -+#include "uml-config.h" -+ -+#if defined(UML_CONFIG_MODE_TT) && defined(UML_CONFIG_MODE_SKAS) -+#define CHOOSE_MODE(tt, skas) (mode_tt ? (tt) : (skas)) -+ -+#elif defined(UML_CONFIG_MODE_SKAS) -+#define CHOOSE_MODE(tt, skas) (skas) -+ -+#elif defined(UML_CONFIG_MODE_TT) -+#define CHOOSE_MODE(tt, skas) (tt) -+#endif -+ -+#define CHOOSE_MODE_PROC(tt, skas, args...) \ -+ CHOOSE_MODE(tt(args), skas(args)) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/frame.h um/arch/um/include/frame.h ---- orig/arch/um/include/frame.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/frame.h Mon Dec 2 21:43:03 2002 -@@ -0,0 +1,53 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_H_ -+#define __FRAME_H_ -+ -+#include "sysdep/frame.h" -+ -+struct frame_common { -+ void *data; -+ int len; -+ int sig_index; -+ int sr_index; -+ int sr_relative; -+ int sp_index; -+ struct arch_frame_data arch; -+}; -+ -+struct sc_frame { -+ struct frame_common common; -+ int sc_index; -+}; -+ -+extern struct sc_frame signal_frame_sc; -+ -+extern struct sc_frame signal_frame_sc_sr; -+ -+struct si_frame { -+ struct frame_common common; -+ int sip_index; -+ int si_index; -+ int ucp_index; -+ int uc_index; -+}; -+ -+extern struct si_frame signal_frame_si; -+ -+extern void capture_signal_stack(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/frame_kern.h um/arch/um/include/frame_kern.h ---- orig/arch/um/include/frame_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/frame_kern.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_KERN_H_ -+#define __FRAME_KERN_H_ -+ -+#include "frame.h" -+#include "sysdep/frame_kern.h" -+ -+extern int setup_signal_stack_sc(unsigned long stack_top, int sig, -+ unsigned long handler, -+ void (*restorer)(void), -+ struct pt_regs *regs, -+ sigset_t *mask); -+extern int setup_signal_stack_si(unsigned long stack_top, int sig, -+ unsigned long handler, -+ void (*restorer)(void), -+ struct pt_regs *regs, siginfo_t *info, -+ sigset_t *mask); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/frame_user.h um/arch/um/include/frame_user.h ---- orig/arch/um/include/frame_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/frame_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_USER_H_ -+#define __FRAME_USER_H_ -+ -+#include "sysdep/frame_user.h" -+#include "frame.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/helper.h um/arch/um/include/helper.h ---- orig/arch/um/include/helper.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/helper.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __HELPER_H__ -+#define __HELPER_H__ -+ -+extern int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, -+ unsigned long *stack_out); -+extern int run_helper_thread(int (*proc)(void *), void *arg, -+ unsigned int flags, unsigned long *stack_out, -+ int stack_order); -+extern int helper_wait(int pid); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/hostaudio.h um/arch/um/include/hostaudio.h ---- orig/arch/um/include/hostaudio.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/hostaudio.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Steve Schmidtke -+ * Licensed under the GPL -+ */ -+ -+#ifndef HOSTAUDIO_H -+#define HOSTAUDIO_H -+ -+#define HOSTAUDIO_DEV_DSP "/dev/sound/dsp" -+#define HOSTAUDIO_DEV_MIXER "/dev/sound/mixer" -+ -+struct hostaudio_state { -+ int fd; -+}; -+ -+struct hostmixer_state { -+ int fd; -+}; -+ -+/* UML user-side protoypes */ -+extern ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer, -+ size_t count, loff_t *ppos); -+extern ssize_t hostaudio_write_user(struct hostaudio_state *state, -+ const char *buffer, size_t count, -+ loff_t *ppos); -+extern int hostaudio_ioctl_user(struct hostaudio_state *state, -+ unsigned int cmd, unsigned long arg); -+extern int hostaudio_open_user(struct hostaudio_state *state, int r, int w, -+ char *dsp); -+extern int hostaudio_release_user(struct hostaudio_state *state); -+extern int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, -+ unsigned int cmd, unsigned long arg); -+extern int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, -+ int w, char *mixer); -+extern int hostmixer_release_mixdev_user(struct hostmixer_state *state); -+ -+#endif /* HOSTAUDIO_H */ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/init.h um/arch/um/include/init.h ---- orig/arch/um/include/init.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/init.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,114 @@ -+#ifndef _LINUX_UML_INIT_H -+#define _LINUX_UML_INIT_H -+ -+/* These macros are used to mark some functions or -+ * initialized data (doesn't apply to uninitialized data) -+ * as `initialization' functions. The kernel can take this -+ * as hint that the function is used only during the initialization -+ * phase and free up used memory resources after -+ * -+ * Usage: -+ * For functions: -+ * -+ * You should add __init immediately before the function name, like: -+ * -+ * static void __init initme(int x, int y) -+ * { -+ * extern int z; z = x * y; -+ * } -+ * -+ * If the function has a prototype somewhere, you can also add -+ * __init between closing brace of the prototype and semicolon: -+ * -+ * extern int initialize_foobar_device(int, int, int) __init; -+ * -+ * For initialized data: -+ * You should insert __initdata between the variable name and equal -+ * sign followed by value, e.g.: -+ * -+ * static int init_variable __initdata = 0; -+ * static char linux_logo[] __initdata = { 0x32, 0x36, ... }; -+ * -+ * Don't forget to initialize data not at file scope, i.e. within a function, -+ * as gcc otherwise puts the data into the bss section and not into the init -+ * section. -+ * -+ * Also note, that this data cannot be "const". 
-+ */ -+ -+#ifndef _LINUX_INIT_H -+typedef int (*initcall_t)(void); -+typedef void (*exitcall_t)(void); -+ -+#define __init __attribute__ ((__section__ (".text.init"))) -+#define __exit __attribute__ ((unused, __section__(".text.exit"))) -+#define __initdata __attribute__ ((__section__ (".data.init"))) -+ -+#endif -+ -+#ifndef MODULE -+struct uml_param { -+ const char *str; -+ int (*setup_func)(char *, int *); -+}; -+ -+extern initcall_t __uml_initcall_start, __uml_initcall_end; -+extern initcall_t __uml_postsetup_start, __uml_postsetup_end; -+extern const char *__uml_help_start, *__uml_help_end; -+#endif -+ -+#define __uml_initcall(fn) \ -+ static initcall_t __uml_initcall_##fn __uml_init_call = fn -+ -+#define __uml_exitcall(fn) \ -+ static exitcall_t __uml_exitcall_##fn __uml_exit_call = fn -+ -+extern struct uml_param __uml_setup_start, __uml_setup_end; -+ -+#define __uml_postsetup(fn) \ -+ static initcall_t __uml_postsetup_##fn __uml_postsetup_call = fn -+ -+#define __non_empty_string(dummyname,string) \ -+ struct __uml_non_empty_string_struct_##dummyname \ -+ { \ -+ char _string[sizeof(string)-2]; \ -+ } -+ -+#ifndef MODULE -+#define __uml_setup(str, fn, help...) \ -+ __non_empty_string(fn ##_setup, str); \ -+ __uml_help(fn, help); \ -+ static char __uml_setup_str_##fn[] __initdata = str; \ -+ static struct uml_param __uml_setup_##fn __uml_init_setup = { __uml_setup_str_##fn, fn } -+#else -+#define __uml_setup(str, fn, help...) \ -+ -+#endif -+ -+#define __uml_help(fn, help...) \ -+ __non_empty_string(fn ##__help, help); \ -+ static char __uml_help_str_##fn[] __initdata = help; \ -+ static const char *__uml_help_##fn __uml_setup_help = __uml_help_str_##fn -+ -+/* -+ * Mark functions and data as being only used at initialization -+ * or exit time. 
-+ */ -+#define __uml_init_setup __attribute__ ((unused,__section__ (".uml.setup.init"))) -+#define __uml_setup_help __attribute__ ((unused,__section__ (".uml.help.init"))) -+#define __uml_init_call __attribute__ ((unused,__section__ (".uml.initcall.init"))) -+#define __uml_postsetup_call __attribute__ ((unused,__section__ (".uml.postsetup.init"))) -+#define __uml_exit_call __attribute__ ((unused,__section__ (".uml.exitcall.exit"))) -+ -+#endif /* _LINUX_UML_INIT_H */ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/initrd.h um/arch/um/include/initrd.h ---- orig/arch/um/include/initrd.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/initrd.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __INITRD_USER_H__ -+#define __INITRD_USER_H__ -+ -+extern int load_initrd(char *filename, void *buf, int size); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/irq_user.h um/arch/um/include/irq_user.h ---- orig/arch/um/include/irq_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/irq_user.h Sun Dec 8 20:38:42 2002 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __IRQ_USER_H__ -+#define __IRQ_USER_H__ -+ -+enum { IRQ_READ, IRQ_WRITE }; -+ -+extern void sigio_handler(int sig, union uml_pt_regs *regs); -+extern int activate_fd(int irq, int fd, int type, void *dev_id); -+extern void free_irq_by_irq_and_dev(int irq, void *dev_id); -+extern void free_irq_by_fd(int fd); -+extern void reactivate_fd(int fd, int irqnum); -+extern void deactivate_fd(int fd, int irqnum); -+extern void forward_interrupts(int pid); -+extern void init_irq_signals(int on_sigstack); -+extern void forward_ipi(int fd, int pid); -+extern void free_irq_later(int irq, void *dev_id); -+extern int activate_ipi(int fd, int pid); -+extern unsigned long irq_lock(void); -+extern void irq_unlock(unsigned long flags); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/kern.h um/arch/um/include/kern.h ---- orig/arch/um/include/kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/kern.h Sat Nov 2 21:38:02 2002 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __KERN_H__ -+#define __KERN_H__ -+ -+/* These are all user-mode things which are convenient to call directly -+ * from kernel code and for which writing a wrapper is too much of a pain. -+ * The regular include files can't be included because this file is included -+ * only into kernel code, and user-space includes conflict with kernel -+ * includes. -+ */ -+ -+extern int errno; -+ -+extern int clone(int (*proc)(void *), void *sp, int flags, void *data); -+extern int sleep(int); -+extern int printf(char *fmt, ...); -+extern char *strerror(int errnum); -+extern char *ptsname(int __fd); -+extern int munmap(void *, int); -+extern void *sbrk(int increment); -+extern void *malloc(int size); -+extern void perror(char *err); -+extern int kill(int pid, int sig); -+extern int getuid(void); -+extern int pause(void); -+extern int write(int, const void *, int); -+extern int exit(int); -+extern int close(int); -+extern int read(unsigned int, char *, int); -+extern int pipe(int *); -+extern int sched_yield(void); -+extern int ptrace(int op, int pid, long addr, long data); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/kern_util.h um/arch/um/include/kern_util.h ---- orig/arch/um/include/kern_util.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/kern_util.h Wed Apr 16 16:00:11 2003 -@@ -0,0 +1,121 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __KERN_UTIL_H__ -+#define __KERN_UTIL_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int ncpus; -+extern char *linux_prog; -+extern char *gdb_init; -+extern int kmalloc_ok; -+extern int timer_irq_inited; -+extern int jail; -+extern int nsyscalls; -+ -+#define UML_ROUND_DOWN(addr) ((void *)(((unsigned long) addr) & PAGE_MASK)) -+#define UML_ROUND_UP(addr) \ -+ UML_ROUND_DOWN(((unsigned long) addr) + PAGE_SIZE - 1) -+ -+extern int kernel_fork(unsigned long flags, int (*fn)(void *), void * arg); -+extern unsigned long stack_sp(unsigned long page); -+extern int kernel_thread_proc(void *data); -+extern void syscall_segv(int sig); -+extern int current_pid(void); -+extern unsigned long alloc_stack(int order, int atomic); -+extern int do_signal(int error); -+extern int is_stack_fault(unsigned long sp); -+extern unsigned long segv(unsigned long address, unsigned long ip, -+ int is_write, int is_user, void *sc); -+extern unsigned long handle_page_fault(unsigned long address, unsigned long ip, -+ int is_write, int is_user, -+ int *code_out); -+extern void syscall_ready(void); -+extern int segv_syscall(void); -+extern void kern_finish_exec(void *task, int new_pid, unsigned long stack); -+extern int page_size(void); -+extern int page_mask(void); -+extern int need_finish_fork(void); -+extern void free_stack(unsigned long stack, int order); -+extern void add_input_request(int op, void (*proc)(int), void *arg); -+extern int sys_execve(char *file, char **argv, char **env); -+extern char 
*current_cmd(void); -+extern void timer_handler(int sig, union uml_pt_regs *regs); -+extern int set_signals(int enable); -+extern void force_sigbus(void); -+extern int pid_to_processor_id(int pid); -+extern void block_signals(void); -+extern void unblock_signals(void); -+extern void deliver_signals(void *t); -+extern int next_syscall_index(int max); -+extern int next_trap_index(int max); -+extern void cpu_idle(void); -+extern void finish_fork(void); -+extern void paging_init(void); -+extern void init_flush_vm(void); -+extern void *syscall_sp(void *t); -+extern void syscall_trace(void); -+extern int hz(void); -+extern void idle_timer(void); -+extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs); -+extern int external_pid(void *t); -+extern void boot_timer_handler(int sig); -+extern void interrupt_end(void); -+extern void initial_thread_cb(void (*proc)(void *), void *arg); -+extern int debugger_signal(int status, int pid); -+extern void debugger_parent_signal(int status, int pid); -+extern void child_signal(int pid, int status); -+extern int init_ptrace_proxy(int idle_pid, int startup, int stop); -+extern int init_parent_proxy(int pid); -+extern void check_stack_overflow(void *ptr); -+extern void relay_signal(int sig, union uml_pt_regs *regs); -+extern void not_implemented(void); -+extern int user_context(unsigned long sp); -+extern void timer_irq(union uml_pt_regs *regs); -+extern void unprotect_stack(unsigned long stack); -+extern void do_uml_exitcalls(void); -+extern int attach_debugger(int idle_pid, int pid, int stop); -+extern void bad_segv(unsigned long address, unsigned long ip, int is_write); -+extern int config_gdb(char *str); -+extern int remove_gdb(void); -+extern char *uml_strdup(char *string); -+extern void unprotect_kernel_mem(void); -+extern void protect_kernel_mem(void); -+extern void set_kmem_end(unsigned long); -+extern void uml_cleanup(void); -+extern void set_current(void *t); -+extern void lock_signalled_task(void *t); -+extern void 
IPI_handler(int cpu); -+extern int jail_setup(char *line, int *add); -+extern void *get_init_task(void); -+extern int clear_user_proc(void *buf, int size); -+extern int copy_to_user_proc(void *to, void *from, int size); -+extern int copy_from_user_proc(void *to, void *from, int size); -+extern int strlen_user_proc(char *str); -+extern void bus_handler(int sig, union uml_pt_regs *regs); -+extern void winch(int sig, union uml_pt_regs *regs); -+extern long execute_syscall(void *r); -+extern int smp_sigio_handler(void); -+extern void *get_current(void); -+extern struct task_struct *get_task(int pid, int require); -+extern void machine_halt(void); -+extern int is_syscall(unsigned long addr); -+extern void arch_switch(void); -+extern void free_irq(unsigned int, void *); -+extern int um_in_interrupt(void); -+extern int cpu(void); -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/line.h um/arch/um/include/line.h ---- orig/arch/um/include/line.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/line.h Fri Nov 15 13:44:44 2002 -@@ -0,0 +1,106 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __LINE_H__ -+#define __LINE_H__ -+ -+#include "linux/list.h" -+#include "linux/tqueue.h" -+#include "linux/tty.h" -+#include "asm/semaphore.h" -+#include "chan_user.h" -+#include "mconsole_kern.h" -+ -+struct line_driver { -+ char *name; -+ char *devfs_name; -+ short major; -+ short minor_start; -+ short type; -+ short subtype; -+ int read_irq; -+ char *read_irq_name; -+ int write_irq; -+ char *write_irq_name; -+ char *symlink_from; -+ char *symlink_to; -+ struct mc_device mc; -+}; -+ -+struct line { -+ char *init_str; -+ int init_pri; -+ struct list_head chan_list; -+ int valid; -+ int count; -+ struct tty_struct *tty; -+ struct semaphore sem; -+ char *buffer; -+ char *head; -+ char *tail; -+ int sigio; -+ struct tq_struct task; -+ struct line_driver *driver; -+ int have_irq; -+}; -+ -+#define LINE_INIT(str, d) \ -+ { init_str : str, \ -+ init_pri : INIT_STATIC, \ -+ chan_list : { }, \ -+ valid : 1, \ -+ count : 0, \ -+ tty : NULL, \ -+ sem : { }, \ -+ buffer : NULL, \ -+ head : NULL, \ -+ tail : NULL, \ -+ sigio : 0, \ -+ driver : d, \ -+ have_irq : 0 } -+ -+struct lines { -+ int num; -+}; -+ -+#define LINES_INIT(n) { num : n } -+ -+extern void line_interrupt(int irq, void *data, struct pt_regs *unused); -+extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused); -+extern void line_close(struct line *lines, struct tty_struct *tty); -+extern int line_open(struct line *lines, struct tty_struct *tty, -+ struct chan_opts *opts); -+extern int line_setup(struct line *lines, int 
num, char *init, -+ int all_allowed); -+extern int line_write(struct line *line, struct tty_struct *tty, int from_user, -+ const char *buf, int len); -+extern int line_write_room(struct tty_struct *tty); -+extern char *add_xterm_umid(char *base); -+extern int line_setup_irq(int fd, int input, int output, void *data); -+extern void line_close_chan(struct line *line); -+extern void line_disable(struct line *line, int current_irq); -+extern void line_register_devfs(struct lines *set, -+ struct line_driver *line_driver, -+ struct tty_driver *driver, struct line *lines, -+ int nlines); -+extern void lines_init(struct line *lines, int nlines); -+extern void close_lines(struct line *lines, int nlines); -+extern int line_config(struct line *lines, int num, char *str); -+extern int line_remove(struct line *lines, int num, char *str); -+extern int line_get_config(char *dev, struct line *lines, int num, char *str, -+ int size, char **error_out); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mconsole.h um/arch/um/include/mconsole.h ---- orig/arch/um/include/mconsole.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/mconsole.h Fri Jan 17 13:48:25 2003 -@@ -0,0 +1,99 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MCONSOLE_H__ -+#define __MCONSOLE_H__ -+ -+#ifndef __KERNEL__ -+#include <stdint.h> -+#define u32 uint32_t -+#endif -+ -+#define MCONSOLE_MAGIC (0xcafebabe) -+#define MCONSOLE_MAX_DATA (512) -+#define MCONSOLE_VERSION 2 -+ -+struct mconsole_request { -+ u32 magic; -+ u32 version; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mconsole_reply { -+ u32 err; -+ u32 more; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mconsole_notify { -+ u32 magic; -+ u32 version; -+ enum { MCONSOLE_SOCKET, MCONSOLE_PANIC, MCONSOLE_HANG, -+ MCONSOLE_USER_NOTIFY } type; -+ u32 len; -+ char data[MCONSOLE_MAX_DATA]; -+}; -+ -+struct mc_request; -+ -+struct mconsole_command -+{ -+ char *command; -+ void (*handler)(struct mc_request *req); -+ int as_interrupt; -+}; -+ -+struct mc_request -+{ -+ int len; -+ int as_interrupt; -+ -+ int originating_fd; -+ int originlen; -+ unsigned char origin[128]; /* sockaddr_un */ -+ -+ struct mconsole_request request; -+ struct mconsole_command *cmd; -+}; -+ -+extern char mconsole_socket_name[]; -+ -+extern int mconsole_unlink_socket(void); -+extern int mconsole_reply(struct mc_request *req, char *reply, int err, -+ int more); -+ -+extern void mconsole_version(struct mc_request *req); -+extern void mconsole_help(struct mc_request *req); -+extern void mconsole_halt(struct mc_request *req); -+extern void mconsole_reboot(struct mc_request *req); -+extern void mconsole_config(struct 
mc_request *req); -+extern void mconsole_remove(struct mc_request *req); -+extern void mconsole_sysrq(struct mc_request *req); -+extern void mconsole_cad(struct mc_request *req); -+extern void mconsole_stop(struct mc_request *req); -+extern void mconsole_go(struct mc_request *req); -+ -+extern int mconsole_get_request(int fd, struct mc_request *req); -+extern int mconsole_notify(char *sock_name, int type, const void *data, -+ int len); -+extern char *mconsole_notify_socket(void); -+extern void lock_notify(void); -+extern void unlock_notify(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mconsole_kern.h um/arch/um/include/mconsole_kern.h ---- orig/arch/um/include/mconsole_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/mconsole_kern.h Fri Nov 15 15:21:58 2002 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MCONSOLE_KERN_H__ -+#define __MCONSOLE_KERN_H__ -+ -+#include "linux/config.h" -+#include "linux/list.h" -+#include "mconsole.h" -+ -+struct mconsole_entry { -+ struct list_head list; -+ struct mc_request request; -+}; -+ -+struct mc_device { -+ struct list_head list; -+ char *name; -+ int (*config)(char *); -+ int (*get_config)(char *, char *, int, char **); -+ int (*remove)(char *); -+}; -+ -+#define CONFIG_CHUNK(str, size, current, chunk, end) \ -+do { \ -+ current += strlen(chunk); \ -+ if(current >= size) \ -+ str = NULL; \ -+ if(str != NULL){ \ -+ strcpy(str, chunk); \ -+ str += strlen(chunk); \ -+ } \ -+ if(end) \ -+ current++; \ -+} while(0) -+ -+#ifdef 
CONFIG_MCONSOLE -+ -+extern void mconsole_register_dev(struct mc_device *new); -+ -+#else -+ -+static inline void mconsole_register_dev(struct mc_device *new) -+{ -+} -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mem.h um/arch/um/include/mem.h ---- orig/arch/um/include/mem.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/mem.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MEM_H__ -+#define __MEM_H__ -+ -+struct vm_reserved { -+ struct list_head list; -+ unsigned long start; -+ unsigned long end; -+}; -+ -+extern void set_usable_vm(unsigned long start, unsigned long end); -+extern void set_kmem_end(unsigned long new); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mem_user.h um/arch/um/include/mem_user.h ---- orig/arch/um/include/mem_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/mem_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,87 @@ -+/* -+ * arch/um/include/mem_user.h -+ * -+ * BRIEF MODULE DESCRIPTION -+ * user side memory interface for support IO memory inside user mode linux -+ * -+ * Copyright (C) 2001 RidgeRun, Inc. 
-+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. -+ * -+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN -+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 675 Mass Ave, Cambridge, MA 02139, USA. 
-+ */ -+ -+#ifndef _MEM_USER_H -+#define _MEM_USER_H -+ -+struct mem_region { -+ char *driver; -+ unsigned long start_pfn; -+ unsigned long start; -+ unsigned long len; -+ void *mem_map; -+ int fd; -+}; -+ -+extern struct mem_region *regions[]; -+extern struct mem_region physmem_region; -+ -+#define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) -+ -+extern unsigned long host_task_size; -+extern unsigned long task_size; -+ -+extern int init_mem_user(void); -+extern int create_mem_file(unsigned long len); -+extern void setup_range(int fd, char *driver, unsigned long start, -+ unsigned long pfn, unsigned long total, int need_vm, -+ struct mem_region *region, void *reserved); -+extern void setup_memory(void *entry); -+extern unsigned long find_iomem(char *driver, unsigned long *len_out); -+extern int init_maps(struct mem_region *region); -+extern int nregions(void); -+extern int reserve_vm(unsigned long start, unsigned long end, void *e); -+extern unsigned long get_vm(unsigned long len); -+extern void setup_physmem(unsigned long start, unsigned long usable, -+ unsigned long len); -+extern int setup_region(struct mem_region *region, void *entry); -+extern void add_iomem(char *name, int fd, unsigned long size); -+extern struct mem_region *phys_region(unsigned long phys); -+extern unsigned long phys_offset(unsigned long phys); -+extern void unmap_physmem(void); -+extern int map_memory(unsigned long virt, unsigned long phys, -+ unsigned long len, int r, int w, int x); -+extern int protect_memory(unsigned long addr, unsigned long len, -+ int r, int w, int x, int must_succeed); -+extern unsigned long get_kmem_end(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mode.h um/arch/um/include/mode.h ---- orig/arch/um/include/mode.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/mode.h Fri Jan 17 13:23:32 2003 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_H__ -+#define __MODE_H__ -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "../kernel/tt/include/mode.h" -+#endif -+ -+#ifdef UML_CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mode.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/mode_kern.h um/arch/um/include/mode_kern.h ---- orig/arch/um/include/mode_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/mode_kern.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_KERN_H__ -+#define __MODE_KERN_H__ -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/mode_kern.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mode_kern.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/net_kern.h um/arch/um/include/net_kern.h ---- orig/arch/um/include/net_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/net_kern.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,81 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_NET_KERN_H -+#define __UM_NET_KERN_H -+ -+#include "linux/netdevice.h" -+#include "linux/skbuff.h" -+#include "linux/socket.h" -+#include "linux/list.h" -+ -+struct uml_net { -+ struct list_head list; -+ struct net_device *dev; -+ int index; -+ unsigned char mac[ETH_ALEN]; -+ int have_mac; -+}; -+ -+struct uml_net_private { -+ struct list_head list; -+ spinlock_t lock; -+ struct net_device *dev; -+ struct timer_list tl; -+ struct net_device_stats stats; -+ int fd; -+ unsigned char mac[ETH_ALEN]; -+ int have_mac; -+ unsigned short (*protocol)(struct sk_buff *); -+ int (*open)(void *); -+ void (*close)(int, void *); -+ void (*remove)(void *); -+ int (*read)(int, struct sk_buff **skb, struct uml_net_private *); -+ int (*write)(int, struct sk_buff **skb, struct uml_net_private *); -+ -+ void (*add_address)(unsigned char *, unsigned char *, void *); -+ void (*delete_address)(unsigned char *, unsigned char *, void *); -+ int (*set_mtu)(int mtu, void *); -+ int user[1]; -+}; -+ -+struct net_kern_info { -+ void (*init)(struct net_device *, void *); -+ unsigned short (*protocol)(struct sk_buff *); -+ int (*read)(int, struct sk_buff **skb, struct uml_net_private *); -+ int (*write)(int, struct sk_buff **skb, struct uml_net_private *); -+}; -+ -+struct transport { -+ struct list_head list; -+ char *name; -+ int (*setup)(char *, char **, void *); -+ struct net_user_info *user; -+ struct net_kern_info *kern; -+ int private_size; -+ int setup_size; -+}; -+ -+extern struct net_device 
*ether_init(int); -+extern unsigned short ether_protocol(struct sk_buff *); -+extern int setup_etheraddr(char *str, unsigned char *addr); -+extern struct sk_buff *ether_adjust_skb(struct sk_buff *skb, int extra); -+extern int tap_setup_common(char *str, char *type, char **dev_name, -+ char **mac_out, char **gate_addr); -+extern void register_transport(struct transport *new); -+extern unsigned short eth_protocol(struct sk_buff *skb); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/net_user.h um/arch/um/include/net_user.h ---- orig/arch/um/include/net_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/net_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,66 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_NET_USER_H__ -+#define __UM_NET_USER_H__ -+ -+#define ETH_ADDR_LEN (6) -+#define ETH_HEADER_ETHERTAP (16) -+#define ETH_HEADER_OTHER (14) -+#define ETH_MAX_PACKET (1500) -+ -+#define UML_NET_VERSION (4) -+ -+struct net_user_info { -+ void (*init)(void *, void *); -+ int (*open)(void *); -+ void (*close)(int, void *); -+ void (*remove)(void *); -+ int (*set_mtu)(int mtu, void *); -+ void (*add_address)(unsigned char *, unsigned char *, void *); -+ void (*delete_address)(unsigned char *, unsigned char *, void *); -+ int max_packet; -+}; -+ -+extern void ether_user_init(void *data, void *dev); -+extern void dev_ip_addr(void *d, char *buf, char *bin_buf); -+extern void set_ether_mac(void *d, unsigned char *addr); -+extern void iter_addresses(void *d, void (*cb)(unsigned char *, -+ unsigned char *, void *), -+ 
void *arg); -+ -+extern void *get_output_buffer(int *len_out); -+extern void free_output_buffer(void *buffer); -+ -+extern int tap_open_common(void *dev, char *gate_addr); -+extern void tap_check_ips(char *gate_addr, char *eth_addr); -+ -+extern void read_output(int fd, char *output_out, int len); -+ -+extern int net_read(int fd, void *buf, int len); -+extern int net_recvfrom(int fd, void *buf, int len); -+extern int net_write(int fd, void *buf, int len); -+extern int net_send(int fd, void *buf, int len); -+extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len); -+ -+extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg); -+extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg); -+ -+extern char *split_if_spec(char *str, ...); -+ -+extern int dev_netmask(void *d, void *m); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/os.h um/arch/um/include/os.h ---- orig/arch/um/include/os.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/os.h Tue Feb 4 19:11:32 2003 -@@ -0,0 +1,137 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __OS_H__ -+#define __OS_H__ -+ -+#include "asm/types.h" -+#include "../os/include/file.h" -+ -+#define OS_TYPE_FILE 1 -+#define OS_TYPE_DIR 2 -+#define OS_TYPE_SYMLINK 3 -+#define OS_TYPE_CHARDEV 4 -+#define OS_TYPE_BLOCKDEV 5 -+#define OS_TYPE_FIFO 6 -+#define OS_TYPE_SOCK 7 -+ -+struct openflags { -+ unsigned int r : 1; -+ unsigned int w : 1; -+ unsigned int s : 1; /* O_SYNC */ -+ unsigned int c : 1; /* O_CREAT */ -+ unsigned int t : 1; /* O_TRUNC */ -+ unsigned int a : 1; /* O_APPEND */ -+ unsigned int e : 1; /* O_EXCL */ -+ unsigned int cl : 1; /* FD_CLOEXEC */ -+}; -+ -+#define OPENFLAGS() ((struct openflags) { .r = 0, .w = 0, .s = 0, .c = 0, \ -+ .t = 0, .a = 0, .e = 0, .cl = 0 }) -+ -+static inline struct openflags of_read(struct openflags flags) -+{ -+ flags.r = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_write(struct openflags flags) -+{ -+ flags.w = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_rdwr(struct openflags flags) -+{ -+ return(of_read(of_write(flags))); -+} -+ -+static inline struct openflags of_set_rw(struct openflags flags, int r, int w) -+{ -+ flags.r = r; -+ flags.w = w; -+ return(flags); -+} -+ -+static inline struct openflags of_sync(struct openflags flags) -+{ -+ flags.s = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_create(struct openflags flags) -+{ -+ flags.c = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_trunc(struct openflags flags) -+{ -+ flags.t = 1; -+ return(flags); -+} -+ -+static inline struct 
openflags of_append(struct openflags flags) -+{ -+ flags.a = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_excl(struct openflags flags) -+{ -+ flags.e = 1; -+ return(flags); -+} -+ -+static inline struct openflags of_cloexec(struct openflags flags) -+{ -+ flags.cl = 1; -+ return(flags); -+} -+ -+extern int os_seek_file(int fd, __u64 offset); -+extern int os_open_file(char *file, struct openflags flags, int mode); -+extern int os_read_file(int fd, void *buf, int len); -+extern int os_write_file(int fd, void *buf, int count); -+extern int os_file_size(char *file, long long *size_out); -+extern int os_pipe(int *fd, int stream, int close_on_exec); -+extern int os_set_fd_async(int fd, int owner); -+extern int os_set_fd_block(int fd, int blocking); -+extern int os_accept_connection(int fd); -+extern int os_shutdown_socket(int fd, int r, int w); -+extern void os_close_file(int fd); -+extern int os_rcv_fd(int fd, int *helper_pid_out); -+extern int create_unix_socket(char *file, int len); -+extern int os_connect_socket(char *name); -+extern int os_file_type(char *file); -+extern int os_file_mode(char *file, struct openflags *mode_out); -+extern int os_lock_file(int fd, int excl); -+ -+extern unsigned long os_process_pc(int pid); -+extern int os_process_parent(int pid); -+extern void os_stop_process(int pid); -+extern void os_kill_process(int pid, int reap_child); -+extern void os_usr1_process(int pid); -+extern int os_getpid(void); -+ -+extern int os_map_memory(void *virt, int fd, unsigned long off, -+ unsigned long len, int r, int w, int x); -+extern int os_protect_memory(void *addr, unsigned long len, -+ int r, int w, int x); -+extern int os_unmap_memory(void *addr, int len); -+extern void os_flush_stdout(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/process.h um/arch/um/include/process.h ---- orig/arch/um/include/process.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/process.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PROCESS_H__ -+#define __PROCESS_H__ -+ -+#include <asm/sigcontext.h> -+ -+extern void sig_handler(int sig, struct sigcontext sc); -+extern void alarm_handler(int sig, struct sigcontext sc); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/ptrace_user.h um/arch/um/include/ptrace_user.h ---- orig/arch/um/include/ptrace_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/ptrace_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,18 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_USER_H__ -+#define __PTRACE_USER_H__ -+ -+#include "sysdep/ptrace_user.h" -+ -+extern int ptrace_getregs(long pid, unsigned long *regs_out); -+extern int ptrace_setregs(long pid, unsigned long *regs_in); -+extern int ptrace_getfpregs(long pid, unsigned long *regs_out); -+extern void arch_enter_kernel(void *task, int pid); -+extern void arch_leave_kernel(void *task, int pid); -+extern void ptrace_pokeuser(unsigned long addr, unsigned long data); -+ -+#endif -diff -Naur -X ../exclude-files 
orig/arch/um/include/sigcontext.h um/arch/um/include/sigcontext.h ---- orig/arch/um/include/sigcontext.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sigcontext.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,25 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UML_SIGCONTEXT_H__ -+#define __UML_SIGCONTEXT_H__ -+ -+#include "sysdep/sigcontext.h" -+ -+extern int sc_size(void *data); -+extern void sc_to_sc(void *to_ptr, void *from_ptr); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sigio.h um/arch/um/include/sigio.h ---- orig/arch/um/include/sigio.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sigio.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGIO_H__ -+#define __SIGIO_H__ -+ -+extern int write_sigio_irq(int fd); -+extern int register_sigio_fd(int fd); -+extern int read_sigio_fd(int fd); -+extern int add_sigio_fd(int fd, int read); -+extern int ignore_sigio_fd(int fd); -+extern void sigio_lock(void); -+extern void sigio_unlock(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/signal_kern.h um/arch/um/include/signal_kern.h ---- orig/arch/um/include/signal_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/signal_kern.h Thu Dec 5 18:08:47 2002 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGNAL_KERN_H__ -+#define __SIGNAL_KERN_H__ -+ -+extern int have_signals(void *t); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/signal_user.h um/arch/um/include/signal_user.h ---- orig/arch/um/include/signal_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/signal_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SIGNAL_USER_H__ -+#define __SIGNAL_USER_H__ -+ -+extern int signal_stack_size; -+ -+extern int change_sig(int signal, int on); -+extern void set_sigstack(void *stack, int size); -+extern void set_handler(int sig, void (*handler)(int), int flags, ...); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/skas_ptrace.h um/arch/um/include/skas_ptrace.h ---- orig/arch/um/include/skas_ptrace.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/skas_ptrace.h Mon Dec 16 11:54:52 2002 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_PTRACE_H -+#define __SKAS_PTRACE_H -+ -+struct ptrace_faultinfo { -+ int is_write; -+ unsigned long addr; -+}; -+ -+struct ptrace_ldt { -+ int func; -+ void *ptr; -+ unsigned long bytecount; -+}; -+ -+#define PTRACE_FAULTINFO 52 -+#define PTRACE_SIGPENDING 53 -+#define PTRACE_LDT 54 -+#define PTRACE_SWITCH_MM 55 -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/syscall_user.h um/arch/um/include/syscall_user.h ---- orig/arch/um/include/syscall_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/syscall_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSCALL_USER_H -+#define __SYSCALL_USER_H -+ -+extern int record_syscall_start(int syscall); -+extern void record_syscall_end(int index, int result); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/checksum.h um/arch/um/include/sysdep-i386/checksum.h ---- orig/arch/um/include/sysdep-i386/checksum.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/checksum.h Tue Oct 29 21:23:02 2002 -@@ -0,0 +1,217 @@ -+/* -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SYSDEP_CHECKSUM_H -+#define __UM_SYSDEP_CHECKSUM_H -+ -+#include "linux/string.h" -+ -+/* -+ * computes the checksum of a memory block at buff, length len, -+ * and adds in "sum" (32-bit) -+ * -+ * returns a 32-bit number suitable for feeding into itself -+ * or csum_tcpudp_magic -+ * -+ * this function must be called with even lengths, except -+ * for the last fragment, which may be odd -+ * -+ * it's best to have buff aligned on a 32-bit boundary -+ */ -+unsigned int csum_partial(const unsigned char * buff, int len, -+ unsigned int sum); -+ -+/* -+ * the same as csum_partial, but copies from src while it -+ * checksums, and handles user-space pointer exceptions correctly, when needed. -+ * -+ * here even more important to align src and dst on a 32-bit (or even -+ * better 64-bit) boundary -+ */ -+ -+unsigned int csum_partial_copy_to(const char *src, char *dst, int len, -+ int sum, int *err_ptr); -+unsigned int csum_partial_copy_from(const char *src, char *dst, int len, -+ int sum, int *err_ptr); -+ -+/* -+ * Note: when you get a NULL pointer exception here this means someone -+ * passed in an incorrect kernel address to one of these functions. -+ * -+ * If you use these functions directly please don't forget the -+ * verify_area(). 
-+ */ -+ -+static __inline__ -+unsigned int csum_partial_copy_nocheck(const char *src, char *dst, -+ int len, int sum) -+{ -+ memcpy(dst, src, len); -+ return(csum_partial(dst, len, sum)); -+} -+ -+static __inline__ -+unsigned int csum_partial_copy_from_user(const char *src, char *dst, -+ int len, int sum, int *err_ptr) -+{ -+ return csum_partial_copy_from(src, dst, len, sum, err_ptr); -+} -+ -+/* -+ * These are the old (and unsafe) way of doing checksums, a warning message -+ * will be printed if they are used and an exeption occurs. -+ * -+ * these functions should go away after some time. -+ */ -+ -+#define csum_partial_copy_fromuser csum_partial_copy_from_user -+unsigned int csum_partial_copy( const char *src, char *dst, int len, int sum); -+ -+/* -+ * This is a version of ip_compute_csum() optimized for IP headers, -+ * which always checksum on 4 octet boundaries. -+ * -+ * By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by -+ * Arnt Gulbrandsen. -+ */ -+static inline unsigned short ip_fast_csum(unsigned char * iph, -+ unsigned int ihl) -+{ -+ unsigned int sum; -+ -+ __asm__ __volatile__( -+ "movl (%1), %0 ;\n" -+ "subl $4, %2 ;\n" -+ "jbe 2f ;\n" -+ "addl 4(%1), %0 ;\n" -+ "adcl 8(%1), %0 ;\n" -+ "adcl 12(%1), %0 ;\n" -+"1: adcl 16(%1), %0 ;\n" -+ "lea 4(%1), %1 ;\n" -+ "decl %2 ;\n" -+ "jne 1b ;\n" -+ "adcl $0, %0 ;\n" -+ "movl %0, %2 ;\n" -+ "shrl $16, %0 ;\n" -+ "addw %w2, %w0 ;\n" -+ "adcl $0, %0 ;\n" -+ "notl %0 ;\n" -+"2: ;\n" -+ /* Since the input registers which are loaded with iph and ipl -+ are modified, we must also specify them as outputs, or gcc -+ will assume they contain their original values. 
*/ -+ : "=r" (sum), "=r" (iph), "=r" (ihl) -+ : "1" (iph), "2" (ihl)); -+ return(sum); -+} -+ -+/* -+ * Fold a partial checksum -+ */ -+ -+static inline unsigned int csum_fold(unsigned int sum) -+{ -+ __asm__( -+ "addl %1, %0 ;\n" -+ "adcl $0xffff, %0 ;\n" -+ : "=r" (sum) -+ : "r" (sum << 16), "0" (sum & 0xffff0000) -+ ); -+ return (~sum) >> 16; -+} -+ -+static inline unsigned long csum_tcpudp_nofold(unsigned long saddr, -+ unsigned long daddr, -+ unsigned short len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ __asm__( -+ "addl %1, %0 ;\n" -+ "adcl %2, %0 ;\n" -+ "adcl %3, %0 ;\n" -+ "adcl $0, %0 ;\n" -+ : "=r" (sum) -+ : "g" (daddr), "g"(saddr), "g"((ntohs(len)<<16)+proto*256), "0"(sum)); -+ return sum; -+} -+ -+/* -+ * computes the checksum of the TCP/UDP pseudo-header -+ * returns a 16-bit checksum, already complemented -+ */ -+static inline unsigned short int csum_tcpudp_magic(unsigned long saddr, -+ unsigned long daddr, -+ unsigned short len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); -+} -+ -+/* -+ * this routine is used for miscellaneous IP-like checksums, mainly -+ * in icmp.c -+ */ -+ -+static inline unsigned short ip_compute_csum(unsigned char * buff, int len) -+{ -+ return csum_fold (csum_partial(buff, len, 0)); -+} -+ -+#define _HAVE_ARCH_IPV6_CSUM -+static __inline__ unsigned short int csum_ipv6_magic(struct in6_addr *saddr, -+ struct in6_addr *daddr, -+ __u32 len, -+ unsigned short proto, -+ unsigned int sum) -+{ -+ __asm__( -+ "addl 0(%1), %0 ;\n" -+ "adcl 4(%1), %0 ;\n" -+ "adcl 8(%1), %0 ;\n" -+ "adcl 12(%1), %0 ;\n" -+ "adcl 0(%2), %0 ;\n" -+ "adcl 4(%2), %0 ;\n" -+ "adcl 8(%2), %0 ;\n" -+ "adcl 12(%2), %0 ;\n" -+ "adcl %3, %0 ;\n" -+ "adcl %4, %0 ;\n" -+ "adcl $0, %0 ;\n" -+ : "=&r" (sum) -+ : "r" (saddr), "r" (daddr), -+ "r"(htonl(len)), "r"(htonl(proto)), "0"(sum)); -+ -+ return csum_fold(sum); -+} -+ -+/* -+ * Copy and checksum to user -+ */ -+#define 
HAVE_CSUM_COPY_USER -+static __inline__ unsigned int csum_and_copy_to_user(const char *src, -+ char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if (access_ok(VERIFY_WRITE, dst, len)) -+ return(csum_partial_copy_to(src, dst, len, sum, err_ptr)); -+ -+ if (len) -+ *err_ptr = -EFAULT; -+ -+ return -1; /* invalid checksum */ -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/frame.h um/arch/um/include/sysdep-i386/frame.h ---- orig/arch/um/include/sysdep-i386/frame.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/frame.h Fri Dec 6 14:07:54 2002 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_I386_H -+#define __FRAME_I386_H -+ -+struct arch_frame_data_raw { -+ unsigned long fp_start; -+ unsigned long sr; -+}; -+ -+struct arch_frame_data { -+ int fpstate_size; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/frame_kern.h um/arch/um/include/sysdep-i386/frame_kern.h ---- orig/arch/um/include/sysdep-i386/frame_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/frame_kern.h Mon Dec 2 21:45:04 2002 -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_KERN_I386_H -+#define __FRAME_KERN_I386_H -+ -+/* This is called from sys_sigreturn. It takes the sp at the point of the -+ * sigreturn system call and returns the address of the sigcontext struct -+ * on the stack. -+ */ -+ -+static inline void *sp_to_sc(unsigned long sp) -+{ -+ return((void *) sp); -+} -+ -+static inline void *sp_to_uc(unsigned long sp) -+{ -+ unsigned long uc; -+ -+ uc = sp + signal_frame_si.uc_index - -+ signal_frame_si.common.sp_index - 4; -+ return((void *) uc); -+} -+ -+static inline void *sp_to_rt_sc(unsigned long sp) -+{ -+ unsigned long sc; -+ -+ sc = sp - signal_frame_si.common.sp_index + -+ signal_frame_si.common.len - 4; -+ return((void *) sc); -+} -+ -+static inline void *sp_to_mask(unsigned long sp) -+{ -+ unsigned long mask; -+ -+ mask = sp - signal_frame_sc.common.sp_index + -+ signal_frame_sc.common.len - 8; -+ return((void *) mask); -+} -+ -+extern int sc_size(void *data); -+ -+static inline void *sp_to_rt_mask(unsigned long sp) -+{ -+ unsigned long mask; -+ -+ mask = sp - signal_frame_si.common.sp_index + -+ signal_frame_si.common.len + -+ sc_size(&signal_frame_si.common.arch) - 4; -+ return((void *) mask); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/frame_user.h um/arch/um/include/sysdep-i386/frame_user.h ---- orig/arch/um/include/sysdep-i386/frame_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/frame_user.h Fri Dec 6 14:13:59 2002 -@@ -0,0 +1,91 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __FRAME_USER_I386_H -+#define __FRAME_USER_I386_H -+ -+#include <asm/page.h> -+#include "sysdep/frame.h" -+ -+/* This stuff is to calculate the size of the fp state struct at runtime -+ * because it has changed between 2.2 and 2.4 and it would be good for a -+ * UML compiled on one to work on the other. -+ * So, setup_arch_frame_raw fills in the arch struct with the raw data, which -+ * just contains the address of the end of the sigcontext. This is invoked -+ * from the signal handler. -+ * setup_arch_frame uses that data to figure out what -+ * arch_frame_data.fpstate_size should be. It really has no idea, since it's -+ * not allowed to do sizeof(struct fpstate) but it's safe to consider that it's -+ * everything from the end of the sigcontext up to the top of the stack. So, -+ * it masks off the page number to get the offset within the page and subtracts -+ * that from the page size, and that's how big the fpstate struct will be -+ * considered to be. 
-+ */ -+ -+static inline void setup_arch_frame_raw(struct arch_frame_data_raw *data, -+ void *end, unsigned long srp) -+{ -+ unsigned long sr = *((unsigned long *) srp); -+ -+ data->fp_start = (unsigned long) end; -+ if((sr & PAGE_MASK) == ((unsigned long) end & PAGE_MASK)) -+ data->sr = sr; -+ else data->sr = 0; -+} -+ -+static inline void setup_arch_frame(struct arch_frame_data_raw *in, -+ struct arch_frame_data *out) -+{ -+ unsigned long fpstate_start = in->fp_start; -+ -+ if(in->sr == 0){ -+ fpstate_start &= ~PAGE_MASK; -+ out->fpstate_size = PAGE_SIZE - fpstate_start; -+ } -+ else { -+ out->fpstate_size = in->sr - fpstate_start; -+ } -+} -+ -+/* This figures out where on the stack the SA_RESTORER function address -+ * is stored. For i386, it's the signal handler return address, so it's -+ * located next to the frame pointer. -+ * This is inlined, so __builtin_frame_address(0) is correct. Otherwise, -+ * it would have to be __builtin_frame_address(1). -+ */ -+ -+static inline unsigned long frame_restorer(void) -+{ -+ unsigned long *fp; -+ -+ fp = __builtin_frame_address(0); -+ return((unsigned long) (fp + 1)); -+} -+ -+/* Similarly, this returns the value of sp when the handler was first -+ * entered. This is used to calculate the proper sp when delivering -+ * signals. -+ */ -+ -+static inline unsigned long frame_sp(void) -+{ -+ unsigned long *fp; -+ -+ fp = __builtin_frame_address(0); -+ return((unsigned long) (fp + 1)); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/ptrace.h um/arch/um/include/sysdep-i386/ptrace.h ---- orig/arch/um/include/sysdep-i386/ptrace.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/ptrace.h Fri Jan 17 13:23:31 2003 -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_I386_PTRACE_H -+#define __SYSDEP_I386_PTRACE_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "ptrace-tt.h" -+#endif -+ -+#ifdef UML_CONFIG_MODE_SKAS -+#include "ptrace-skas.h" -+#endif -+ -+#include "choose-mode.h" -+ -+union uml_pt_regs { -+#ifdef UML_CONFIG_MODE_TT -+ struct tt_regs { -+ long syscall; -+ void *sc; -+ } tt; -+#endif -+#ifdef UML_CONFIG_MODE_SKAS -+ struct skas_regs { -+ unsigned long regs[HOST_FRAME_SIZE]; -+ unsigned long fp[HOST_FP_SIZE]; -+ unsigned long xfp[HOST_XFP_SIZE]; -+ unsigned long fault_addr; -+ unsigned long fault_type; -+ unsigned long trap_type; -+ long syscall; -+ int is_user; -+ } skas; -+#endif -+}; -+ -+#define EMPTY_UML_PT_REGS { } -+ -+extern int mode_tt; -+ -+#define UPT_SC(r) ((r)->tt.sc) -+#define UPT_IP(r) \ -+ CHOOSE_MODE(SC_IP(UPT_SC(r)), REGS_IP((r)->skas.regs)) -+#define UPT_SP(r) \ -+ CHOOSE_MODE(SC_SP(UPT_SC(r)), REGS_SP((r)->skas.regs)) -+#define UPT_EFLAGS(r) \ -+ CHOOSE_MODE(SC_EFLAGS(UPT_SC(r)), REGS_EFLAGS((r)->skas.regs)) -+#define UPT_EAX(r) \ -+ CHOOSE_MODE(SC_EAX(UPT_SC(r)), REGS_EAX((r)->skas.regs)) -+#define UPT_EBX(r) \ -+ CHOOSE_MODE(SC_EBX(UPT_SC(r)), REGS_EBX((r)->skas.regs)) -+#define UPT_ECX(r) \ -+ CHOOSE_MODE(SC_ECX(UPT_SC(r)), REGS_ECX((r)->skas.regs)) -+#define UPT_EDX(r) \ -+ CHOOSE_MODE(SC_EDX(UPT_SC(r)), REGS_EDX((r)->skas.regs)) -+#define UPT_ESI(r) \ -+ CHOOSE_MODE(SC_ESI(UPT_SC(r)), REGS_ESI((r)->skas.regs)) -+#define UPT_EDI(r) 
\ -+ CHOOSE_MODE(SC_EDI(UPT_SC(r)), REGS_EDI((r)->skas.regs)) -+#define UPT_EBP(r) \ -+ CHOOSE_MODE(SC_EBP(UPT_SC(r)), REGS_EBP((r)->skas.regs)) -+#define UPT_ORIG_EAX(r) \ -+ CHOOSE_MODE((r)->tt.syscall, (r)->skas.syscall) -+#define UPT_CS(r) \ -+ CHOOSE_MODE(SC_CS(UPT_SC(r)), REGS_CS((r)->skas.regs)) -+#define UPT_SS(r) \ -+ CHOOSE_MODE(SC_SS(UPT_SC(r)), REGS_SS((r)->skas.regs)) -+#define UPT_DS(r) \ -+ CHOOSE_MODE(SC_DS(UPT_SC(r)), REGS_DS((r)->skas.regs)) -+#define UPT_ES(r) \ -+ CHOOSE_MODE(SC_ES(UPT_SC(r)), REGS_ES((r)->skas.regs)) -+#define UPT_FS(r) \ -+ CHOOSE_MODE(SC_FS(UPT_SC(r)), REGS_FS((r)->skas.regs)) -+#define UPT_GS(r) \ -+ CHOOSE_MODE(SC_GS(UPT_SC(r)), REGS_GS((r)->skas.regs)) -+ -+#define UPT_SYSCALL_ARG1(r) UPT_EBX(r) -+#define UPT_SYSCALL_ARG2(r) UPT_ECX(r) -+#define UPT_SYSCALL_ARG3(r) UPT_EDX(r) -+#define UPT_SYSCALL_ARG4(r) UPT_ESI(r) -+#define UPT_SYSCALL_ARG5(r) UPT_EDI(r) -+#define UPT_SYSCALL_ARG6(r) UPT_EBP(r) -+ -+extern int user_context(unsigned long sp); -+ -+#define UPT_IS_USER(r) \ -+ CHOOSE_MODE(user_context(UPT_SP(r)), (r)->skas.is_user) -+ -+struct syscall_args { -+ unsigned long args[6]; -+}; -+ -+#define SYSCALL_ARGS(r) ((struct syscall_args) \ -+ { .args = { UPT_SYSCALL_ARG1(r), \ -+ UPT_SYSCALL_ARG2(r), \ -+ UPT_SYSCALL_ARG3(r), \ -+ UPT_SYSCALL_ARG4(r), \ -+ UPT_SYSCALL_ARG5(r), \ -+ UPT_SYSCALL_ARG6(r) } } ) -+ -+#define UPT_REG(regs, reg) \ -+ ({ unsigned long val; \ -+ switch(reg){ \ -+ case EIP: val = UPT_IP(regs); break; \ -+ case UESP: val = UPT_SP(regs); break; \ -+ case EAX: val = UPT_EAX(regs); break; \ -+ case EBX: val = UPT_EBX(regs); break; \ -+ case ECX: val = UPT_ECX(regs); break; \ -+ case EDX: val = UPT_EDX(regs); break; \ -+ case ESI: val = UPT_ESI(regs); break; \ -+ case EDI: val = UPT_EDI(regs); break; \ -+ case EBP: val = UPT_EBP(regs); break; \ -+ case ORIG_EAX: val = UPT_ORIG_EAX(regs); break; \ -+ case CS: val = UPT_CS(regs); break; \ -+ case SS: val = UPT_SS(regs); break; \ -+ case DS: val = 
UPT_DS(regs); break; \ -+ case ES: val = UPT_ES(regs); break; \ -+ case FS: val = UPT_FS(regs); break; \ -+ case GS: val = UPT_GS(regs); break; \ -+ case EFL: val = UPT_EFLAGS(regs); break; \ -+ default : \ -+ panic("Bad register in UPT_REG : %d\n", reg); \ -+ val = -1; \ -+ } \ -+ val; \ -+ }) -+ -+ -+#define UPT_SET(regs, reg, val) \ -+ do { \ -+ switch(reg){ \ -+ case EIP: UPT_IP(regs) = val; break; \ -+ case UESP: UPT_SP(regs) = val; break; \ -+ case EAX: UPT_EAX(regs) = val; break; \ -+ case EBX: UPT_EBX(regs) = val; break; \ -+ case ECX: UPT_ECX(regs) = val; break; \ -+ case EDX: UPT_EDX(regs) = val; break; \ -+ case ESI: UPT_ESI(regs) = val; break; \ -+ case EDI: UPT_EDI(regs) = val; break; \ -+ case EBP: UPT_EBP(regs) = val; break; \ -+ case ORIG_EAX: UPT_ORIG_EAX(regs) = val; break; \ -+ case CS: UPT_CS(regs) = val; break; \ -+ case SS: UPT_SS(regs) = val; break; \ -+ case DS: UPT_DS(regs) = val; break; \ -+ case ES: UPT_ES(regs) = val; break; \ -+ case FS: UPT_FS(regs) = val; break; \ -+ case GS: UPT_GS(regs) = val; break; \ -+ case EFL: UPT_EFLAGS(regs) = val; break; \ -+ default : \ -+ panic("Bad register in UPT_SET : %d\n", reg); \ -+ break; \ -+ } \ -+ } while (0) -+ -+#define UPT_SET_SYSCALL_RETURN(r, res) \ -+ CHOOSE_MODE(SC_SET_SYSCALL_RETURN(UPT_SC(r), (res)), \ -+ REGS_SET_SYSCALL_RETURN((r)->skas.regs, (res))) -+ -+#define UPT_RESTART_SYSCALL(r) \ -+ CHOOSE_MODE(SC_RESTART_SYSCALL(UPT_SC(r)), \ -+ REGS_RESTART_SYSCALL((r)->skas.regs)) -+ -+#define UPT_ORIG_SYSCALL(r) UPT_EAX(r) -+#define UPT_SYSCALL_NR(r) UPT_ORIG_EAX(r) -+#define UPT_SYSCALL_RET(r) UPT_EAX(r) -+ -+#define UPT_SEGV_IS_FIXABLE(r) \ -+ CHOOSE_MODE(SC_SEGV_IS_FIXABLE(UPT_SC(r)), \ -+ REGS_SEGV_IS_FIXABLE(&r->skas)) -+ -+#define UPT_FAULT_ADDR(r) \ -+ CHOOSE_MODE(SC_FAULT_ADDR(UPT_SC(r)), REGS_FAULT_ADDR(&r->skas)) -+ -+#define UPT_FAULT_WRITE(r) \ -+ CHOOSE_MODE(SC_FAULT_WRITE(UPT_SC(r)), REGS_FAULT_WRITE(&r->skas)) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow 
Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/ptrace_user.h um/arch/um/include/sysdep-i386/ptrace_user.h ---- orig/arch/um/include/sysdep-i386/ptrace_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/ptrace_user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_I386_PTRACE_USER_H__ -+#define __SYSDEP_I386_PTRACE_USER_H__ -+ -+#include <asm/ptrace.h> -+ -+#define PT_OFFSET(r) ((r) * sizeof(long)) -+ -+#define PT_SYSCALL_NR(regs) ((regs)[ORIG_EAX]) -+#define PT_SYSCALL_NR_OFFSET PT_OFFSET(ORIG_EAX) -+ -+#define PT_SYSCALL_ARG1_OFFSET PT_OFFSET(EBX) -+#define PT_SYSCALL_ARG2_OFFSET PT_OFFSET(ECX) -+#define PT_SYSCALL_ARG3_OFFSET PT_OFFSET(EDX) -+#define PT_SYSCALL_ARG4_OFFSET PT_OFFSET(ESI) -+#define PT_SYSCALL_ARG5_OFFSET PT_OFFSET(EDI) -+ -+#define PT_SYSCALL_RET_OFFSET PT_OFFSET(EAX) -+ -+#define PT_IP_OFFSET PT_OFFSET(EIP) -+#define PT_IP(regs) ((regs)[EIP]) -+#define PT_SP(regs) ((regs)[UESP]) -+ -+#ifndef FRAME_SIZE -+#define FRAME_SIZE (17) -+#endif -+#define FRAME_SIZE_OFFSET (FRAME_SIZE * sizeof(unsigned long)) -+ -+#define FP_FRAME_SIZE (27) -+#define FPX_FRAME_SIZE (128) -+ -+#ifdef PTRACE_GETREGS -+#define UM_HAVE_GETREGS -+#endif -+ -+#ifdef PTRACE_SETREGS -+#define UM_HAVE_SETREGS -+#endif -+ -+#ifdef PTRACE_GETFPREGS -+#define UM_HAVE_GETFPREGS -+#endif -+ -+#ifdef PTRACE_SETFPREGS -+#define UM_HAVE_SETFPREGS -+#endif -+ -+#ifdef PTRACE_GETFPXREGS -+#define UM_HAVE_GETFPXREGS -+#endif -+ -+#ifdef PTRACE_SETFPXREGS -+#define UM_HAVE_SETFPXREGS -+#endif -+ -+extern void 
update_debugregs(int seq); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/sigcontext.h um/arch/um/include/sysdep-i386/sigcontext.h ---- orig/arch/um/include/sysdep-i386/sigcontext.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/sigcontext.h Sun Dec 8 18:21:33 2002 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_SIGCONTEXT_I386_H -+#define __SYS_SIGCONTEXT_I386_H -+ -+#include "sc.h" -+ -+#define IP_RESTART_SYSCALL(ip) ((ip) -= 2) -+ -+#define SC_RESTART_SYSCALL(sc) IP_RESTART_SYSCALL(SC_IP(sc)) -+#define SC_SET_SYSCALL_RETURN(sc, result) SC_EAX(sc) = (result) -+ -+#define SC_FAULT_ADDR(sc) SC_CR2(sc) -+#define SC_FAULT_TYPE(sc) SC_ERR(sc) -+ -+#define FAULT_WRITE(err) (err & 2) -+#define TO_SC_ERR(is_write) ((is_write) ? 2 : 0) -+ -+#define SC_FAULT_WRITE(sc) (FAULT_WRITE(SC_ERR(sc))) -+ -+#define SC_TRAP_TYPE(sc) SC_TRAPNO(sc) -+ -+/* ptrace expects that, at the start of a system call, %eax contains -+ * -ENOSYS, so this makes it so. -+ */ -+#define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) -+ -+/* These are General Protection and Page Fault */ -+#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14)) -+ -+#define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) -+ -+extern unsigned long *sc_sigmask(void *sc_ptr); -+extern int sc_get_fpregs(unsigned long buf, void *sc_ptr); -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-i386/syscalls.h um/arch/um/include/sysdep-i386/syscalls.h ---- orig/arch/um/include/sysdep-i386/syscalls.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-i386/syscalls.h Sun Dec 8 18:04:15 2002 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/unistd.h" -+#include "sysdep/ptrace.h" -+ -+typedef long syscall_handler_t(struct pt_regs); -+ -+#define EXECUTE_SYSCALL(syscall, regs) \ -+ ((long (*)(struct syscall_args)) (*sys_call_table[syscall]))(SYSCALL_ARGS(®s->regs)) -+ -+extern syscall_handler_t sys_modify_ldt; -+extern syscall_handler_t old_mmap_i386; -+extern syscall_handler_t old_select; -+extern syscall_handler_t sys_ni_syscall; -+ -+#define ARCH_SYSCALLS \ -+ [ __NR_mmap ] = old_mmap_i386, \ -+ [ __NR_select ] = old_select, \ -+ [ __NR_vm86old ] = sys_ni_syscall, \ -+ [ __NR_modify_ldt ] = sys_modify_ldt, \ -+ [ __NR_lchown32 ] = sys_lchown, \ -+ [ __NR_getuid32 ] = sys_getuid, \ -+ [ __NR_getgid32 ] = sys_getgid, \ -+ [ __NR_geteuid32 ] = sys_geteuid, \ -+ [ __NR_getegid32 ] = sys_getegid, \ -+ [ __NR_setreuid32 ] = sys_setreuid, \ -+ [ __NR_setregid32 ] = sys_setregid, \ -+ [ __NR_getgroups32 ] = sys_getgroups, \ -+ [ __NR_setgroups32 ] = sys_setgroups, \ -+ [ __NR_fchown32 ] = sys_fchown, \ -+ [ __NR_setresuid32 ] = sys_setresuid, \ -+ [ __NR_getresuid32 ] = sys_getresuid, \ -+ [ __NR_setresgid32 ] = sys_setresgid, \ -+ [ __NR_getresgid32 ] = sys_getresgid, \ -+ [ __NR_chown32 ] = sys_chown, \ -+ [ __NR_setuid32 ] = sys_setuid, \ -+ [ __NR_setgid32 ] = sys_setgid, \ -+ [ __NR_setfsuid32 ] = sys_setfsuid, \ -+ [ __NR_setfsgid32 ] = sys_setfsgid, \ -+ [ __NR_pivot_root ] = sys_pivot_root, \ -+ [ __NR_mincore ] = sys_mincore, \ -+ [ __NR_madvise ] = sys_madvise, \ -+ [ 222 
] = sys_ni_syscall, -+ -+/* 222 doesn't yet have a name in include/asm-i386/unistd.h */ -+ -+#define LAST_ARCH_SYSCALL 222 -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ia64/ptrace.h um/arch/um/include/sysdep-ia64/ptrace.h ---- orig/arch/um/include/sysdep-ia64/ptrace.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-ia64/ptrace.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_PTRACE_H -+#define __SYSDEP_IA64_PTRACE_H -+ -+struct sys_pt_regs { -+ int foo; -+}; -+ -+#define EMPTY_REGS { 0 } -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ia64/sigcontext.h um/arch/um/include/sysdep-ia64/sigcontext.h ---- orig/arch/um/include/sysdep-ia64/sigcontext.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-ia64/sigcontext.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_SIGCONTEXT_H -+#define __SYSDEP_IA64_SIGCONTEXT_H -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ia64/syscalls.h um/arch/um/include/sysdep-ia64/syscalls.h ---- orig/arch/um/include/sysdep-ia64/syscalls.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-ia64/syscalls.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYSDEP_IA64_SYSCALLS_H -+#define __SYSDEP_IA64_SYSCALLS_H -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ppc/ptrace.h um/arch/um/include/sysdep-ppc/ptrace.h ---- orig/arch/um/include/sysdep-ppc/ptrace.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-ppc/ptrace.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,104 @@ -+/* -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_PTRACE_PPC_H -+#define __SYS_PTRACE_PPC_H -+ -+#include "linux/config.h" -+#include "linux/types.h" -+ -+/* the following taken from <asm-ppc/ptrace.h> */ -+ -+#ifdef CONFIG_PPC64 -+#define PPC_REG unsigned long /*long*/ -+#else -+#define PPC_REG unsigned long -+#endif -+struct sys_pt_regs_s { -+ PPC_REG gpr[32]; -+ PPC_REG nip; -+ PPC_REG msr; -+ PPC_REG orig_gpr3; /* Used for restarting system calls */ -+ PPC_REG ctr; -+ PPC_REG link; -+ PPC_REG xer; -+ PPC_REG ccr; -+ PPC_REG mq; /* 601 only (not used at present) */ -+ /* Used on APUS to hold IPL value. */ -+ PPC_REG trap; /* Reason for being here */ -+ PPC_REG dar; /* Fault registers */ -+ PPC_REG dsisr; -+ PPC_REG result; /* Result of a system call */ -+}; -+ -+#define NUM_REGS (sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)) -+ -+struct sys_pt_regs { -+ PPC_REG regs[sizeof(struct sys_pt_regs_s) / sizeof(PPC_REG)]; -+}; -+ -+#define UM_MAX_REG (PT_FPR0) -+#define UM_MAX_REG_OFFSET (UM_MAX_REG * sizeof(PPC_REG)) -+ -+#define EMPTY_REGS { { [ 0 ... 
NUM_REGS - 1] = 0 } } -+ -+#define UM_REG(r, n) ((r)->regs[n]) -+ -+#define UM_SYSCALL_RET(r) UM_REG(r, PT_R3) -+#define UM_SP(r) UM_REG(r, PT_R1) -+#define UM_IP(r) UM_REG(r, PT_NIP) -+#define UM_ELF_ZERO(r) UM_REG(r, PT_FPSCR) -+#define UM_SYSCALL_NR(r) UM_REG(r, PT_R0) -+#define UM_SYSCALL_ARG1(r) UM_REG(r, PT_ORIG_R3) -+#define UM_SYSCALL_ARG2(r) UM_REG(r, PT_R4) -+#define UM_SYSCALL_ARG3(r) UM_REG(r, PT_R5) -+#define UM_SYSCALL_ARG4(r) UM_REG(r, PT_R6) -+#define UM_SYSCALL_ARG5(r) UM_REG(r, PT_R7) -+#define UM_SYSCALL_ARG6(r) UM_REG(r, PT_R8) -+ -+#define UM_SYSCALL_NR_OFFSET (PT_R0 * sizeof(PPC_REG)) -+#define UM_SYSCALL_RET_OFFSET (PT_R3 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG1_OFFSET (PT_R3 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG2_OFFSET (PT_R4 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG3_OFFSET (PT_R5 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG4_OFFSET (PT_R6 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG5_OFFSET (PT_R7 * sizeof(PPC_REG)) -+#define UM_SYSCALL_ARG6_OFFSET (PT_R8 * sizeof(PPC_REG)) -+#define UM_SP_OFFSET (PT_R1 * sizeof(PPC_REG)) -+#define UM_IP_OFFSET (PT_NIP * sizeof(PPC_REG)) -+#define UM_ELF_ZERO_OFFSET (PT_R3 * sizeof(PPC_REG)) -+ -+#define UM_SET_SYSCALL_RETURN(_regs, result) \ -+do { \ -+ if (result < 0) { \ -+ (_regs)->regs[PT_CCR] |= 0x10000000; \ -+ UM_SYSCALL_RET((_regs)) = -result; \ -+ } else { \ -+ UM_SYSCALL_RET((_regs)) = result; \ -+ } \ -+} while(0) -+ -+extern void shove_aux_table(unsigned long sp); -+#define UM_FIX_EXEC_STACK(sp) shove_aux_table(sp); -+ -+/* These aren't actually defined. The undefs are just to make sure -+ * everyone's clear on the concept. -+ */ -+#undef UML_HAVE_GETREGS -+#undef UML_HAVE_GETFPREGS -+#undef UML_HAVE_SETREGS -+#undef UML_HAVE_SETFPREGS -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ppc/sigcontext.h um/arch/um/include/sysdep-ppc/sigcontext.h ---- orig/arch/um/include/sysdep-ppc/sigcontext.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-ppc/sigcontext.h Sat Nov 23 22:02:19 2002 -@@ -0,0 +1,62 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SYS_SIGCONTEXT_PPC_H -+#define __SYS_SIGCONTEXT_PPC_H -+ -+#define DSISR_WRITE 0x02000000 -+ -+#define SC_FAULT_ADDR(sc) ({ \ -+ struct sigcontext *_sc = (sc); \ -+ long retval = -1; \ -+ switch (_sc->regs->trap) { \ -+ case 0x300: \ -+ /* data exception */ \ -+ retval = _sc->regs->dar; \ -+ break; \ -+ case 0x400: \ -+ /* instruction exception */ \ -+ retval = _sc->regs->nip; \ -+ break; \ -+ default: \ -+ panic("SC_FAULT_ADDR: unhandled trap type\n"); \ -+ } \ -+ retval; \ -+ }) -+ -+#define SC_FAULT_WRITE(sc) ({ \ -+ struct sigcontext *_sc = (sc); \ -+ long retval = -1; \ -+ switch (_sc->regs->trap) { \ -+ case 0x300: \ -+ /* data exception */ \ -+ retval = !!(_sc->regs->dsisr & DSISR_WRITE); \ -+ break; \ -+ case 0x400: \ -+ /* instruction exception: not a write */ \ -+ retval = 0; \ -+ break; \ -+ default: \ -+ panic("SC_FAULT_ADDR: unhandled trap type\n"); \ -+ } \ -+ retval; \ -+ }) -+ -+#define SC_IP(sc) ((sc)->regs->nip) -+#define SC_SP(sc) ((sc)->regs->gpr[1]) -+#define SEGV_IS_FIXABLE(sc) (1) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysdep-ppc/syscalls.h um/arch/um/include/sysdep-ppc/syscalls.h ---- orig/arch/um/include/sysdep-ppc/syscalls.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysdep-ppc/syscalls.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,50 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+typedef long syscall_handler_t(unsigned long arg1, unsigned long arg2, -+ unsigned long arg3, unsigned long arg4, -+ unsigned long arg5, unsigned long arg6); -+ -+#define EXECUTE_SYSCALL(syscall, regs) \ -+ (*sys_call_table[syscall])(UM_SYSCALL_ARG1(®s), \ -+ UM_SYSCALL_ARG2(®s), \ -+ UM_SYSCALL_ARG3(®s), \ -+ UM_SYSCALL_ARG4(®s), \ -+ UM_SYSCALL_ARG5(®s), \ -+ UM_SYSCALL_ARG6(®s)) -+ -+extern syscall_handler_t sys_mincore; -+extern syscall_handler_t sys_madvise; -+ -+/* old_mmap needs the correct prototype since syscall_kern.c includes -+ * this file. -+ */ -+int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset); -+ -+#define ARCH_SYSCALLS \ -+ [ __NR_modify_ldt ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_read ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_write ] = sys_ni_syscall, \ -+ [ __NR_pciconfig_iobase ] = sys_ni_syscall, \ -+ [ __NR_pivot_root ] = sys_ni_syscall, \ -+ [ __NR_multiplexer ] = sys_ni_syscall, \ -+ [ __NR_mmap ] = old_mmap, \ -+ [ __NR_madvise ] = sys_madvise, \ -+ [ __NR_mincore ] = sys_mincore, -+ -+#define LAST_ARCH_SYSCALL __NR_mincore -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/sysrq.h um/arch/um/include/sysrq.h ---- orig/arch/um/include/sysrq.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/sysrq.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SYSRQ_H -+#define __UM_SYSRQ_H -+ -+extern void show_trace(unsigned long *stack); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/include/tempfile.h um/arch/um/include/tempfile.h ---- orig/arch/um/include/tempfile.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/tempfile.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,21 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TEMPFILE_H__ -+#define __TEMPFILE_H__ -+ -+extern int make_tempfile(const char *template, char **tempname, int do_unlink); -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/time_user.h um/arch/um/include/time_user.h ---- orig/arch/um/include/time_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/time_user.h Wed Jan 8 12:55:47 2003 -@@ -0,0 +1,17 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TIME_USER_H__ -+#define __TIME_USER_H__ -+ -+extern void timer(void); -+extern void switch_timers(int to_real); -+extern void set_interval(int timer_type); -+extern void idle_sleep(int secs); -+extern void enable_timer(void); -+extern unsigned long time_lock(void); -+extern void time_unlock(unsigned long); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/include/tlb.h um/arch/um/include/tlb.h ---- orig/arch/um/include/tlb.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/tlb.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TLB_H__ -+#define __TLB_H__ -+ -+extern void mprotect_kernel_vm(int w); -+extern void force_flush_all(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/ubd_user.h um/arch/um/include/ubd_user.h ---- orig/arch/um/include/ubd_user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/ubd_user.h Thu Mar 6 18:09:14 2003 -@@ -0,0 +1,77 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Copyright (C) 2001 RidgeRun, Inc (glonnon@ridgerun.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_UBD_USER_H -+#define __UM_UBD_USER_H -+ -+#include "os.h" -+ -+enum ubd_req { UBD_READ, UBD_WRITE }; -+ -+struct io_thread_req { -+ enum ubd_req op; -+ int fds[2]; -+ unsigned long offsets[2]; -+ unsigned long long offset; -+ unsigned long length; -+ char *buffer; -+ int sectorsize; -+ unsigned long sector_mask; -+ unsigned long cow_offset; -+ unsigned long bitmap_words[2]; -+ int error; -+}; -+ -+extern int open_ubd_file(char *file, struct openflags *openflags, -+ char **backing_file_out, int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, int *data_offset_out, -+ int *create_cow_out); -+extern int create_cow_file(char *cow_file, char *backing_file, -+ struct openflags flags, int sectorsize, -+ int *bitmap_offset_out, -+ unsigned long *bitmap_len_out, -+ int *data_offset_out); -+extern int read_cow_bitmap(int fd, void *buf, int offset, int len); -+extern int read_ubd_fs(int fd, void *buffer, int len); -+extern int write_ubd_fs(int fd, char *buffer, int len); -+extern int start_io_thread(unsigned long sp, int *fds_out); -+extern void do_io(struct io_thread_req *req); -+ -+static inline int ubd_test_bit(__u64 bit, unsigned char *data) -+{ -+ __u64 n; -+ int bits, off; -+ -+ bits = sizeof(data[0]) * 8; -+ n = bit / bits; -+ off = bit % bits; -+ return((data[n] & (1 << off)) != 0); -+} -+ -+static inline void ubd_set_bit(__u64 bit, unsigned char *data) -+{ -+ __u64 n; -+ int bits, off; -+ -+ bits = sizeof(data[0]) * 8; -+ 
n = bit / bits; -+ off = bit % bits; -+ data[n] |= (1 << off); -+} -+ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/um_mmu.h um/arch/um/include/um_mmu.h ---- orig/arch/um/include/um_mmu.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/um_mmu.h Sat Nov 9 12:51:43 2002 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __ARCH_UM_MMU_H -+#define __ARCH_UM_MMU_H -+ -+#include "linux/config.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/mmu.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/mmu.h" -+#endif -+ -+typedef union { -+#ifdef CONFIG_MODE_TT -+ struct mmu_context_tt tt; -+#endif -+#ifdef CONFIG_MODE_SKAS -+ struct mmu_context_skas skas; -+#endif -+} mm_context_t; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/um_uaccess.h um/arch/um/include/um_uaccess.h ---- orig/arch/um/include/um_uaccess.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/um_uaccess.h Sat Nov 23 22:03:02 2002 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __ARCH_UM_UACCESS_H -+#define __ARCH_UM_UACCESS_H -+ -+#include "linux/config.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_MODE_TT -+#include "../kernel/tt/include/uaccess.h" -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+#include "../kernel/skas/include/uaccess.h" -+#endif -+ -+#define access_ok(type, addr, size) \ -+ CHOOSE_MODE_PROC(access_ok_tt, access_ok_skas, type, addr, size) -+ -+static inline int verify_area(int type, const void * addr, unsigned long size) -+{ -+ return(CHOOSE_MODE_PROC(verify_area_tt, verify_area_skas, type, addr, -+ size)); -+} -+ -+static inline int copy_from_user(void *to, const void *from, int n) -+{ -+ return(CHOOSE_MODE_PROC(copy_from_user_tt, copy_from_user_skas, to, -+ from, n)); -+} -+ -+static inline int copy_to_user(void *to, const void *from, int n) -+{ -+ return(CHOOSE_MODE_PROC(copy_to_user_tt, copy_to_user_skas, to, -+ from, n)); -+} -+ -+static inline int strncpy_from_user(char *dst, const char *src, int count) -+{ -+ return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas, -+ dst, src, count)); -+} -+ -+static inline int __clear_user(void *mem, int len) -+{ -+ return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len)); -+} -+ -+static inline int clear_user(void *mem, int len) -+{ -+ return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len)); -+} -+ -+static inline int strnlen_user(const void *str, int len) -+{ -+ return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len)); -+} -+ -+#endif -+ -+/* 
-+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/umid.h um/arch/um/include/umid.h ---- orig/arch/um/include/umid.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/umid.h Mon Dec 16 20:52:19 2002 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UMID_H__ -+#define __UMID_H__ -+ -+extern int umid_file_name(char *name, char *buf, int len); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/uml_uaccess.h um/arch/um/include/uml_uaccess.h ---- orig/arch/um/include/uml_uaccess.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/uml_uaccess.h Thu Dec 19 13:15:22 2002 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UML_UACCESS_H__ -+#define __UML_UACCESS_H__ -+ -+extern int __do_copy_to_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher); -+extern unsigned long __do_user_copy(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher, -+ void (*op)(void *to, const void *from, -+ int n), int *faulted_out); -+void __do_copy(void *to, const void *from, int n); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/umn.h um/arch/um/include/umn.h ---- orig/arch/um/include/umn.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/umn.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UMN_H -+#define __UMN_H -+ -+extern int open_umn_tty(int *slave_out, int *slipno_out); -+extern void close_umn_tty(int master, int slave); -+extern int umn_send_packet(int fd, void *data, int len); -+extern int set_umn_addr(int fd, char *addr, char *ptp_addr); -+extern void slip_unesc(unsigned char s); -+extern void umn_read(int fd); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/user.h um/arch/um/include/user.h ---- orig/arch/um/include/user.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/user.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __USER_H__ -+#define __USER_H__ -+ -+extern void panic(const char *fmt, ...); -+extern int printk(const char *fmt, ...); -+extern void schedule(void); -+extern void *um_kmalloc(int size); -+extern void *um_kmalloc_atomic(int size); -+extern void kfree(void *ptr); -+extern int in_aton(char *str); -+extern int open_gdb_chan(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/include/user_util.h um/arch/um/include/user_util.h ---- orig/arch/um/include/user_util.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/include/user_util.h Wed Apr 23 20:42:00 2003 -@@ -0,0 +1,103 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __USER_UTIL_H__ -+#define __USER_UTIL_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int mode_tt; -+ -+extern int grantpt(int __fd); -+extern int unlockpt(int __fd); -+extern char *ptsname(int __fd); -+ -+enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; -+ -+struct cpu_task { -+ int pid; -+ void *task; -+}; -+ -+extern struct cpu_task cpu_tasks[]; -+ -+struct signal_info { -+ void (*handler)(int, union uml_pt_regs *); -+ int is_irq; -+}; -+ -+extern struct signal_info sig_info[]; -+ -+extern unsigned long low_physmem; -+extern unsigned long high_physmem; -+extern unsigned long uml_physmem; -+extern unsigned long uml_reserved; -+extern unsigned long end_vm; -+extern unsigned long start_vm; -+extern unsigned long highmem; -+ -+extern char host_info[]; -+ -+extern char saved_command_line[]; -+extern char command_line[]; -+ -+extern char *tempdir; -+ -+extern unsigned long _stext, _etext, _sdata, _edata, __bss_start, _end; -+extern unsigned long _unprotected_end; -+extern unsigned long brk_start; -+ -+extern int pty_output_sigio; -+extern int pty_close_sigio; -+ -+extern void stop(void); -+extern void stack_protections(unsigned long address); -+extern void task_protections(unsigned long address); -+extern int wait_for_stop(int pid, int sig, int cont_type, void *relay); -+extern void 
*add_signal_handler(int sig, void (*handler)(int)); -+extern int start_fork_tramp(void *arg, unsigned long temp_stack, -+ int clone_flags, int (*tramp)(void *)); -+extern int linux_main(int argc, char **argv); -+extern void set_cmdline(char *cmd); -+extern void input_cb(void (*proc)(void *), void *arg, int arg_len); -+extern int get_pty(void); -+extern void *um_kmalloc(int size); -+extern int raw(int fd, int complain); -+extern int switcheroo(int fd, int prot, void *from, void *to, int size); -+extern void setup_machinename(char *machine_out); -+extern void setup_hostinfo(void); -+extern void add_arg(char *cmd_line, char *arg); -+extern void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)); -+extern void init_new_thread_signals(int altstack); -+extern void do_exec(int old_pid, int new_pid); -+extern void tracer_panic(char *msg, ...); -+extern char *get_umid(int only_if_set); -+extern void do_longjmp(void *p, int val); -+extern void suspend_new_thread(int fd); -+extern int detach(int pid, int sig); -+extern int attach(int pid); -+extern void kill_child_dead(int pid); -+extern int cont(int pid); -+extern void check_ptrace(void); -+extern void check_sigio(void); -+extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); -+extern void write_sigio_workaround(void); -+extern void arch_check_bugs(void); -+extern int arch_handle_signal(int sig, union uml_pt_regs *regs); -+extern int arch_fixup(unsigned long address, void *sc_ptr); -+extern int can_do_skas(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/Makefile um/arch/um/kernel/Makefile ---- orig/arch/um/kernel/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/Makefile Thu Apr 10 11:14:55 2003 -@@ -0,0 +1,73 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = config.o checksum.o exec_kern.o exitcode.o frame_kern.o frame.o \ -+ helper.o init_task.o irq.o irq_user.o ksyms.o mem.o mem_user.o \ -+ process.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ -+ sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o \ -+ syscall_user.o sysrq.o sys_call_table.o tempfile.o time.o \ -+ time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o \ -+ umid.o user_syms.o user_util.o -+ -+obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o -+obj-$(CONFIG_GPROF) += gprof_syms.o -+obj-$(CONFIG_GCOV) += gmon_syms.o -+obj-$(CONFIG_TTY_LOG) += tty_log.o -+ -+subdir-$(CONFIG_MODE_TT) += tt -+subdir-$(CONFIG_MODE_SKAS) += skas -+ -+user-objs-$(CONFIG_TTY_LOG) += tty_log.o -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+# user_syms.o not included here because Rules.make has its own ideas about -+# building anything in export-objs -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) $(user-objs-y) config.o helper.o \ -+ process.o tempfile.o time.o umid.o user_util.o -+ -+DMODULES-$(CONFIG_MODULES) = -D__CONFIG_MODULES__ -+DMODVERSIONS-$(CONFIG_MODVERSIONS) = -D__CONFIG_MODVERSIONS__ -+ -+export-objs-$(CONFIG_GPROF) += gprof_syms.o -+export-objs-$(CONFIG_GCOV) += gmon_syms.o -+ -+export-objs = ksyms.o process_kern.o signal_kern.o user_syms.o $(export-objs-y) -+ -+CFLAGS_user_syms.o = -D__AUTOCONF_INCLUDED__ $(DMODULES-y) $(DMODVERSIONS-y) \ -+ -I/usr/include -I../include -+ -+CFLAGS_frame.o := $(patsubst 
-fomit-frame-pointer,,$(USER_CFLAGS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+# This has to be separate because it needs be compiled with frame pointers -+# regardless of how the rest of the kernel is built. -+ -+frame.o: frame.c -+ $(CC) $(CFLAGS_$@) -c -o $@ $< -+ -+QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }' -+ -+config.c : config.c.in $(TOPDIR)/.config -+ $(PERL) -e $(QUOTE) < config.c.in > $@ -+ -+clean: -+ $(RM) config.c -+ for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done -+ -+modules: -+ -+fastdep: -+ -+dep: -+ -+archmrproper: clean -diff -Naur -X ../exclude-files orig/arch/um/kernel/checksum.c um/arch/um/kernel/checksum.c ---- orig/arch/um/kernel/checksum.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/checksum.c Thu Oct 31 22:39:58 2002 -@@ -0,0 +1,42 @@ -+#include "asm/uaccess.h" -+#include "linux/errno.h" -+ -+extern unsigned int arch_csum_partial(const char *buff, int len, int sum); -+ -+extern unsigned int csum_partial(char *buff, int len, int sum) -+{ -+ return(arch_csum_partial(buff, len, sum)); -+} -+ -+unsigned int csum_partial_copy_to(const char *src, char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if(copy_to_user(dst, src, len)){ -+ *err_ptr = -EFAULT; -+ return(-1); -+ } -+ -+ return(arch_csum_partial(src, len, sum)); -+} -+ -+unsigned int csum_partial_copy_from(const char *src, char *dst, int len, -+ int sum, int *err_ptr) -+{ -+ if(copy_from_user(dst, src, len)){ -+ *err_ptr = -EFAULT; -+ return(-1); -+ } -+ -+ return(arch_csum_partial(dst, len, sum)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/config.c.in um/arch/um/kernel/config.c.in ---- orig/arch/um/kernel/config.c.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/config.c.in Thu Apr 10 11:17:55 2003 -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include "init.h" -+ -+static __initdata char *config = "CONFIG"; -+ -+static int __init print_config(char *line, int *add) -+{ -+ printf("%s", config); -+ exit(0); -+} -+ -+__uml_setup("--showconfig", print_config, -+"--showconfig\n" -+" Prints the config file that this UML binary was generated from.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/exec_kern.c um/arch/um/kernel/exec_kern.c ---- orig/arch/um/kernel/exec_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/exec_kern.c Wed Apr 16 16:35:05 2003 -@@ -0,0 +1,86 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/slab.h" -+#include "linux/smp_lock.h" -+#include "asm/ptrace.h" -+#include "asm/pgtable.h" -+#include "asm/pgalloc.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "mem_user.h" -+#include "kern.h" -+#include "irq_user.h" -+#include "tlb.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "time_user.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+ -+void flush_thread(void) -+{ -+ CHOOSE_MODE(flush_thread_tt(), flush_thread_skas()); -+} -+ -+void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) -+{ -+ CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); -+} -+ -+extern void log_exec(char **argv, void *tty); -+ -+static int execve1(char *file, char **argv, char **env) -+{ -+ int error; -+ -+#ifdef CONFIG_TTY_LOG -+ log_exec(argv, current->tty); -+#endif -+ error = do_execve(file, argv, env, &current->thread.regs); -+ if (error == 0){ -+ current->ptrace &= ~PT_DTRACE; -+ set_cmdline(current_cmd()); -+ } -+ return(error); -+} -+ -+int um_execve(char *file, char **argv, char **env) -+{ -+ int err; -+ -+ err = execve1(file, argv, env); -+ if(!err) -+ do_longjmp(current->thread.exec_buf, 1); -+ return(err); -+} -+ -+int sys_execve(char *file, char **argv, char **env) -+{ -+ int error; -+ char *filename; -+ -+ lock_kernel(); -+ filename = getname((char *) file); -+ error = PTR_ERR(filename); -+ if (IS_ERR(filename)) goto out; -+ error = execve1(filename, argv, env); -+ putname(filename); -+ out: 
-+ unlock_kernel(); -+ return(error); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/exitcode.c um/arch/um/kernel/exitcode.c ---- orig/arch/um/kernel/exitcode.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/exitcode.c Thu Nov 7 18:22:04 2002 -@@ -0,0 +1,73 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/ctype.h" -+#include "linux/proc_fs.h" -+#include "asm/uaccess.h" -+ -+/* If read and write race, the read will still atomically read a valid -+ * value. -+ */ -+int uml_exitcode = 0; -+ -+static int read_proc_exitcode(char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ int len; -+ -+ len = sprintf(page, "%d\n", uml_exitcode); -+ len -= off; -+ if(len <= off+count) *eof = 1; -+ *start = page + off; -+ if(len > count) len = count; -+ if(len < 0) len = 0; -+ return(len); -+} -+ -+static int write_proc_exitcode(struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ char *end, buf[sizeof("nnnnn\0")]; -+ int tmp; -+ -+ if(copy_from_user(buf, buffer, count)) -+ return(-EFAULT); -+ tmp = simple_strtol(buf, &end, 0); -+ if((*end != '\0') && !isspace(*end)) -+ return(-EINVAL); -+ uml_exitcode = tmp; -+ return(count); -+} -+ -+static int make_proc_exitcode(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ ent = create_proc_entry("exitcode", 0600, &proc_root); -+ if(ent == NULL){ -+ printk("make_proc_exitcode : Failed to register " -+ "/proc/exitcode\n"); -+ return(0); -+ } -+ -+ ent->read_proc = read_proc_exitcode; -+ ent->write_proc = 
write_proc_exitcode; -+ -+ return(0); -+} -+ -+__initcall(make_proc_exitcode); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/frame.c um/arch/um/kernel/frame.c ---- orig/arch/um/kernel/frame.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/frame.c Wed Dec 11 11:12:41 2002 -@@ -0,0 +1,342 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <string.h> -+#include <signal.h> -+#include <wait.h> -+#include <sched.h> -+#include <errno.h> -+#include <sys/ptrace.h> -+#include <sys/syscall.h> -+#include <sys/mman.h> -+#include <asm/page.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+#include "frame_user.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+#include "os.h" -+ -+static int capture_stack(int (*child)(void *arg), void *arg, void *sp, -+ unsigned long top, void **data_out) -+{ -+ unsigned long regs[FRAME_SIZE]; -+ int pid, status, n, len; -+ -+ /* Start the child as a thread */ -+ pid = clone(child, sp, CLONE_VM | SIGCHLD, arg); -+ if(pid < 0){ -+ printf("capture_stack : clone failed - errno = %d\n", errno); -+ exit(1); -+ } -+ -+ /* Wait for it to stop itself and continue it with a SIGUSR1 to force -+ * it into the signal handler. 
-+ */ -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ fprintf(stderr, "capture_stack : Expected SIGSTOP, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ if(ptrace(PTRACE_CONT, pid, 0, SIGUSR1) < 0){ -+ printf("capture_stack : PTRACE_CONT failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ /* Wait for it to stop itself again and grab its registers again. -+ * At this point, the handler has stuffed the addresses of -+ * sig, sc, and SA_RESTORER in raw. -+ */ -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ fprintf(stderr, "capture_stack : Expected SIGSTOP, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ if(ptrace(PTRACE_GETREGS, pid, 0, regs) < 0){ -+ printf("capture_stack : PTRACE_GETREGS failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ /* It has outlived its usefulness, so continue it so it can exit */ -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0){ -+ printf("capture_stack : PTRACE_CONT failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ if(waitpid(pid, &status, 0) < 0){ -+ printf("capture_stack : waitpid failed - errno = %d\n", errno); -+ exit(1); -+ } -+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != 9)){ -+ printf("capture_stack : Expected exit signal 9, " -+ "got status = 0x%x\n", status); -+ exit(1); -+ } -+ -+ /* The frame that we want is the top of the signal stack */ -+ -+ len = top - PT_SP(regs); -+ *data_out = malloc(len); -+ if(*data_out == NULL){ -+ printf("capture_stack : malloc failed - errno = %d\n", errno); -+ exit(1); -+ } -+ memcpy(*data_out, (void *) PT_SP(regs), len); -+ -+ return(len); -+} -+ -+struct common_raw { -+ void *stack; -+ int size; -+ unsigned long sig; -+ unsigned long sr; -+ unsigned long sp; -+ 
struct arch_frame_data_raw arch; -+}; -+ -+#define SA_RESTORER (0x04000000) -+ -+typedef unsigned long old_sigset_t; -+ -+struct old_sigaction { -+ __sighandler_t handler; -+ old_sigset_t sa_mask; -+ unsigned long sa_flags; -+ void (*sa_restorer)(void); -+}; -+ -+static void child_common(struct common_raw *common, sighandler_t handler, -+ int restorer, int flags) -+{ -+ stack_t ss = ((stack_t) { .ss_sp = common->stack, -+ .ss_flags = 0, -+ .ss_size = common->size }); -+ int err; -+ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ printf("PTRACE_TRACEME failed, errno = %d\n", errno); -+ } -+ if(sigaltstack(&ss, NULL) < 0){ -+ printf("sigaltstack failed - errno = %d\n", errno); -+ kill(getpid(), SIGKILL); -+ } -+ -+ if(restorer){ -+ struct sigaction sa; -+ -+ sa.sa_handler = handler; -+ sigemptyset(&sa.sa_mask); -+ sa.sa_flags = SA_ONSTACK | flags; -+ err = sigaction(SIGUSR1, &sa, NULL); -+ } -+ else { -+ struct old_sigaction sa; -+ -+ sa.handler = handler; -+ sa.sa_mask = 0; -+ sa.sa_flags = (SA_ONSTACK | flags) & ~SA_RESTORER; -+ err = syscall(__NR_sigaction, SIGUSR1, &sa, NULL); -+ } -+ -+ if(err < 0){ -+ printf("sigaction failed - errno = %d\n", errno); -+ kill(getpid(), SIGKILL); -+ } -+ -+ os_stop_process(os_getpid()); -+} -+ -+/* Changed only during early boot */ -+struct sc_frame signal_frame_sc; -+ -+struct sc_frame signal_frame_sc_sr; -+ -+struct sc_frame_raw { -+ struct common_raw common; -+ unsigned long sc; -+ int restorer; -+}; -+ -+/* Changed only during early boot */ -+static struct sc_frame_raw *raw_sc = NULL; -+ -+static void sc_handler(int sig, struct sigcontext sc) -+{ -+ raw_sc->common.sig = (unsigned long) &sig; -+ raw_sc->common.sr = frame_restorer(); -+ raw_sc->common.sp = frame_sp(); -+ raw_sc->sc = (unsigned long) &sc; -+ setup_arch_frame_raw(&raw_sc->common.arch, &sc + 1, raw_sc->common.sr); -+ -+ os_stop_process(os_getpid()); -+ kill(getpid(), SIGKILL); -+} -+ -+static int sc_child(void *arg) -+{ -+ raw_sc = arg; -+ 
child_common(&raw_sc->common, (sighandler_t) sc_handler, -+ raw_sc->restorer, 0); -+ return(-1); -+} -+ -+/* Changed only during early boot */ -+struct si_frame signal_frame_si; -+ -+struct si_frame_raw { -+ struct common_raw common; -+ unsigned long sip; -+ unsigned long si; -+ unsigned long ucp; -+ unsigned long uc; -+}; -+ -+/* Changed only during early boot */ -+static struct si_frame_raw *raw_si = NULL; -+ -+static void si_handler(int sig, siginfo_t *si, struct ucontext *ucontext) -+{ -+ raw_si->common.sig = (unsigned long) &sig; -+ raw_si->common.sr = frame_restorer(); -+ raw_si->common.sp = frame_sp(); -+ raw_si->sip = (unsigned long) &si; -+ raw_si->si = (unsigned long) si; -+ raw_si->ucp = (unsigned long) &ucontext; -+ raw_si->uc = (unsigned long) ucontext; -+ setup_arch_frame_raw(&raw_si->common.arch, -+ ucontext->uc_mcontext.fpregs, raw_si->common.sr); -+ -+ os_stop_process(os_getpid()); -+ kill(getpid(), SIGKILL); -+} -+ -+static int si_child(void *arg) -+{ -+ raw_si = arg; -+ child_common(&raw_si->common, (sighandler_t) si_handler, 1, -+ SA_SIGINFO); -+ return(-1); -+} -+ -+static int relative_sr(unsigned long sr, int sr_index, void *stack, -+ void *framep) -+{ -+ unsigned long *srp = (unsigned long *) sr; -+ unsigned long frame = (unsigned long) framep; -+ -+ if((*srp & PAGE_MASK) == (unsigned long) stack){ -+ *srp -= sr; -+ *((unsigned long *) (frame + sr_index)) = *srp; -+ return(1); -+ } -+ else return(0); -+} -+ -+static unsigned long capture_stack_common(int (*proc)(void *), void *arg, -+ struct common_raw *common_in, -+ void *top, void *sigstack, -+ int stack_len, -+ struct frame_common *common_out) -+{ -+ unsigned long sig_top = (unsigned long) sigstack + stack_len, base; -+ -+ common_in->stack = (void *) sigstack; -+ common_in->size = stack_len; -+ common_out->len = capture_stack(proc, arg, top, sig_top, -+ &common_out->data); -+ base = sig_top - common_out->len; -+ common_out->sig_index = common_in->sig - base; -+ common_out->sp_index = 
common_in->sp - base; -+ common_out->sr_index = common_in->sr - base; -+ common_out->sr_relative = relative_sr(common_in->sr, -+ common_out->sr_index, sigstack, -+ common_out->data); -+ return(base); -+} -+ -+void capture_signal_stack(void) -+{ -+ struct sc_frame_raw raw_sc; -+ struct si_frame_raw raw_si; -+ void *stack, *sigstack; -+ unsigned long top, sig_top, base; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ sigstack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if((stack == MAP_FAILED) || (sigstack == MAP_FAILED)){ -+ printf("capture_signal_stack : mmap failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+ -+ top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ sig_top = (unsigned long) sigstack + PAGE_SIZE; -+ -+ /* Get the sigcontext, no sigrestorer layout */ -+ raw_sc.restorer = 0; -+ base = capture_stack_common(sc_child, &raw_sc, &raw_sc.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_sc.common); -+ -+ signal_frame_sc.sc_index = raw_sc.sc - base; -+ setup_arch_frame(&raw_sc.common.arch, &signal_frame_sc.common.arch); -+ -+ /* Ditto for the sigcontext, sigrestorer layout */ -+ raw_sc.restorer = 1; -+ base = capture_stack_common(sc_child, &raw_sc, &raw_sc.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_sc_sr.common); -+ signal_frame_sc_sr.sc_index = raw_sc.sc - base; -+ setup_arch_frame(&raw_sc.common.arch, &signal_frame_sc_sr.common.arch); -+ -+ /* And the siginfo layout */ -+ -+ base = capture_stack_common(si_child, &raw_si, &raw_si.common, -+ (void *) top, sigstack, PAGE_SIZE, -+ &signal_frame_si.common); -+ signal_frame_si.sip_index = raw_si.sip - base; -+ signal_frame_si.si_index = raw_si.si - base; -+ signal_frame_si.ucp_index = raw_si.ucp - base; -+ signal_frame_si.uc_index = raw_si.uc - base; -+ setup_arch_frame(&raw_si.common.arch, &signal_frame_si.common.arch); -+ -+ if((munmap(stack, 
PAGE_SIZE) < 0) || -+ (munmap(sigstack, PAGE_SIZE) < 0)){ -+ printf("capture_signal_stack : munmap failed - errno = %d\n", -+ errno); -+ exit(1); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/frame_kern.c um/arch/um/kernel/frame_kern.c ---- orig/arch/um/kernel/frame_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/frame_kern.c Sun Dec 8 19:44:13 2002 -@@ -0,0 +1,171 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/signal.h" -+#include "asm/uaccess.h" -+#include "asm/ucontext.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "sysdep/ptrace.h" -+#include "choose-mode.h" -+#include "mode.h" -+ -+int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) -+{ -+ if (!access_ok (VERIFY_WRITE, to, sizeof(siginfo_t))) -+ return -EFAULT; -+ if (from->si_code < 0) -+ return __copy_to_user(to, from, sizeof(siginfo_t)); -+ else { -+ int err; -+ -+ /* If you change siginfo_t structure, please be sure -+ this code is fixed accordingly. -+ It should never copy any pad contained in the structure -+ to avoid security leaks, but must copy the generic -+ 3 ints plus the relevant union member. */ -+ err = __put_user(from->si_signo, &to->si_signo); -+ err |= __put_user(from->si_errno, &to->si_errno); -+ err |= __put_user((short)from->si_code, &to->si_code); -+ /* First 32bits of unions are always present. 
*/ -+ err |= __put_user(from->si_pid, &to->si_pid); -+ switch (from->si_code >> 16) { -+ case __SI_FAULT >> 16: -+ break; -+ case __SI_CHLD >> 16: -+ err |= __put_user(from->si_utime, &to->si_utime); -+ err |= __put_user(from->si_stime, &to->si_stime); -+ err |= __put_user(from->si_status, &to->si_status); -+ default: -+ err |= __put_user(from->si_uid, &to->si_uid); -+ break; -+ } -+ return err; -+ } -+} -+ -+static int copy_restorer(void (*restorer)(void), unsigned long start, -+ unsigned long sr_index, int sr_relative) -+{ -+ unsigned long sr; -+ -+ if(sr_relative){ -+ sr = (unsigned long) restorer; -+ sr += start + sr_index; -+ restorer = (void (*)(void)) sr; -+ } -+ -+ return(copy_to_user((void *) (start + sr_index), &restorer, -+ sizeof(restorer))); -+} -+ -+static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from, -+ struct arch_frame_data *arch) -+{ -+ return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), -+ arch), -+ copy_sc_to_user_skas(to, fp, &from->regs, -+ current->thread.cr2, -+ current->thread.err))); -+} -+ -+static int copy_ucontext_to_user(struct ucontext *uc, void *fp, sigset_t *set, -+ unsigned long sp) -+{ -+ int err = 0; -+ -+ err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp); -+ err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags); -+ err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size); -+ err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, -+ &signal_frame_si.common.arch); -+ err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set)); -+ return(err); -+} -+ -+int setup_signal_stack_si(unsigned long stack_top, int sig, -+ unsigned long handler, void (*restorer)(void), -+ struct pt_regs *regs, siginfo_t *info, -+ sigset_t *mask) -+{ -+ unsigned long start; -+ void *sip, *ucp, *fp; -+ -+ start = stack_top - signal_frame_si.common.len; -+ sip = (void *) (start + signal_frame_si.si_index); -+ ucp = (void *) (start + signal_frame_si.uc_index); -+ fp = (void *) (((unsigned long) ucp) + 
sizeof(struct ucontext)); -+ -+ if(restorer == NULL) -+ panic("setup_signal_stack_si - no restorer"); -+ -+ if(copy_to_user((void *) start, signal_frame_si.common.data, -+ signal_frame_si.common.len) || -+ copy_to_user((void *) (start + signal_frame_si.common.sig_index), -+ &sig, sizeof(sig)) || -+ copy_siginfo_to_user(sip, info) || -+ copy_to_user((void *) (start + signal_frame_si.sip_index), &sip, -+ sizeof(sip)) || -+ copy_ucontext_to_user(ucp, fp, mask, PT_REGS_SP(regs)) || -+ copy_to_user((void *) (start + signal_frame_si.ucp_index), &ucp, -+ sizeof(ucp)) || -+ copy_restorer(restorer, start, signal_frame_si.common.sr_index, -+ signal_frame_si.common.sr_relative)) -+ return(1); -+ -+ PT_REGS_IP(regs) = handler; -+ PT_REGS_SP(regs) = start + signal_frame_si.common.sp_index; -+ return(0); -+} -+ -+int setup_signal_stack_sc(unsigned long stack_top, int sig, -+ unsigned long handler, void (*restorer)(void), -+ struct pt_regs *regs, sigset_t *mask) -+{ -+ struct frame_common *frame = &signal_frame_sc_sr.common; -+ void *user_sc; -+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); -+ unsigned long sigs, sr; -+ unsigned long start = stack_top - frame->len - sig_size; -+ -+ user_sc = (void *) (start + signal_frame_sc_sr.sc_index); -+ if(restorer == NULL){ -+ frame = &signal_frame_sc.common; -+ user_sc = (void *) (start + signal_frame_sc.sc_index); -+ sr = (unsigned long) frame->data; -+ sr += frame->sr_index; -+ sr = *((unsigned long *) sr); -+ restorer = ((void (*)(void)) sr); -+ } -+ -+ sigs = start + frame->len; -+ if(copy_to_user((void *) start, frame->data, frame->len) || -+ copy_to_user((void *) (start + frame->sig_index), &sig, -+ sizeof(sig)) || -+ copy_sc_to_user(user_sc, NULL, regs, -+ &signal_frame_sc.common.arch) || -+ copy_to_user(sc_sigmask(user_sc), mask, sizeof(mask->sig[0])) || -+ copy_to_user((void *) sigs, &mask->sig[1], sig_size) || -+ copy_restorer(restorer, start, frame->sr_index, frame->sr_relative)) -+ return(1); -+ -+ PT_REGS_IP(regs) 
= handler; -+ PT_REGS_SP(regs) = start + frame->sp_index; -+ -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/gmon_syms.c um/arch/um/kernel/gmon_syms.c ---- orig/arch/um/kernel/gmon_syms.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/gmon_syms.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+ -+extern void __bb_init_func(void *); -+EXPORT_SYMBOL(__bb_init_func); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/gprof_syms.c um/arch/um/kernel/gprof_syms.c ---- orig/arch/um/kernel/gprof_syms.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/gprof_syms.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,20 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+ -+extern void mcount(void); -+EXPORT_SYMBOL(mcount); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/helper.c um/arch/um/kernel/helper.c ---- orig/arch/um/kernel/helper.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/helper.c Thu Oct 31 10:34:23 2002 -@@ -0,0 +1,153 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <fcntl.h> -+#include <sched.h> -+#include <sys/signal.h> -+#include <sys/wait.h> -+#include "user.h" -+#include "kern_util.h" -+#include "os.h" -+ -+struct helper_data { -+ void (*pre_exec)(void*); -+ void *pre_data; -+ char **argv; -+ int fd; -+}; -+ -+/* Debugging aid, changed only from gdb */ -+int helper_pause = 0; -+ -+static void helper_hup(int sig) -+{ -+} -+ -+static int helper_child(void *arg) -+{ -+ struct helper_data *data = arg; -+ char **argv = data->argv; -+ -+ if(helper_pause){ -+ signal(SIGHUP, helper_hup); -+ pause(); -+ } -+ if(data->pre_exec != NULL) -+ (*data->pre_exec)(data->pre_data); -+ execvp(argv[0], argv); -+ printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); -+ write(data->fd, &errno, sizeof(errno)); -+ os_kill_process(os_getpid(), 0); -+ return(0); -+} -+ -+/* XXX The alloc_stack here breaks if this is called in the tracing thread */ -+ -+int run_helper(void (*pre_exec)(void *), void *pre_data, char **argv, -+ unsigned long *stack_out) -+{ -+ struct helper_data data; -+ unsigned long stack, sp; -+ int pid, fds[2], err, n; -+ -+ if((stack_out != NULL) && (*stack_out != 0)) -+ stack = *stack_out; -+ else stack = alloc_stack(0, um_in_interrupt()); -+ if(stack == 0) return(-ENOMEM); -+ -+ err = os_pipe(fds, 1, 0); -+ if(err){ -+ printk("run_helper : pipe failed, errno = %d\n", -err); -+ return(err); -+ } -+ if(fcntl(fds[1], F_SETFD, 1) != 0){ -+ printk("run_helper : setting 
FD_CLOEXEC failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ sp = stack + page_size() - sizeof(void *); -+ data.pre_exec = pre_exec; -+ data.pre_data = pre_data; -+ data.argv = argv; -+ data.fd = fds[1]; -+ pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); -+ if(pid < 0){ -+ printk("run_helper : clone failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ close(fds[1]); -+ n = read(fds[0], &err, sizeof(err)); -+ if(n < 0){ -+ printk("run_helper : read on pipe failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ else if(n != 0){ -+ waitpid(pid, NULL, 0); -+ pid = -err; -+ } -+ -+ if(stack_out == NULL) free_stack(stack, 0); -+ else *stack_out = stack; -+ return(pid); -+} -+ -+int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, -+ unsigned long *stack_out, int stack_order) -+{ -+ unsigned long stack, sp; -+ int pid, status; -+ -+ stack = alloc_stack(stack_order, um_in_interrupt()); -+ if(stack == 0) return(-ENOMEM); -+ -+ sp = stack + (page_size() << stack_order) - sizeof(void *); -+ pid = clone(proc, (void *) sp, flags | SIGCHLD, arg); -+ if(pid < 0){ -+ printk("run_helper_thread : clone failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ if(stack_out == NULL){ -+ pid = waitpid(pid, &status, 0); -+ if(pid < 0) -+ printk("run_helper_thread - wait failed, errno = %d\n", -+ pid); -+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) -+ printk("run_helper_thread - thread returned status " -+ "0x%x\n", status); -+ free_stack(stack, stack_order); -+ } -+ else *stack_out = stack; -+ return(pid); -+} -+ -+int helper_wait(int pid, int block) -+{ -+ int ret; -+ -+ ret = waitpid(pid, NULL, WNOHANG); -+ if(ret < 0){ -+ printk("helper_wait : waitpid failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/init_task.c um/arch/um/kernel/init_task.c ---- orig/arch/um/kernel/init_task.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/init_task.c Sat Dec 28 19:58:44 2002 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "linux/sched.h" -+#include "linux/version.h" -+#include "asm/uaccess.h" -+#include "asm/pgtable.h" -+#include "user_util.h" -+#include "mem_user.h" -+ -+static struct fs_struct init_fs = INIT_FS; -+static struct files_struct init_files = INIT_FILES; -+static struct signal_struct init_signals = INIT_SIGNALS; -+struct mm_struct init_mm = INIT_MM(init_mm); -+ -+/* -+ * Initial task structure. -+ * -+ * We need to make sure that this is 16384-byte aligned due to the -+ * way process stacks are handled. This is done by having a special -+ * "init_task" linker map entry.. -+ */ -+ -+union task_union init_task_union -+__attribute__((__section__(".data.init_task"))) = -+{ INIT_TASK(init_task_union.task) }; -+ -+struct task_struct *alloc_task_struct(void) -+{ -+ return((struct task_struct *) -+ __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER)); -+} -+ -+void unprotect_stack(unsigned long stack) -+{ -+ protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, -+ 1, 1, 0, 1); -+} -+ -+void free_task_struct(struct task_struct *task) -+{ -+ /* free_pages decrements the page counter and only actually frees -+ * the pages if they are now not accessed by anything. 
-+ */ -+ free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/initrd_kern.c um/arch/um/kernel/initrd_kern.c ---- orig/arch/um/kernel/initrd_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/initrd_kern.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/bootmem.h" -+#include "linux/blk.h" -+#include "asm/types.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "initrd.h" -+#include "init.h" -+#include "os.h" -+ -+/* Changed by uml_initrd_setup, which is a setup */ -+static char *initrd __initdata = NULL; -+ -+static int __init read_initrd(void) -+{ -+ void *area; -+ long long size; -+ int err; -+ -+ if(initrd == NULL) return 0; -+ err = os_file_size(initrd, &size); -+ if(err) return 0; -+ area = alloc_bootmem(size); -+ if(area == NULL) return 0; -+ if(load_initrd(initrd, area, size) == -1) return 0; -+ initrd_start = (unsigned long) area; -+ initrd_end = initrd_start + size; -+ return 0; -+} -+ -+__uml_postsetup(read_initrd); -+ -+static int __init uml_initrd_setup(char *line, int *add) -+{ -+ initrd = line; -+ return 0; -+} -+ -+__uml_setup("initrd=", uml_initrd_setup, -+"initrd=<initrd image>\n" -+" This is used to boot UML from an initrd image. The argument is the\n" -+" name of the file containing the image.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/initrd_user.c um/arch/um/kernel/initrd_user.c ---- orig/arch/um/kernel/initrd_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/initrd_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <sys/types.h> -+#include <sys/stat.h> -+#include <fcntl.h> -+#include <errno.h> -+ -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "initrd.h" -+#include "os.h" -+ -+int load_initrd(char *filename, void *buf, int size) -+{ -+ int fd, n; -+ -+ if((fd = os_open_file(filename, of_read(OPENFLAGS()), 0)) < 0){ -+ printk("Opening '%s' failed - errno = %d\n", filename, errno); -+ return(-1); -+ } -+ if((n = read(fd, buf, size)) != size){ -+ printk("Read of %d bytes from '%s' returned %d, errno = %d\n", -+ size, filename, n, errno); -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/irq.c um/arch/um/kernel/irq.c ---- orig/arch/um/kernel/irq.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/irq.c Wed Mar 26 14:45:29 2003 -@@ -0,0 +1,842 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ * Derived (i.e. 
mostly copied) from arch/i386/kernel/irq.c: -+ * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "linux/irq.h" -+#include "linux/kernel_stat.h" -+#include "linux/interrupt.h" -+#include "linux/random.h" -+#include "linux/slab.h" -+#include "linux/file.h" -+#include "linux/proc_fs.h" -+#include "linux/init.h" -+#include "linux/seq_file.h" -+#include "asm/irq.h" -+#include "asm/hw_irq.h" -+#include "asm/hardirq.h" -+#include "asm/atomic.h" -+#include "asm/signal.h" -+#include "asm/system.h" -+#include "asm/errno.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+ -+static void register_irq_proc (unsigned int irq); -+ -+irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = -+ { [0 ... NR_IRQS-1] = { 0, &no_irq_type, NULL, 0, SPIN_LOCK_UNLOCKED}}; -+ -+/* -+ * Generic no controller code -+ */ -+ -+static void enable_none(unsigned int irq) { } -+static unsigned int startup_none(unsigned int irq) { return 0; } -+static void disable_none(unsigned int irq) { } -+static void ack_none(unsigned int irq) -+{ -+/* -+ * 'what should we do if we get a hw irq event on an illegal vector'. -+ * each architecture has to answer this themselves, it doesnt deserve -+ * a generic callback i think. -+ */ -+#if CONFIG_X86 -+ printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); -+#ifdef CONFIG_X86_LOCAL_APIC -+ /* -+ * Currently unexpected vectors happen only on SMP and APIC. -+ * We _must_ ack these because every local APIC has only N -+ * irq slots per priority level, and a 'hanging, unacked' IRQ -+ * holds up an irq slot - in excessive cases (when multiple -+ * unexpected vectors occur) that might lock up the APIC -+ * completely. 
-+ */ -+ ack_APIC_irq(); -+#endif -+#endif -+} -+ -+/* startup is the same as "enable", shutdown is same as "disable" */ -+#define shutdown_none disable_none -+#define end_none enable_none -+ -+struct hw_interrupt_type no_irq_type = { -+ "none", -+ startup_none, -+ shutdown_none, -+ enable_none, -+ disable_none, -+ ack_none, -+ end_none -+}; -+ -+/* Not changed */ -+volatile unsigned long irq_err_count; -+ -+/* -+ * Generic, controller-independent functions: -+ */ -+ -+int get_irq_list(char *buf) -+{ -+ int i, j; -+ unsigned long flags; -+ struct irqaction * action; -+ char *p = buf; -+ -+ p += sprintf(p, " "); -+ for (j=0; j<smp_num_cpus; j++) -+ p += sprintf(p, "CPU%d ",j); -+ *p++ = '\n'; -+ -+ for (i = 0 ; i < NR_IRQS ; i++) { -+ spin_lock_irqsave(&irq_desc[i].lock, flags); -+ action = irq_desc[i].action; -+ if (!action) -+ goto end; -+ p += sprintf(p, "%3d: ",i); -+#ifndef CONFIG_SMP -+ p += sprintf(p, "%10u ", kstat_irqs(i)); -+#else -+ for (j = 0; j < smp_num_cpus; j++) -+ p += sprintf(p, "%10u ", -+ kstat.irqs[cpu_logical_map(j)][i]); -+#endif -+ p += sprintf(p, " %14s", irq_desc[i].handler->typename); -+ p += sprintf(p, " %s", action->name); -+ -+ for (action=action->next; action; action = action->next) -+ p += sprintf(p, ", %s", action->name); -+ *p++ = '\n'; -+ end: -+ spin_unlock_irqrestore(&irq_desc[i].lock, flags); -+ } -+ p += sprintf(p, "\n"); -+#ifdef notdef -+#if CONFIG_SMP -+ p += sprintf(p, "LOC: "); -+ for (j = 0; j < smp_num_cpus; j++) -+ p += sprintf(p, "%10u ", -+ apic_timer_irqs[cpu_logical_map(j)]); -+ p += sprintf(p, "\n"); -+#endif -+#endif -+ p += sprintf(p, "ERR: %10lu\n", irq_err_count); -+ return p - buf; -+} -+ -+ -+/* -+ * This should really return information about whether -+ * we should do bottom half handling etc. Right now we -+ * end up _always_ checking the bottom half, which is a -+ * waste of time and is not what some drivers would -+ * prefer. 
-+ */ -+int handle_IRQ_event(unsigned int irq, struct pt_regs * regs, -+ struct irqaction * action) -+{ -+ int status; -+ int cpu = smp_processor_id(); -+ -+ irq_enter(cpu, irq); -+ -+ status = 1; /* Force the "do bottom halves" bit */ -+ -+ if (!(action->flags & SA_INTERRUPT)) -+ __sti(); -+ -+ do { -+ status |= action->flags; -+ action->handler(irq, action->dev_id, regs); -+ action = action->next; -+ } while (action); -+ if (status & SA_SAMPLE_RANDOM) -+ add_interrupt_randomness(irq); -+ __cli(); -+ -+ irq_exit(cpu, irq); -+ -+ return status; -+} -+ -+/* -+ * Generic enable/disable code: this just calls -+ * down into the PIC-specific version for the actual -+ * hardware disable after having gotten the irq -+ * controller lock. -+ */ -+ -+/** -+ * disable_irq_nosync - disable an irq without waiting -+ * @irq: Interrupt to disable -+ * -+ * Disable the selected interrupt line. Disables of an interrupt -+ * stack. Unlike disable_irq(), this function does not ensure existing -+ * instances of the IRQ handler have completed before returning. -+ * -+ * This function may be called from IRQ context. -+ */ -+ -+void inline disable_irq_nosync(unsigned int irq) -+{ -+ irq_desc_t *desc = irq_desc + irq; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&desc->lock, flags); -+ if (!desc->depth++) { -+ desc->status |= IRQ_DISABLED; -+ desc->handler->disable(irq); -+ } -+ spin_unlock_irqrestore(&desc->lock, flags); -+} -+ -+/** -+ * disable_irq - disable an irq and wait for completion -+ * @irq: Interrupt to disable -+ * -+ * Disable the selected interrupt line. Disables of an interrupt -+ * stack. That is for two disables you need two enables. This -+ * function waits for any pending IRQ handlers for this interrupt -+ * to complete before returning. If you use this function while -+ * holding a resource the IRQ handler may need you will deadlock. -+ * -+ * This function may be called - with care - from IRQ context. 
-+ */ -+ -+void disable_irq(unsigned int irq) -+{ -+ disable_irq_nosync(irq); -+ -+ if (!local_irq_count(smp_processor_id())) { -+ do { -+ barrier(); -+ } while (irq_desc[irq].status & IRQ_INPROGRESS); -+ } -+} -+ -+/** -+ * enable_irq - enable interrupt handling on an irq -+ * @irq: Interrupt to enable -+ * -+ * Re-enables the processing of interrupts on this IRQ line -+ * providing no disable_irq calls are now in effect. -+ * -+ * This function may be called from IRQ context. -+ */ -+ -+void enable_irq(unsigned int irq) -+{ -+ irq_desc_t *desc = irq_desc + irq; -+ unsigned long flags; -+ -+ spin_lock_irqsave(&desc->lock, flags); -+ switch (desc->depth) { -+ case 1: { -+ unsigned int status = desc->status & ~IRQ_DISABLED; -+ desc->status = status; -+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { -+ desc->status = status | IRQ_REPLAY; -+ hw_resend_irq(desc->handler,irq); -+ } -+ desc->handler->enable(irq); -+ /* fall-through */ -+ } -+ default: -+ desc->depth--; -+ break; -+ case 0: -+ printk(KERN_ERR "enable_irq() unbalanced from %p\n", -+ __builtin_return_address(0)); -+ } -+ spin_unlock_irqrestore(&desc->lock, flags); -+} -+ -+/* -+ * do_IRQ handles all normal device IRQ's (the special -+ * SMP cross-CPU interrupts have their own specific -+ * handlers). -+ */ -+unsigned int do_IRQ(int irq, union uml_pt_regs *regs) -+{ -+ /* -+ * 0 return value means that this irq is already being -+ * handled by some other CPU. 
(or is disabled) -+ */ -+ int cpu = smp_processor_id(); -+ irq_desc_t *desc = irq_desc + irq; -+ struct irqaction * action; -+ unsigned int status; -+ -+ kstat.irqs[cpu][irq]++; -+ spin_lock(&desc->lock); -+ desc->handler->ack(irq); -+ /* -+ REPLAY is when Linux resends an IRQ that was dropped earlier -+ WAITING is used by probe to mark irqs that are being tested -+ */ -+ status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); -+ status |= IRQ_PENDING; /* we _want_ to handle it */ -+ -+ /* -+ * If the IRQ is disabled for whatever reason, we cannot -+ * use the action we have. -+ */ -+ action = NULL; -+ if (!(status & (IRQ_DISABLED | IRQ_INPROGRESS))) { -+ action = desc->action; -+ status &= ~IRQ_PENDING; /* we commit to handling */ -+ status |= IRQ_INPROGRESS; /* we are handling it */ -+ } -+ desc->status = status; -+ -+ /* -+ * If there is no IRQ handler or it was disabled, exit early. -+ Since we set PENDING, if another processor is handling -+ a different instance of this same irq, the other processor -+ will take care of it. -+ */ -+ if (!action) -+ goto out; -+ -+ /* -+ * Edge triggered interrupts need to remember -+ * pending events. -+ * This applies to any hw interrupts that allow a second -+ * instance of the same irq to arrive while we are in do_IRQ -+ * or in the handler. But the code here only handles the _second_ -+ * instance of the irq, not the third or fourth. So it is mostly -+ * useful for irq hardware that does not mask cleanly in an -+ * SMP environment. -+ */ -+ for (;;) { -+ spin_unlock(&desc->lock); -+ handle_IRQ_event(irq, (struct pt_regs *) regs, action); -+ spin_lock(&desc->lock); -+ -+ if (!(desc->status & IRQ_PENDING)) -+ break; -+ desc->status &= ~IRQ_PENDING; -+ } -+ desc->status &= ~IRQ_INPROGRESS; -+out: -+ /* -+ * The ->end() handler has to deal with interrupts which got -+ * disabled while the handler was running. 
-+ */ -+ desc->handler->end(irq); -+ spin_unlock(&desc->lock); -+ -+ if (softirq_pending(cpu)) -+ do_softirq(); -+ return 1; -+} -+ -+/** -+ * request_irq - allocate an interrupt line -+ * @irq: Interrupt line to allocate -+ * @handler: Function to be called when the IRQ occurs -+ * @irqflags: Interrupt type flags -+ * @devname: An ascii name for the claiming device -+ * @dev_id: A cookie passed back to the handler function -+ * -+ * This call allocates interrupt resources and enables the -+ * interrupt line and IRQ handling. From the point this -+ * call is made your handler function may be invoked. Since -+ * your handler function must clear any interrupt the board -+ * raises, you must take care both to initialise your hardware -+ * and to set up the interrupt handler in the right order. -+ * -+ * Dev_id must be globally unique. Normally the address of the -+ * device data structure is used as the cookie. Since the handler -+ * receives this value it makes sense to use it. -+ * -+ * If your interrupt is shared you must pass a non NULL dev_id -+ * as this is required when freeing the interrupt. -+ * -+ * Flags: -+ * -+ * SA_SHIRQ Interrupt is shared -+ * -+ * SA_INTERRUPT Disable local interrupts while processing -+ * -+ * SA_SAMPLE_RANDOM The interrupt can be used for entropy -+ * -+ */ -+ -+int request_irq(unsigned int irq, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, -+ const char * devname, -+ void *dev_id) -+{ -+ int retval; -+ struct irqaction * action; -+ -+#if 1 -+ /* -+ * Sanity-check: shared interrupts should REALLY pass in -+ * a real dev-ID, otherwise we'll have trouble later trying -+ * to figure out which interrupt is which (messes up the -+ * interrupt freeing logic etc). 
-+ */ -+ if (irqflags & SA_SHIRQ) { -+ if (!dev_id) -+ printk(KERN_ERR "Bad boy: %s (at 0x%x) called us " -+ "without a dev_id!\n", devname, (&irq)[-1]); -+ } -+#endif -+ -+ if (irq >= NR_IRQS) -+ return -EINVAL; -+ if (!handler) -+ return -EINVAL; -+ -+ action = (struct irqaction *) -+ kmalloc(sizeof(struct irqaction), GFP_KERNEL); -+ if (!action) -+ return -ENOMEM; -+ -+ action->handler = handler; -+ action->flags = irqflags; -+ action->mask = 0; -+ action->name = devname; -+ action->next = NULL; -+ action->dev_id = dev_id; -+ -+ retval = setup_irq(irq, action); -+ if (retval) -+ kfree(action); -+ return retval; -+} -+ -+int um_request_irq(unsigned int irq, int fd, int type, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, const char * devname, -+ void *dev_id) -+{ -+ int err; -+ -+ err = request_irq(irq, handler, irqflags, devname, dev_id); -+ if(err) -+ return(err); -+ -+ if(fd != -1) -+ err = activate_fd(irq, fd, type, dev_id); -+ return(err); -+} -+ -+/* this was setup_x86_irq but it seems pretty generic */ -+int setup_irq(unsigned int irq, struct irqaction * new) -+{ -+ int shared = 0; -+ unsigned long flags; -+ struct irqaction *old, **p; -+ irq_desc_t *desc = irq_desc + irq; -+ -+ /* -+ * Some drivers like serial.c use request_irq() heavily, -+ * so we have to be careful not to interfere with a -+ * running system. -+ */ -+ if (new->flags & SA_SAMPLE_RANDOM) { -+ /* -+ * This function might sleep, we want to call it first, -+ * outside of the atomic block. -+ * Yes, this might clear the entropy pool if the wrong -+ * driver is attempted to be loaded, without actually -+ * installing a new handler, but is this really a problem, -+ * only the sysadmin is able to do this. 
-+ */ -+ rand_initialize_irq(irq); -+ } -+ -+ /* -+ * The following block of code has to be executed atomically -+ */ -+ spin_lock_irqsave(&desc->lock,flags); -+ p = &desc->action; -+ if ((old = *p) != NULL) { -+ /* Can't share interrupts unless both agree to */ -+ if (!(old->flags & new->flags & SA_SHIRQ)) { -+ spin_unlock_irqrestore(&desc->lock,flags); -+ return -EBUSY; -+ } -+ -+ /* add new interrupt at end of irq queue */ -+ do { -+ p = &old->next; -+ old = *p; -+ } while (old); -+ shared = 1; -+ } -+ -+ *p = new; -+ -+ if (!shared) { -+ desc->depth = 0; -+ desc->status &= ~IRQ_DISABLED; -+ desc->handler->startup(irq); -+ } -+ spin_unlock_irqrestore(&desc->lock,flags); -+ -+ register_irq_proc(irq); -+ return 0; -+} -+ -+/** -+ * free_irq - free an interrupt -+ * @irq: Interrupt line to free -+ * @dev_id: Device identity to free -+ * -+ * Remove an interrupt handler. The handler is removed and if the -+ * interrupt line is no longer in use by any driver it is disabled. -+ * On a shared IRQ the caller must ensure the interrupt is disabled -+ * on the card it drives before calling this function. The function -+ * does not return until any executing interrupts for this IRQ -+ * have completed. -+ * -+ * This function may be called from interrupt context. -+ * -+ * Bugs: Attempting to free an irq in a handler for the same irq hangs -+ * the machine. 
-+ */ -+ -+void free_irq(unsigned int irq, void *dev_id) -+{ -+ irq_desc_t *desc; -+ struct irqaction **p; -+ unsigned long flags; -+ -+ if (irq >= NR_IRQS) -+ return; -+ -+ desc = irq_desc + irq; -+ spin_lock_irqsave(&desc->lock,flags); -+ p = &desc->action; -+ for (;;) { -+ struct irqaction * action = *p; -+ if (action) { -+ struct irqaction **pp = p; -+ p = &action->next; -+ if (action->dev_id != dev_id) -+ continue; -+ -+ /* Found it - now remove it from the list of entries */ -+ *pp = action->next; -+ if (!desc->action) { -+ desc->status |= IRQ_DISABLED; -+ desc->handler->shutdown(irq); -+ } -+ free_irq_by_irq_and_dev(irq, dev_id); -+ spin_unlock_irqrestore(&desc->lock,flags); -+ -+#ifdef CONFIG_SMP -+ /* Wait to make sure it's not being used on another CPU */ -+ while (desc->status & IRQ_INPROGRESS) -+ barrier(); -+#endif -+ kfree(action); -+ return; -+ } -+ printk(KERN_ERR "Trying to free free IRQ%d\n",irq); -+ spin_unlock_irqrestore(&desc->lock,flags); -+ return; -+ } -+} -+ -+/* These are initialized by sysctl_init, which is called from init/main.c */ -+static struct proc_dir_entry * root_irq_dir; -+static struct proc_dir_entry * irq_dir [NR_IRQS]; -+static struct proc_dir_entry * smp_affinity_entry [NR_IRQS]; -+ -+/* These are read and written as longs, so a read won't see a partial write -+ * even during a race. -+ */ -+static unsigned long irq_affinity [NR_IRQS] = { [0 ... 
NR_IRQS-1] = ~0UL }; -+ -+#define HEX_DIGITS 8 -+ -+static int irq_affinity_read_proc (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ if (count < HEX_DIGITS+1) -+ return -EINVAL; -+ return sprintf (page, "%08lx\n", irq_affinity[(long)data]); -+} -+ -+static unsigned int parse_hex_value (const char *buffer, -+ unsigned long count, unsigned long *ret) -+{ -+ unsigned char hexnum [HEX_DIGITS]; -+ unsigned long value; -+ int i; -+ -+ if (!count) -+ return -EINVAL; -+ if (count > HEX_DIGITS) -+ count = HEX_DIGITS; -+ if (copy_from_user(hexnum, buffer, count)) -+ return -EFAULT; -+ -+ /* -+ * Parse the first 8 characters as a hex string, any non-hex char -+ * is end-of-string. '00e1', 'e1', '00E1', 'E1' are all the same. -+ */ -+ value = 0; -+ -+ for (i = 0; i < count; i++) { -+ unsigned int c = hexnum[i]; -+ -+ switch (c) { -+ case '0' ... '9': c -= '0'; break; -+ case 'a' ... 'f': c -= 'a'-10; break; -+ case 'A' ... 'F': c -= 'A'-10; break; -+ default: -+ goto out; -+ } -+ value = (value << 4) | c; -+ } -+out: -+ *ret = value; -+ return 0; -+} -+ -+static int irq_affinity_write_proc (struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ int irq = (long) data, full_count = count, err; -+ unsigned long new_value; -+ -+ if (!irq_desc[irq].handler->set_affinity) -+ return -EIO; -+ -+ err = parse_hex_value(buffer, count, &new_value); -+ -+#if CONFIG_SMP -+ /* -+ * Do not allow disabling IRQs completely - it's a too easy -+ * way to make the system unusable accidentally :-) At least -+ * one online CPU still has to be targeted. 
-+ */ -+ if (!(new_value & cpu_online_map)) -+ return -EINVAL; -+#endif -+ -+ irq_affinity[irq] = new_value; -+ irq_desc[irq].handler->set_affinity(irq, new_value); -+ -+ return full_count; -+} -+ -+static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, -+ int count, int *eof, void *data) -+{ -+ unsigned long *mask = (unsigned long *) data; -+ if (count < HEX_DIGITS+1) -+ return -EINVAL; -+ return sprintf (page, "%08lx\n", *mask); -+} -+ -+static int prof_cpu_mask_write_proc (struct file *file, const char *buffer, -+ unsigned long count, void *data) -+{ -+ unsigned long *mask = (unsigned long *) data, full_count = count, err; -+ unsigned long new_value; -+ -+ err = parse_hex_value(buffer, count, &new_value); -+ if (err) -+ return err; -+ -+ *mask = new_value; -+ return full_count; -+} -+ -+#define MAX_NAMELEN 10 -+ -+static void register_irq_proc (unsigned int irq) -+{ -+ struct proc_dir_entry *entry; -+ char name [MAX_NAMELEN]; -+ -+ if (!root_irq_dir || (irq_desc[irq].handler == &no_irq_type) || -+ irq_dir[irq]) -+ return; -+ -+ memset(name, 0, MAX_NAMELEN); -+ sprintf(name, "%d", irq); -+ -+ /* create /proc/irq/1234 */ -+ irq_dir[irq] = proc_mkdir(name, root_irq_dir); -+ -+ /* create /proc/irq/1234/smp_affinity */ -+ entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]); -+ -+ entry->nlink = 1; -+ entry->data = (void *)(long)irq; -+ entry->read_proc = irq_affinity_read_proc; -+ entry->write_proc = irq_affinity_write_proc; -+ -+ smp_affinity_entry[irq] = entry; -+} -+ -+/* Read and written as a long */ -+unsigned long prof_cpu_mask = -1; -+ -+void __init init_irq_proc (void) -+{ -+ struct proc_dir_entry *entry; -+ int i; -+ -+ /* create /proc/irq */ -+ root_irq_dir = proc_mkdir("irq", 0); -+ -+ /* create /proc/irq/prof_cpu_mask */ -+ entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir); -+ -+ entry->nlink = 1; -+ entry->data = (void *)&prof_cpu_mask; -+ entry->read_proc = prof_cpu_mask_read_proc; -+ entry->write_proc = 
prof_cpu_mask_write_proc; -+ -+ /* -+ * Create entries for all existing IRQs. -+ */ -+ for (i = 0; i < NR_IRQS; i++) -+ register_irq_proc(i); -+} -+ -+static spinlock_t irq_spinlock = SPIN_LOCK_UNLOCKED; -+ -+unsigned long irq_lock(void) -+{ -+ unsigned long flags; -+ -+ spin_lock_irqsave(&irq_spinlock, flags); -+ return(flags); -+} -+ -+void irq_unlock(unsigned long flags) -+{ -+ spin_unlock_irqrestore(&irq_spinlock, flags); -+} -+ -+unsigned long probe_irq_on(void) -+{ -+ return(0); -+} -+ -+int probe_irq_off(unsigned long val) -+{ -+ return(0); -+} -+ -+static unsigned int startup_SIGIO_irq(unsigned int irq) -+{ -+ return(0); -+} -+ -+static void shutdown_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void enable_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void disable_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static void mask_and_ack_SIGIO(unsigned int irq) -+{ -+} -+ -+static void end_SIGIO_irq(unsigned int irq) -+{ -+} -+ -+static unsigned int startup_SIGVTALRM_irq(unsigned int irq) -+{ -+ return(0); -+} -+ -+static void shutdown_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void enable_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void disable_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static void mask_and_ack_SIGVTALRM(unsigned int irq) -+{ -+} -+ -+static void end_SIGVTALRM_irq(unsigned int irq) -+{ -+} -+ -+static struct hw_interrupt_type SIGIO_irq_type = { -+ "SIGIO", -+ startup_SIGIO_irq, -+ shutdown_SIGIO_irq, -+ enable_SIGIO_irq, -+ disable_SIGIO_irq, -+ mask_and_ack_SIGIO, -+ end_SIGIO_irq, -+ NULL -+}; -+ -+static struct hw_interrupt_type SIGVTALRM_irq_type = { -+ "SIGVTALRM", -+ startup_SIGVTALRM_irq, -+ shutdown_SIGVTALRM_irq, -+ enable_SIGVTALRM_irq, -+ disable_SIGVTALRM_irq, -+ mask_and_ack_SIGVTALRM, -+ end_SIGVTALRM_irq, -+ NULL -+}; -+ -+void __init init_IRQ(void) -+{ -+ int i; -+ -+ irq_desc[TIMER_IRQ].status = IRQ_DISABLED; -+ irq_desc[TIMER_IRQ].action = 0; -+ irq_desc[TIMER_IRQ].depth = 1; -+ irq_desc[TIMER_IRQ].handler 
= &SIGVTALRM_irq_type; -+ enable_irq(TIMER_IRQ); -+ for(i=1;i<NR_IRQS;i++){ -+ irq_desc[i].status = IRQ_DISABLED; -+ irq_desc[i].action = 0; -+ irq_desc[i].depth = 1; -+ irq_desc[i].handler = &SIGIO_irq_type; -+ enable_irq(i); -+ } -+ init_irq_signals(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/irq_user.c um/arch/um/kernel/irq_user.c ---- orig/arch/um/kernel/irq_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/irq_user.c Sun Dec 22 15:49:46 2002 -@@ -0,0 +1,427 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <fcntl.h> -+#include <signal.h> -+#include <string.h> -+#include <sys/poll.h> -+#include <sys/types.h> -+#include <sys/time.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_user.h" -+#include "sigio.h" -+#include "irq_user.h" -+#include "os.h" -+ -+struct irq_fd { -+ struct irq_fd *next; -+ void *id; -+ int fd; -+ int type; -+ int irq; -+ int pid; -+ int events; -+ int current_events; -+ int freed; -+}; -+ -+static struct irq_fd *active_fds = NULL; -+static struct irq_fd **last_irq_ptr = &active_fds; -+ -+static struct pollfd *pollfds = NULL; -+static int pollfds_num = 0; -+static int pollfds_size = 0; -+ -+extern int io_count, intr_count; -+ -+void sigio_handler(int sig, union uml_pt_regs *regs) -+{ -+ struct irq_fd *irq_fd, *next; -+ int i, n; -+ -+ if(smp_sigio_handler()) return; -+ while(1){ -+ if((n = poll(pollfds, pollfds_num, 0)) < 0){ -+ if(errno == EINTR) 
continue; -+ printk("sigio_handler : poll returned %d, " -+ "errno = %d\n", n, errno); -+ break; -+ } -+ if(n == 0) break; -+ -+ irq_fd = active_fds; -+ for(i = 0; i < pollfds_num; i++){ -+ if(pollfds[i].revents != 0){ -+ irq_fd->current_events = pollfds[i].revents; -+ pollfds[i].fd = -1; -+ } -+ irq_fd = irq_fd->next; -+ } -+ -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = next){ -+ next = irq_fd->next; -+ if(irq_fd->current_events != 0){ -+ irq_fd->current_events = 0; -+ do_IRQ(irq_fd->irq, regs); -+ -+ /* This is here because the next irq may be -+ * freed in the handler. If a console goes -+ * away, both the read and write irqs will be -+ * freed. After do_IRQ, ->next will point to -+ * a good IRQ. -+ * Irqs can't be freed inside their handlers, -+ * so the next best thing is to have them -+ * marked as needing freeing, so that they -+ * can be freed here. -+ */ -+ next = irq_fd->next; -+ if(irq_fd->freed) -+ free_irq(irq_fd->irq, irq_fd->id); -+ } -+ } -+ } -+} -+ -+int activate_ipi(int fd, int pid) -+{ -+ return(os_set_fd_async(fd, pid)); -+} -+ -+static void maybe_sigio_broken(int fd, int type) -+{ -+ if(isatty(fd)){ -+ if((type == IRQ_WRITE) && !pty_output_sigio){ -+ write_sigio_workaround(); -+ add_sigio_fd(fd, 0); -+ } -+ else if((type == IRQ_READ) && !pty_close_sigio){ -+ write_sigio_workaround(); -+ add_sigio_fd(fd, 1); -+ } -+ } -+} -+ -+int activate_fd(int irq, int fd, int type, void *dev_id) -+{ -+ struct pollfd *tmp_pfd; -+ struct irq_fd *new_fd, *irq_fd; -+ unsigned long flags; -+ int pid, events, err, n, size; -+ -+ pid = os_getpid(); -+ err = os_set_fd_async(fd, pid); -+ if(err < 0) -+ goto out; -+ -+ new_fd = um_kmalloc(sizeof(*new_fd)); -+ err = -ENOMEM; -+ if(new_fd == NULL) -+ goto out; -+ -+ if(type == IRQ_READ) events = POLLIN | POLLPRI; -+ else events = POLLOUT; -+ *new_fd = ((struct irq_fd) { .next = NULL, -+ .id = dev_id, -+ .fd = fd, -+ .type = type, -+ .irq = irq, -+ .pid = pid, -+ .events = events, -+ .current_events = 0, -+ 
.freed = 0 } ); -+ -+ /* Critical section - locked by a spinlock because this stuff can -+ * be changed from interrupt handlers. The stuff above is done -+ * outside the lock because it allocates memory. -+ */ -+ -+ /* Actually, it only looks like it can be called from interrupt -+ * context. The culprit is reactivate_fd, which calls -+ * maybe_sigio_broken, which calls write_sigio_workaround, -+ * which calls activate_fd. However, write_sigio_workaround should -+ * only be called once, at boot time. That would make it clear that -+ * this is called only from process context, and can be locked with -+ * a semaphore. -+ */ -+ flags = irq_lock(); -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ -+ if((irq_fd->fd == fd) && (irq_fd->type == type)){ -+ printk("Registering fd %d twice\n", fd); -+ printk("Irqs : %d, %d\n", irq_fd->irq, irq); -+ printk("Ids : 0x%x, 0x%x\n", irq_fd->id, dev_id); -+ goto out_unlock; -+ } -+ } -+ -+ n = pollfds_num; -+ if(n == pollfds_size){ -+ while(1){ -+ /* Here we have to drop the lock in order to call -+ * kmalloc, which might sleep. If something else -+ * came in and changed the pollfds array, we free -+ * the buffer and try again. -+ */ -+ irq_unlock(flags); -+ size = (pollfds_num + 1) * sizeof(pollfds[0]); -+ tmp_pfd = um_kmalloc(size); -+ flags = irq_lock(); -+ if(tmp_pfd == NULL) -+ goto out_unlock; -+ if(n == pollfds_size) -+ break; -+ kfree(tmp_pfd); -+ } -+ if(pollfds != NULL){ -+ memcpy(tmp_pfd, pollfds, -+ sizeof(pollfds[0]) * pollfds_size); -+ kfree(pollfds); -+ } -+ pollfds = tmp_pfd; -+ pollfds_size++; -+ } -+ -+ if(type == IRQ_WRITE) -+ fd = -1; -+ -+ pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, -+ .events = events, -+ .revents = 0 }); -+ pollfds_num++; -+ -+ *last_irq_ptr = new_fd; -+ last_irq_ptr = &new_fd->next; -+ -+ irq_unlock(flags); -+ -+ /* This calls activate_fd, so it has to be outside the critical -+ * section. 
-+ */ -+ maybe_sigio_broken(fd, type); -+ -+ return(0); -+ -+ out_unlock: -+ irq_unlock(flags); -+ kfree(new_fd); -+ out: -+ return(err); -+} -+ -+static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) -+{ -+ struct irq_fd **prev; -+ unsigned long flags; -+ int i = 0; -+ -+ flags = irq_lock(); -+ prev = &active_fds; -+ while(*prev != NULL){ -+ if((*test)(*prev, arg)){ -+ struct irq_fd *old_fd = *prev; -+ if((pollfds[i].fd != -1) && -+ (pollfds[i].fd != (*prev)->fd)){ -+ printk("free_irq_by_cb - mismatch between " -+ "active_fds and pollfds, fd %d vs %d\n", -+ (*prev)->fd, pollfds[i].fd); -+ goto out; -+ } -+ memcpy(&pollfds[i], &pollfds[i + 1], -+ (pollfds_num - i - 1) * sizeof(pollfds[0])); -+ pollfds_num--; -+ if(last_irq_ptr == &old_fd->next) -+ last_irq_ptr = prev; -+ *prev = (*prev)->next; -+ if(old_fd->type == IRQ_WRITE) -+ ignore_sigio_fd(old_fd->fd); -+ kfree(old_fd); -+ continue; -+ } -+ prev = &(*prev)->next; -+ i++; -+ } -+ out: -+ irq_unlock(flags); -+} -+ -+struct irq_and_dev { -+ int irq; -+ void *dev; -+}; -+ -+static int same_irq_and_dev(struct irq_fd *irq, void *d) -+{ -+ struct irq_and_dev *data = d; -+ -+ return((irq->irq == data->irq) && (irq->id == data->dev)); -+} -+ -+void free_irq_by_irq_and_dev(int irq, void *dev) -+{ -+ struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, -+ .dev = dev }); -+ -+ free_irq_by_cb(same_irq_and_dev, &data); -+} -+ -+static int same_fd(struct irq_fd *irq, void *fd) -+{ -+ return(irq->fd == *((int *) fd)); -+} -+ -+void free_irq_by_fd(int fd) -+{ -+ free_irq_by_cb(same_fd, &fd); -+} -+ -+static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) -+{ -+ struct irq_fd *irq; -+ int i = 0; -+ -+ for(irq=active_fds; irq != NULL; irq = irq->next){ -+ if((irq->fd == fd) && (irq->irq == irqnum)) break; -+ i++; -+ } -+ if(irq == NULL){ -+ printk("find_irq_by_fd doesn't have descriptor %d\n", fd); -+ goto out; -+ } -+ if((pollfds[i].fd != -1) && (pollfds[i].fd != fd)){ -+ 
printk("find_irq_by_fd - mismatch between active_fds and " -+ "pollfds, fd %d vs %d, need %d\n", irq->fd, -+ pollfds[i].fd, fd); -+ irq = NULL; -+ goto out; -+ } -+ *index_out = i; -+ out: -+ return(irq); -+} -+ -+void free_irq_later(int irq, void *dev_id) -+{ -+ struct irq_fd *irq_fd; -+ unsigned long flags; -+ -+ flags = irq_lock(); -+ for(irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next){ -+ if((irq_fd->irq == irq) && (irq_fd->id == dev_id)) -+ break; -+ } -+ if(irq_fd == NULL){ -+ printk("free_irq_later found no irq, irq = %d, " -+ "dev_id = 0x%p\n", irq, dev_id); -+ goto out; -+ } -+ irq_fd->freed = 1; -+ out: -+ irq_unlock(flags); -+} -+ -+void reactivate_fd(int fd, int irqnum) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int i; -+ -+ flags = irq_lock(); -+ irq = find_irq_by_fd(fd, irqnum, &i); -+ if(irq == NULL){ -+ irq_unlock(flags); -+ return; -+ } -+ -+ pollfds[i].fd = irq->fd; -+ -+ irq_unlock(flags); -+ -+ /* This calls activate_fd, so it has to be outside the critical -+ * section. 
-+ */ -+ maybe_sigio_broken(fd, irq->type); -+} -+ -+void deactivate_fd(int fd, int irqnum) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ int i; -+ -+ flags = irq_lock(); -+ irq = find_irq_by_fd(fd, irqnum, &i); -+ if(irq == NULL) -+ goto out; -+ pollfds[i].fd = -1; -+ out: -+ irq_unlock(flags); -+} -+ -+void forward_ipi(int fd, int pid) -+{ -+ if(fcntl(fd, F_SETOWN, pid) < 0){ -+ int save_errno = errno; -+ if(fcntl(fd, F_GETOWN, 0) != pid){ -+ printk("forward_ipi: F_SETOWN failed, fd = %d, " -+ "me = %d, target = %d, errno = %d\n", fd, -+ os_getpid(), pid, save_errno); -+ } -+ } -+} -+ -+void forward_interrupts(int pid) -+{ -+ struct irq_fd *irq; -+ unsigned long flags; -+ -+ flags = irq_lock(); -+ for(irq=active_fds;irq != NULL;irq = irq->next){ -+ if(fcntl(irq->fd, F_SETOWN, pid) < 0){ -+ int save_errno = errno; -+ if(fcntl(irq->fd, F_GETOWN, 0) != pid){ -+ /* XXX Just remove the irq rather than -+ * print out an infinite stream of these -+ */ -+ printk("Failed to forward %d to pid %d, " -+ "errno = %d\n", irq->fd, pid, -+ save_errno); -+ } -+ } -+ irq->pid = pid; -+ } -+ irq_unlock(flags); -+} -+ -+void init_irq_signals(int on_sigstack) -+{ -+ __sighandler_t h; -+ int flags; -+ -+ flags = on_sigstack ? SA_ONSTACK : 0; -+ if(timer_irq_inited) h = (__sighandler_t) alarm_handler; -+ else h = boot_timer_handler; -+ -+ set_handler(SIGVTALRM, h, flags | SA_RESTART, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, -1); -+ set_handler(SIGIO, (__sighandler_t) sig_handler, flags | SA_RESTART, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ signal(SIGWINCH, SIG_IGN); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/ksyms.c um/arch/um/kernel/ksyms.c ---- orig/arch/um/kernel/ksyms.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/ksyms.c Tue Dec 17 13:29:43 2002 -@@ -0,0 +1,94 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/module.h" -+#include "linux/string.h" -+#include "linux/smp_lock.h" -+#include "linux/spinlock.h" -+#include "asm/current.h" -+#include "asm/delay.h" -+#include "asm/processor.h" -+#include "asm/unistd.h" -+#include "asm/pgalloc.h" -+#include "asm/pgtable.h" -+#include "asm/page.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "helper.h" -+ -+EXPORT_SYMBOL(stop); -+EXPORT_SYMBOL(strtok); -+EXPORT_SYMBOL(uml_physmem); -+EXPORT_SYMBOL(set_signals); -+EXPORT_SYMBOL(get_signals); -+EXPORT_SYMBOL(kernel_thread); -+EXPORT_SYMBOL(__const_udelay); -+EXPORT_SYMBOL(__udelay); -+EXPORT_SYMBOL(sys_waitpid); -+EXPORT_SYMBOL(task_size); -+EXPORT_SYMBOL(flush_tlb_range); -+EXPORT_SYMBOL(host_task_size); -+EXPORT_SYMBOL(arch_validate); -+ -+EXPORT_SYMBOL(region_pa); -+EXPORT_SYMBOL(region_va); -+EXPORT_SYMBOL(phys_mem_map); -+EXPORT_SYMBOL(page_mem_map); -+EXPORT_SYMBOL(high_physmem); -+EXPORT_SYMBOL(empty_zero_page); -+EXPORT_SYMBOL(um_virt_to_phys); -+EXPORT_SYMBOL(mode_tt); -+EXPORT_SYMBOL(handle_page_fault); -+ -+EXPORT_SYMBOL(os_getpid); -+EXPORT_SYMBOL(os_open_file); -+EXPORT_SYMBOL(os_read_file); -+EXPORT_SYMBOL(os_write_file); -+EXPORT_SYMBOL(os_seek_file); -+EXPORT_SYMBOL(os_pipe); -+EXPORT_SYMBOL(os_file_type); -+EXPORT_SYMBOL(os_close_file); -+EXPORT_SYMBOL(helper_wait); -+EXPORT_SYMBOL(os_shutdown_socket); -+EXPORT_SYMBOL(os_connect_socket); -+EXPORT_SYMBOL(run_helper); -+EXPORT_SYMBOL(start_thread); -+EXPORT_SYMBOL(dump_thread); -+ 
-+/* This is here because UML expands open to sys_open, not to a system -+ * call instruction. -+ */ -+EXPORT_SYMBOL(sys_open); -+EXPORT_SYMBOL(sys_lseek); -+EXPORT_SYMBOL(sys_read); -+EXPORT_SYMBOL(sys_wait4); -+ -+#ifdef CONFIG_SMP -+ -+/* required for SMP */ -+ -+extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -+EXPORT_SYMBOL_NOVERS(__write_lock_failed); -+ -+extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -+EXPORT_SYMBOL_NOVERS(__read_lock_failed); -+ -+EXPORT_SYMBOL(kernel_flag_cacheline); -+EXPORT_SYMBOL(smp_num_cpus); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/mem.c um/arch/um/kernel/mem.c ---- orig/arch/um/kernel/mem.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/mem.c Sun Mar 30 14:30:26 2003 -@@ -0,0 +1,852 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/types.h" -+#include "linux/mm.h" -+#include "linux/fs.h" -+#include "linux/init.h" -+#include "linux/bootmem.h" -+#include "linux/swap.h" -+#include "linux/slab.h" -+#include "linux/vmalloc.h" -+#include "linux/highmem.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/pgalloc.h" -+#include "asm/bitops.h" -+#include "asm/uaccess.h" -+#include "asm/tlb.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "mem_user.h" -+#include "mem.h" -+#include "kern.h" -+#include "init.h" -+#include "os.h" -+#include "mode_kern.h" -+#include "uml_uaccess.h" -+ -+/* Changed during early boot */ -+pgd_t swapper_pg_dir[1024]; -+unsigned long high_physmem; 
-+unsigned long vm_start; -+unsigned long vm_end; -+unsigned long highmem; -+unsigned long *empty_zero_page = NULL; -+unsigned long *empty_bad_page = NULL; -+ -+/* Not modified */ -+const char bad_pmd_string[] = "Bad pmd in pte_alloc: %08lx\n"; -+ -+/* Changed during early boot */ -+static unsigned long totalram_pages = 0; -+ -+extern char __init_begin, __init_end; -+extern long physmem_size; -+ -+#ifdef CONFIG_SMP -+/* Not changed by UML */ -+mmu_gather_t mmu_gathers[NR_CPUS]; -+#endif -+ -+/* Changed during early boot */ -+int kmalloc_ok = 0; -+ -+#define NREGIONS (phys_region_index(0xffffffff) - phys_region_index(0x0) + 1) -+struct mem_region *regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = NULL }; -+#define REGION_SIZE ((0xffffffff & ~REGION_MASK) + 1) -+ -+/* Changed during early boot */ -+static unsigned long brk_end; -+ -+static void map_cb(void *unused) -+{ -+ map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); -+} -+ -+void unmap_physmem(void) -+{ -+ os_unmap_memory((void *) brk_end, uml_reserved - brk_end); -+} -+ -+extern char __binary_start; -+ -+void mem_init(void) -+{ -+ unsigned long start; -+ -+#ifdef CONFIG_HIGHMEM -+ highmem_start_page = phys_page(__pa(high_physmem)); -+#endif -+ -+ /* clear the zero-page */ -+ memset((void *) empty_zero_page, 0, PAGE_SIZE); -+ -+ /* Map in the area just after the brk now that kmalloc is about -+ * to be turned on. 
-+ */ -+ brk_end = (unsigned long) UML_ROUND_UP(sbrk(0)); -+ map_cb(NULL); -+ initial_thread_cb(map_cb, NULL); -+ free_bootmem(__pa(brk_end), uml_reserved - brk_end); -+ uml_reserved = brk_end; -+ -+ /* Fill in any hole at the start of the binary */ -+ start = (unsigned long) &__binary_start; -+ if(uml_physmem != start){ -+ map_memory(uml_physmem, __pa(uml_physmem), start - uml_physmem, -+ 1, 1, 0); -+ } -+ -+ /* this will put all low memory onto the freelists */ -+ totalram_pages = free_all_bootmem(); -+ totalram_pages += highmem >> PAGE_SHIFT; -+ max_mapnr = totalram_pages; -+ num_physpages = totalram_pages; -+ printk(KERN_INFO "Memory: %luk available\n", -+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); -+ kmalloc_ok = 1; -+} -+ -+/* Changed during early boot */ -+static unsigned long kmem_top = 0; -+ -+unsigned long get_kmem_end(void) -+{ -+ if(kmem_top == 0) -+ kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); -+ return(kmem_top); -+} -+ -+void set_kmem_end(unsigned long new) -+{ -+ kmem_top = new; -+} -+ -+#if CONFIG_HIGHMEM -+/* Changed during early boot */ -+pte_t *kmap_pte; -+pgprot_t kmap_prot; -+ -+#define kmap_get_fixmap_pte(vaddr) \ -+ pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) -+ -+void __init kmap_init(void) -+{ -+ unsigned long kmap_vstart; -+ -+ /* cache the first kmap pte */ -+ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); -+ kmap_pte = kmap_get_fixmap_pte(kmap_vstart); -+ -+ kmap_prot = PAGE_KERNEL; -+} -+#endif /* CONFIG_HIGHMEM */ -+ -+static void __init fixrange_init(unsigned long start, unsigned long end, -+ pgd_t *pgd_base) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ int i, j; -+ unsigned long vaddr; -+ -+ vaddr = start; -+ i = __pgd_offset(vaddr); -+ j = __pmd_offset(vaddr); -+ pgd = pgd_base + i; -+ -+ for ( ; (i < PTRS_PER_PGD) && (vaddr < end); pgd++, i++) { -+ pmd = (pmd_t *)pgd; -+ for (; (j < PTRS_PER_PMD) && (vaddr != end); pmd++, j++) { -+ if (pmd_none(*pmd)) { -+ pte = (pte_t *) 
alloc_bootmem_low_pages(PAGE_SIZE); -+ set_pmd(pmd, __pmd(_KERNPG_TABLE + -+ (unsigned long) __pa(pte))); -+ if (pte != pte_offset(pmd, 0)) -+ BUG(); -+ } -+ vaddr += PMD_SIZE; -+ } -+ j = 0; -+ } -+} -+ -+int init_maps(struct mem_region *region) -+{ -+ struct page *p, *map; -+ int i, n, len; -+ -+ if(region == &physmem_region){ -+ region->mem_map = mem_map; -+ return(0); -+ } -+ else if(region->mem_map != NULL) return(0); -+ -+ n = region->len >> PAGE_SHIFT; -+ len = n * sizeof(struct page); -+ if(kmalloc_ok){ -+ map = kmalloc(len, GFP_KERNEL); -+ if(map == NULL) map = vmalloc(len); -+ } -+ else map = alloc_bootmem_low_pages(len); -+ -+ if(map == NULL) -+ return(-ENOMEM); -+ for(i = 0; i < n; i++){ -+ p = &map[i]; -+ set_page_count(p, 0); -+ SetPageReserved(p); -+ INIT_LIST_HEAD(&p->list); -+ } -+ region->mem_map = map; -+ return(0); -+} -+ -+DECLARE_MUTEX(regions_sem); -+ -+static int setup_one_range(int fd, char *driver, unsigned long start, -+ unsigned long pfn, int len, -+ struct mem_region *region) -+{ -+ int i; -+ -+ down(®ions_sem); -+ for(i = 0; i < NREGIONS; i++){ -+ if(regions[i] == NULL) break; -+ } -+ if(i == NREGIONS){ -+ printk("setup_one_range : no free regions\n"); -+ i = -1; -+ goto out; -+ } -+ -+ if(fd == -1) -+ fd = create_mem_file(len); -+ -+ if(region == NULL){ -+ if(kmalloc_ok) -+ region = kmalloc(sizeof(*region), GFP_KERNEL); -+ else region = alloc_bootmem_low_pages(sizeof(*region)); -+ if(region == NULL) -+ panic("Failed to allocating mem_region"); -+ } -+ -+ *region = ((struct mem_region) { .driver = driver, -+ .start_pfn = pfn, -+ .start = start, -+ .len = len, -+ .fd = fd } ); -+ regions[i] = region; -+ out: -+ up(®ions_sem); -+ return(i); -+} -+ -+#ifdef CONFIG_HIGHMEM -+static void init_highmem(void) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long vaddr; -+ -+ /* -+ * Permanent kmaps: -+ */ -+ vaddr = PKMAP_BASE; -+ fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); -+ -+ pgd = swapper_pg_dir + 
__pgd_offset(vaddr); -+ pmd = pmd_offset(pgd, vaddr); -+ pte = pte_offset(pmd, vaddr); -+ pkmap_page_table = pte; -+ -+ kmap_init(); -+} -+ -+void setup_highmem(unsigned long len) -+{ -+ struct mem_region *region; -+ struct page *page, *map; -+ unsigned long phys; -+ int i, cur, index; -+ -+ phys = physmem_size; -+ do { -+ cur = min(len, (unsigned long) REGION_SIZE); -+ i = setup_one_range(-1, NULL, -1, phys >> PAGE_SHIFT, cur, -+ NULL); -+ if(i == -1){ -+ printk("setup_highmem - setup_one_range failed\n"); -+ return; -+ } -+ region = regions[i]; -+ index = phys / PAGE_SIZE; -+ region->mem_map = &mem_map[index]; -+ -+ map = region->mem_map; -+ for(i = 0; i < (cur >> PAGE_SHIFT); i++){ -+ page = &map[i]; -+ ClearPageReserved(page); -+ set_bit(PG_highmem, &page->flags); -+ atomic_set(&page->count, 1); -+ __free_page(page); -+ } -+ phys += cur; -+ len -= cur; -+ } while(len > 0); -+} -+#endif -+ -+void paging_init(void) -+{ -+ struct mem_region *region; -+ unsigned long zones_size[MAX_NR_ZONES], start, end, vaddr; -+ int i, index; -+ -+ empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); -+ empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); -+ for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) -+ zones_size[i] = 0; -+ zones_size[0] = (high_physmem >> PAGE_SHIFT) - -+ (uml_physmem >> PAGE_SHIFT); -+ zones_size[2] = highmem >> PAGE_SHIFT; -+ free_area_init(zones_size); -+ start = phys_region_index(__pa(uml_physmem)); -+ end = phys_region_index(__pa(high_physmem - 1)); -+ for(i = start; i <= end; i++){ -+ region = regions[i]; -+ index = (region->start - uml_physmem) / PAGE_SIZE; -+ region->mem_map = &mem_map[index]; -+ if(i > start) free_bootmem(__pa(region->start), region->len); -+ } -+ -+ /* -+ * Fixed mappings, only the page table structure has to be -+ * created - mappings will be set by set_fixmap(): -+ */ -+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; -+ fixrange_init(vaddr, FIXADDR_TOP, 
swapper_pg_dir); -+ -+#if CONFIG_HIGHMEM -+ init_highmem(); -+ setup_highmem(highmem); -+#endif -+} -+ -+/* Changed by meminfo_compat, which is a setup */ -+static int meminfo_22 = 0; -+ -+static int meminfo_compat(char *str) -+{ -+ meminfo_22 = 1; -+ return(1); -+} -+ -+__setup("22_meminfo", meminfo_compat); -+ -+void si_meminfo(struct sysinfo *val) -+{ -+ val->totalram = totalram_pages; -+ val->sharedram = 0; -+ val->freeram = nr_free_pages(); -+ val->bufferram = atomic_read(&buffermem_pages); -+ val->totalhigh = highmem >> PAGE_SHIFT; -+ val->freehigh = nr_free_highpages(); -+ val->mem_unit = PAGE_SIZE; -+ if(meminfo_22){ -+ val->freeram <<= PAGE_SHIFT; -+ val->bufferram <<= PAGE_SHIFT; -+ val->totalram <<= PAGE_SHIFT; -+ val->sharedram <<= PAGE_SHIFT; -+ } -+} -+ -+pte_t __bad_page(void) -+{ -+ clear_page(empty_bad_page); -+ return pte_mkdirty(mk_pte((struct page *) empty_bad_page, -+ PAGE_SHARED)); -+} -+ -+/* This can't do anything because nothing in the kernel image can be freed -+ * since it's not in kernel physical memory. 
-+ */ -+ -+void free_initmem(void) -+{ -+} -+ -+#ifdef CONFIG_BLK_DEV_INITRD -+ -+void free_initrd_mem(unsigned long start, unsigned long end) -+{ -+ if (start < end) -+ printk ("Freeing initrd memory: %ldk freed\n", -+ (end - start) >> 10); -+ for (; start < end; start += PAGE_SIZE) { -+ ClearPageReserved(virt_to_page(start)); -+ set_page_count(virt_to_page(start), 1); -+ free_page(start); -+ totalram_pages++; -+ } -+} -+ -+#endif -+ -+int do_check_pgt_cache(int low, int high) -+{ -+ int freed = 0; -+ if(pgtable_cache_size > high) { -+ do { -+ if (pgd_quicklist) { -+ free_pgd_slow(get_pgd_fast()); -+ freed++; -+ } -+ if (pmd_quicklist) { -+ pmd_free_slow(pmd_alloc_one_fast(NULL, 0)); -+ freed++; -+ } -+ if (pte_quicklist) { -+ pte_free_slow(pte_alloc_one_fast(NULL, 0)); -+ freed++; -+ } -+ } while(pgtable_cache_size > low); -+ } -+ return freed; -+} -+ -+void show_mem(void) -+{ -+ int i, total = 0, reserved = 0; -+ int shared = 0, cached = 0; -+ int highmem = 0; -+ -+ printk("Mem-info:\n"); -+ show_free_areas(); -+ printk("Free swap: %6dkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); -+ i = max_mapnr; -+ while(i-- > 0) { -+ total++; -+ if(PageHighMem(mem_map + i)) -+ highmem++; -+ if(PageReserved(mem_map + i)) -+ reserved++; -+ else if(PageSwapCache(mem_map + i)) -+ cached++; -+ else if(page_count(mem_map + i)) -+ shared += page_count(mem_map + i) - 1; -+ } -+ printk("%d pages of RAM\n", total); -+ printk("%d pages of HIGHMEM\n", highmem); -+ printk("%d reserved pages\n", reserved); -+ printk("%d pages shared\n", shared); -+ printk("%d pages swap cached\n", cached); -+ printk("%ld pages in page table cache\n", pgtable_cache_size); -+ show_buffers(); -+} -+ -+static int __init uml_mem_setup(char *line, int *add) -+{ -+ char *retptr; -+ physmem_size = memparse(line,&retptr); -+ return 0; -+} -+__uml_setup("mem=", uml_mem_setup, -+"mem=<Amount of desired ram>\n" -+" This controls how much \"physical\" memory the kernel allocates\n" -+" for the system. 
The size is specified as a number followed by\n" -+" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" -+" This is not related to the amount of memory in the physical\n" -+" machine. It can be more, and the excess, if it's ever used, will\n" -+" just be swapped out.\n Example: mem=64M\n\n" -+); -+ -+struct page *arch_validate(struct page *page, int mask, int order) -+{ -+ unsigned long addr, zero = 0; -+ int i; -+ -+ again: -+ if(page == NULL) return(page); -+ if(PageHighMem(page)) return(page); -+ -+ addr = (unsigned long) page_address(page); -+ for(i = 0; i < (1 << order); i++){ -+ current->thread.fault_addr = (void *) addr; -+ if(__do_copy_to_user((void *) addr, &zero, -+ sizeof(zero), -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)){ -+ if(!(mask & __GFP_WAIT)) return(NULL); -+ else break; -+ } -+ addr += PAGE_SIZE; -+ } -+ if(i == (1 << order)) return(page); -+ page = _alloc_pages(mask, order); -+ goto again; -+} -+ -+DECLARE_MUTEX(vm_reserved_sem); -+static struct list_head vm_reserved = LIST_HEAD_INIT(vm_reserved); -+ -+/* Static structures, linked in to the list in early boot */ -+static struct vm_reserved head = { -+ .list = LIST_HEAD_INIT(head.list), -+ .start = 0, -+ .end = 0xffffffff -+}; -+ -+static struct vm_reserved tail = { -+ .list = LIST_HEAD_INIT(tail.list), -+ .start = 0, -+ .end = 0xffffffff -+}; -+ -+void set_usable_vm(unsigned long start, unsigned long end) -+{ -+ list_add(&head.list, &vm_reserved); -+ list_add(&tail.list, &head.list); -+ head.end = start; -+ tail.start = end; -+} -+ -+int reserve_vm(unsigned long start, unsigned long end, void *e) -+ -+{ -+ struct vm_reserved *entry = e, *reserved, *prev; -+ struct list_head *ele; -+ int err; -+ -+ down(&vm_reserved_sem); -+ list_for_each(ele, &vm_reserved){ -+ reserved = list_entry(ele, struct vm_reserved, list); -+ if(reserved->start >= end) goto found; -+ } -+ panic("Reserved vm out of range"); -+ found: -+ prev = list_entry(ele->prev, struct vm_reserved, list); -+ 
if(prev->end > start) -+ panic("Can't reserve vm"); -+ if(entry == NULL) -+ entry = kmalloc(sizeof(*entry), GFP_KERNEL); -+ if(entry == NULL){ -+ printk("reserve_vm : Failed to allocate entry\n"); -+ err = -ENOMEM; -+ goto out; -+ } -+ *entry = ((struct vm_reserved) -+ { .list = LIST_HEAD_INIT(entry->list), -+ .start = start, -+ .end = end }); -+ list_add(&entry->list, &prev->list); -+ err = 0; -+ out: -+ up(&vm_reserved_sem); -+ return(0); -+} -+ -+unsigned long get_vm(unsigned long len) -+{ -+ struct vm_reserved *this, *next; -+ struct list_head *ele; -+ unsigned long start; -+ int err; -+ -+ down(&vm_reserved_sem); -+ list_for_each(ele, &vm_reserved){ -+ this = list_entry(ele, struct vm_reserved, list); -+ next = list_entry(ele->next, struct vm_reserved, list); -+ if((this->start < next->start) && -+ (this->end + len + PAGE_SIZE <= next->start)) -+ goto found; -+ } -+ up(&vm_reserved_sem); -+ return(0); -+ found: -+ up(&vm_reserved_sem); -+ start = (unsigned long) UML_ROUND_UP(this->end) + PAGE_SIZE; -+ err = reserve_vm(start, start + len, NULL); -+ if(err) return(0); -+ return(start); -+} -+ -+int nregions(void) -+{ -+ return(NREGIONS); -+} -+ -+void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn, -+ unsigned long len, int need_vm, struct mem_region *region, -+ void *reserved) -+{ -+ int i, cur; -+ -+ do { -+ cur = min(len, (unsigned long) REGION_SIZE); -+ i = setup_one_range(fd, driver, start, pfn, cur, region); -+ region = regions[i]; -+ if(need_vm && setup_region(region, reserved)){ -+ kfree(region); -+ regions[i] = NULL; -+ return; -+ } -+ start += cur; -+ if(pfn != -1) pfn += cur; -+ len -= cur; -+ } while(len > 0); -+} -+ -+struct iomem { -+ char *name; -+ int fd; -+ unsigned long size; -+}; -+ -+/* iomem regions can only be added on the command line at the moment. -+ * Locking will be needed when they can be added via mconsole. -+ */ -+ -+struct iomem iomem_regions[NREGIONS] = { [ 0 ... 
NREGIONS - 1 ] = -+ { .name = NULL, -+ .fd = -1, -+ .size = 0 } }; -+ -+int num_iomem_regions = 0; -+ -+void add_iomem(char *name, int fd, unsigned long size) -+{ -+ if(num_iomem_regions == sizeof(iomem_regions)/sizeof(iomem_regions[0])) -+ return; -+ size = (size + PAGE_SIZE - 1) & PAGE_MASK; -+ iomem_regions[num_iomem_regions++] = -+ ((struct iomem) { .name = name, -+ .fd = fd, -+ .size = size } ); -+} -+ -+int setup_iomem(void) -+{ -+ struct iomem *iomem; -+ int i; -+ -+ for(i = 0; i < num_iomem_regions; i++){ -+ iomem = &iomem_regions[i]; -+ setup_range(iomem->fd, iomem->name, -1, -1, iomem->size, 1, -+ NULL, NULL); -+ } -+ return(0); -+} -+ -+__initcall(setup_iomem); -+ -+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -+ -+/* Changed during early boot */ -+static struct mem_region physmem_region; -+static struct vm_reserved physmem_reserved; -+ -+void setup_physmem(unsigned long start, unsigned long reserve_end, -+ unsigned long len) -+{ -+ struct mem_region *region = &physmem_region; -+ struct vm_reserved *reserved = &physmem_reserved; -+ unsigned long cur, pfn = 0; -+ int do_free = 1, bootmap_size; -+ -+ do { -+ cur = min(len, (unsigned long) REGION_SIZE); -+ if(region == NULL) -+ region = alloc_bootmem_low_pages(sizeof(*region)); -+ if(reserved == NULL) -+ reserved = alloc_bootmem_low_pages(sizeof(*reserved)); -+ if((region == NULL) || (reserved == NULL)) -+ panic("Couldn't allocate physmem region or vm " -+ "reservation\n"); -+ setup_range(-1, NULL, start, pfn, cur, 1, region, reserved); -+ -+ if(do_free){ -+ unsigned long reserve = reserve_end - start; -+ int pfn = PFN_UP(__pa(reserve_end)); -+ int delta = (len - reserve) >> PAGE_SHIFT; -+ -+ bootmap_size = init_bootmem(pfn, pfn + delta); -+ free_bootmem(__pa(reserve_end) + bootmap_size, -+ cur - bootmap_size - reserve); -+ do_free = 0; -+ } -+ start += cur; -+ pfn += cur >> PAGE_SHIFT; -+ len -= cur; -+ region = NULL; -+ reserved = NULL; -+ } while(len > 
0); -+} -+ -+struct mem_region *phys_region(unsigned long phys) -+{ -+ unsigned int n = phys_region_index(phys); -+ -+ if(regions[n] == NULL) -+ panic("Physical address in uninitialized region"); -+ return(regions[n]); -+} -+ -+unsigned long phys_offset(unsigned long phys) -+{ -+ return(phys_addr(phys)); -+} -+ -+struct page *phys_mem_map(unsigned long phys) -+{ -+ return((struct page *) phys_region(phys)->mem_map); -+} -+ -+struct page *pte_mem_map(pte_t pte) -+{ -+ return(phys_mem_map(pte_val(pte))); -+} -+ -+struct mem_region *page_region(struct page *page, int *index_out) -+{ -+ int i; -+ struct mem_region *region; -+ struct page *map; -+ -+ for(i = 0; i < NREGIONS; i++){ -+ region = regions[i]; -+ if(region == NULL) continue; -+ map = region->mem_map; -+ if((page >= map) && (page < &map[region->len >> PAGE_SHIFT])){ -+ if(index_out != NULL) *index_out = i; -+ return(region); -+ } -+ } -+ panic("No region found for page"); -+ return(NULL); -+} -+ -+unsigned long page_to_pfn(struct page *page) -+{ -+ struct mem_region *region = page_region(page, NULL); -+ -+ return(region->start_pfn + (page - (struct page *) region->mem_map)); -+} -+ -+struct mem_region *pfn_to_region(unsigned long pfn, int *index_out) -+{ -+ struct mem_region *region; -+ int i; -+ -+ for(i = 0; i < NREGIONS; i++){ -+ region = regions[i]; -+ if(region == NULL) -+ continue; -+ -+ if((region->start_pfn <= pfn) && -+ (region->start_pfn + (region->len >> PAGE_SHIFT) > pfn)){ -+ if(index_out != NULL) -+ *index_out = i; -+ return(region); -+ } -+ } -+ return(NULL); -+} -+ -+struct page *pfn_to_page(unsigned long pfn) -+{ -+ struct mem_region *region = pfn_to_region(pfn, NULL); -+ struct page *mem_map = (struct page *) region->mem_map; -+ -+ return(&mem_map[pfn - region->start_pfn]); -+} -+ -+unsigned long phys_to_pfn(unsigned long p) -+{ -+ struct mem_region *region = regions[phys_region_index(p)]; -+ -+ return(region->start_pfn + (phys_addr(p) >> PAGE_SHIFT)); -+} -+ -+unsigned long 
pfn_to_phys(unsigned long pfn) -+{ -+ int n; -+ struct mem_region *region = pfn_to_region(pfn, &n); -+ -+ return(mk_phys((pfn - region->start_pfn) << PAGE_SHIFT, n)); -+} -+ -+struct page *page_mem_map(struct page *page) -+{ -+ return((struct page *) page_region(page, NULL)->mem_map); -+} -+ -+extern unsigned long region_pa(void *virt) -+{ -+ struct mem_region *region; -+ unsigned long addr = (unsigned long) virt; -+ int i; -+ -+ for(i = 0; i < NREGIONS; i++){ -+ region = regions[i]; -+ if(region == NULL) continue; -+ if((region->start <= addr) && -+ (addr <= region->start + region->len)) -+ return(mk_phys(addr - region->start, i)); -+ } -+ panic("region_pa : no region for virtual address"); -+ return(0); -+} -+ -+extern void *region_va(unsigned long phys) -+{ -+ return((void *) (phys_region(phys)->start + phys_addr(phys))); -+} -+ -+unsigned long page_to_phys(struct page *page) -+{ -+ int n; -+ struct mem_region *region = page_region(page, &n); -+ struct page *map = region->mem_map; -+ return(mk_phys((page - map) << PAGE_SHIFT, n)); -+} -+ -+struct page *phys_to_page(unsigned long phys) -+{ -+ struct page *mem_map; -+ -+ mem_map = phys_mem_map(phys); -+ return(mem_map + (phys_offset(phys) >> PAGE_SHIFT)); -+} -+ -+static int setup_mem_maps(void) -+{ -+ struct mem_region *region; -+ int i; -+ -+ for(i = 0; i < NREGIONS; i++){ -+ region = regions[i]; -+ if((region != NULL) && (region->fd > 0)) init_maps(region); -+ } -+ return(0); -+} -+ -+__initcall(setup_mem_maps); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/mem_user.c um/arch/um/kernel/mem_user.c ---- orig/arch/um/kernel/mem_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/mem_user.c Thu Mar 6 16:05:21 2003 -@@ -0,0 +1,232 @@ -+/* -+ * arch/um/kernel/mem_user.c -+ * -+ * BRIEF MODULE DESCRIPTION -+ * user side memory routines for supporting IO memory inside user mode linux -+ * -+ * Copyright (C) 2001 RidgeRun, Inc. -+ * Author: RidgeRun, Inc. -+ * Greg Lonnon glonnon@ridgerun.com or info@ridgerun.com -+ * -+ * This program is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License as published by the -+ * Free Software Foundation; either version 2 of the License, or (at your -+ * option) any later version. -+ * -+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED -+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN -+ * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF -+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -+ * -+ * You should have received a copy of the GNU General Public License along -+ * with this program; if not, write to the Free Software Foundation, Inc., -+ * 675 Mass Ave, Cambridge, MA 02139, USA. 
-+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <stddef.h> -+#include <stdarg.h> -+#include <unistd.h> -+#include <fcntl.h> -+#include <errno.h> -+#include <string.h> -+#include <sys/stat.h> -+#include <sys/types.h> -+#include <sys/mman.h> -+#include "kern_util.h" -+#include "user.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "init.h" -+#include "os.h" -+#include "tempfile.h" -+ -+extern struct mem_region physmem_region; -+ -+#define TEMPNAME_TEMPLATE "vm_file-XXXXXX" -+ -+int create_mem_file(unsigned long len) -+{ -+ int fd; -+ char zero; -+ -+ fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); -+ if (fchmod(fd, 0777) < 0){ -+ perror("fchmod"); -+ exit(1); -+ } -+ if(os_seek_file(fd, len) < 0){ -+ perror("lseek"); -+ exit(1); -+ } -+ zero = 0; -+ if(write(fd, &zero, 1) != 1){ -+ perror("write"); -+ exit(1); -+ } -+ if(fcntl(fd, F_SETFD, 1) != 0) -+ perror("Setting FD_CLOEXEC failed"); -+ return(fd); -+} -+ -+int setup_region(struct mem_region *region, void *entry) -+{ -+ void *loc, *start; -+ char *driver; -+ int err, offset; -+ -+ if(region->start != -1){ -+ err = reserve_vm(region->start, -+ region->start + region->len, entry); -+ if(err){ -+ printk("setup_region : failed to reserve " -+ "0x%x - 0x%x for driver '%s'\n", -+ region->start, -+ region->start + region->len, -+ region->driver); -+ return(-1); -+ } -+ } -+ else region->start = get_vm(region->len); -+ if(region->start == 0){ -+ if(region->driver == NULL) driver = "physmem"; -+ else driver = region->driver; -+ printk("setup_region : failed to find vm for " -+ "driver '%s' (length %d)\n", driver, region->len); -+ return(-1); -+ } -+ if(region->start == uml_physmem){ -+ start = (void *) uml_reserved; -+ offset = uml_reserved - uml_physmem; -+ } -+ else { -+ start = (void *) region->start; -+ offset = 0; -+ } -+ -+ loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE, -+ MAP_SHARED | MAP_FIXED, region->fd, offset); -+ if(loc != start){ -+ perror("Mapping memory"); -+ 
exit(1); -+ } -+ return(0); -+} -+ -+static int __init parse_iomem(char *str, int *add) -+{ -+ struct stat64 buf; -+ char *file, *driver; -+ int fd; -+ -+ driver = str; -+ file = strchr(str,','); -+ if(file == NULL){ -+ printf("parse_iomem : failed to parse iomem\n"); -+ return(1); -+ } -+ *file = '\0'; -+ file++; -+ fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printf("parse_iomem - Couldn't open io file, errno = %d\n", -+ errno); -+ return(1); -+ } -+ if(fstat64(fd, &buf) < 0) { -+ printf("parse_iomem - cannot fstat file, errno = %d\n", errno); -+ return(1); -+ } -+ add_iomem(driver, fd, buf.st_size); -+ return(0); -+} -+ -+__uml_setup("iomem=", parse_iomem, -+"iomem=<name>,<file>\n" -+" Configure <file> as an IO memory region named <name>.\n\n" -+); -+ -+#ifdef notdef -+int logging = 0; -+int logging_fd = -1; -+ -+int logging_line = 0; -+char logging_buf[256]; -+ -+void log(char *fmt, ...) -+{ -+ va_list ap; -+ struct timeval tv; -+ struct openflags flags; -+ -+ if(logging == 0) return; -+ if(logging_fd < 0){ -+ flags = of_create(of_trunc(of_rdrw(OPENFLAGS()))); -+ logging_fd = os_open_file("log", flags, 0644); -+ } -+ gettimeofday(&tv, NULL); -+ sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec, -+ tv.tv_usec); -+ va_start(ap, fmt); -+ vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap); -+ va_end(ap); -+ write(logging_fd, logging_buf, strlen(logging_buf)); -+} -+#endif -+ -+int map_memory(unsigned long virt, unsigned long phys, unsigned long len, -+ int r, int w, int x) -+{ -+ struct mem_region *region = phys_region(phys); -+ -+ return(os_map_memory((void *) virt, region->fd, phys_offset(phys), len, -+ r, w, x)); -+} -+ -+int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, -+ int must_succeed) -+{ -+ if(os_protect_memory((void *) addr, len, r, w, x) < 0){ -+ if(must_succeed) -+ panic("protect failed, errno = %d", errno); -+ else return(-errno); -+ } -+ return(0); -+} -+ -+unsigned long 
find_iomem(char *driver, unsigned long *len_out) -+{ -+ struct mem_region *region; -+ int i, n; -+ -+ n = nregions(); -+ for(i = 0; i < n; i++){ -+ region = regions[i]; -+ if(region == NULL) continue; -+ if((region->driver != NULL) && -+ !strcmp(region->driver, driver)){ -+ *len_out = region->len; -+ return(region->start); -+ } -+ } -+ *len_out = 0; -+ return 0; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/mprot.h um/arch/um/kernel/mprot.h ---- orig/arch/um/kernel/mprot.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/mprot.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,6 @@ -+#ifndef __MPROT_H__ -+#define __MPROT_H__ -+ -+extern void no_access(unsigned long addr, unsigned int len); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/kernel/process.c um/arch/um/kernel/process.c ---- orig/arch/um/kernel/process.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/process.c Wed Apr 23 20:36:15 2003 -@@ -0,0 +1,286 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <signal.h> -+#include <sched.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <fcntl.h> -+#include <stdlib.h> -+#include <setjmp.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/ioctl.h> -+#include <sys/wait.h> -+#include <sys/mman.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include <asm/unistd.h> -+#include <asm/page.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_kern.h" -+#include "signal_user.h" 
-+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+#include "irq_user.h" -+#include "ptrace_user.h" -+#include "time_user.h" -+#include "init.h" -+#include "os.h" -+#include "uml-config.h" -+#include "choose-mode.h" -+#include "mode.h" -+#ifdef UML_CONFIG_MODE_SKAS -+#include "skas.h" -+#include "skas_ptrace.h" -+#endif -+ -+void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int)) -+{ -+ int flags = 0, pages; -+ -+ if(sig_stack != NULL){ -+ pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER) - 2; -+ set_sigstack(sig_stack, pages * page_size()); -+ flags = SA_ONSTACK; -+ } -+ if(usr1_handler) set_handler(SIGUSR1, usr1_handler, flags, -1); -+} -+ -+void init_new_thread_signals(int altstack) -+{ -+ int flags = altstack ? SA_ONSTACK : 0; -+ -+ set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGFPE, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGILL, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGBUS, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGWINCH, (__sighandler_t) sig_handler, flags, -+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); -+ set_handler(SIGUSR2, (__sighandler_t) sig_handler, -+ SA_NOMASK | flags, -1); -+ signal(SIGHUP, SIG_IGN); -+ -+ init_irq_signals(altstack); -+} -+ -+struct tramp { -+ int (*tramp)(void *); -+ void *tramp_data; -+ unsigned long temp_stack; -+ int flags; -+ int pid; -+}; -+ -+/* See above for why sigkill is here */ -+ -+int sigkill = SIGKILL; -+ -+int outer_tramp(void *arg) -+{ -+ struct tramp *t; -+ int sig = sigkill; -+ -+ t = arg; -+ t->pid = clone(t->tramp, (void *) t->temp_stack + page_size()/2, -+ t->flags, t->tramp_data); -+ 
if(t->pid > 0) wait_for_stop(t->pid, SIGSTOP, PTRACE_CONT, NULL); -+ kill(os_getpid(), sig); -+ _exit(0); -+} -+ -+int start_fork_tramp(void *thread_arg, unsigned long temp_stack, -+ int clone_flags, int (*tramp)(void *)) -+{ -+ struct tramp arg; -+ unsigned long sp; -+ int new_pid, status, err; -+ -+ /* The trampoline will run on the temporary stack */ -+ sp = stack_sp(temp_stack); -+ -+ clone_flags |= CLONE_FILES | SIGCHLD; -+ -+ arg.tramp = tramp; -+ arg.tramp_data = thread_arg; -+ arg.temp_stack = temp_stack; -+ arg.flags = clone_flags; -+ -+ /* Start the process and wait for it to kill itself */ -+ new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); -+ if(new_pid < 0) return(-errno); -+ while((err = waitpid(new_pid, &status, 0) < 0) && (errno == EINTR)) ; -+ if(err < 0) panic("Waiting for outer trampoline failed - errno = %d", -+ errno); -+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) -+ panic("outer trampoline didn't exit with SIGKILL"); -+ -+ return(arg.pid); -+} -+ -+void suspend_new_thread(int fd) -+{ -+ char c; -+ -+ os_stop_process(os_getpid()); -+ -+ if(read(fd, &c, sizeof(c)) != sizeof(c)) -+ panic("read failed in suspend_new_thread"); -+} -+ -+static int ptrace_child(void *arg) -+{ -+ int pid = os_getpid(); -+ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ perror("ptrace"); -+ os_kill_process(pid, 0); -+ } -+ os_stop_process(pid); -+ _exit(os_getpid() == pid); -+} -+ -+static int start_ptraced_child(void **stack_out) -+{ -+ void *stack; -+ unsigned long sp; -+ int pid, n, status; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if(stack == MAP_FAILED) -+ panic("check_ptrace : mmap failed, errno = %d", errno); -+ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ pid = clone(ptrace_child, (void *) sp, SIGCHLD, NULL); -+ if(pid < 0) -+ panic("check_ptrace : clone failed, errno = %d", errno); -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0) -+ 
panic("check_ptrace : wait failed, errno = %d", errno); -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) -+ panic("check_ptrace : expected SIGSTOP, got status = %d", -+ status); -+ -+ *stack_out = stack; -+ return(pid); -+} -+ -+static void stop_ptraced_child(int pid, void *stack, int exitcode) -+{ -+ int status, n; -+ -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) -+ panic("check_ptrace : ptrace failed, errno = %d", errno); -+ n = waitpid(pid, &status, 0); -+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != exitcode)) -+ panic("check_ptrace : child exited with status 0x%x", status); -+ -+ if(munmap(stack, PAGE_SIZE) < 0) -+ panic("check_ptrace : munmap failed, errno = %d", errno); -+} -+ -+void __init check_ptrace(void) -+{ -+ void *stack; -+ int pid, syscall, n, status; -+ -+ printk("Checking that ptrace can change system call numbers..."); -+ pid = start_ptraced_child(&stack); -+ -+ while(1){ -+ if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) -+ panic("check_ptrace : ptrace failed, errno = %d", -+ errno); -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0) -+ panic("check_ptrace : wait failed, errno = %d", errno); -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("check_ptrace : expected SIGTRAP, " -+ "got status = %d", status); -+ -+ syscall = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, -+ 0); -+ if(syscall == __NR_getpid){ -+ n = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getppid); -+ if(n < 0) -+ panic("check_ptrace : failed to modify system " -+ "call, errno = %d", errno); -+ break; -+ } -+ } -+ stop_ptraced_child(pid, stack, 0); -+ printk("OK\n"); -+} -+ -+int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr) -+{ -+ jmp_buf buf; -+ int n; -+ -+ *jmp_ptr = &buf; -+ n = setjmp(buf); -+ if(n != 0) -+ return(n); -+ (*fn)(arg); -+ return(0); -+} -+ -+int can_do_skas(void) -+{ -+#ifdef UML_CONFIG_MODE_SKAS -+ struct ptrace_faultinfo fi; -+ void *stack; -+ int pid, n, ret = 1; -+ -+ printf("Checking for the 
skas3 patch in the host..."); -+ pid = start_ptraced_child(&stack); -+ -+ n = ptrace(PTRACE_FAULTINFO, pid, 0, &fi); -+ if(n < 0){ -+ if(errno == EIO) -+ printf("not found\n"); -+ else printf("No (unexpected errno - %d)\n", errno); -+ ret = 0; -+ } -+ else printf("found\n"); -+ -+ init_registers(pid); -+ stop_ptraced_child(pid, stack, 1); -+ -+ printf("Checking for /proc/mm..."); -+ if(access("/proc/mm", W_OK)){ -+ printf("not found\n"); -+ ret = 0; -+ } -+ else printf("found\n"); -+ -+ return(ret); -+#else -+ return(0); -+#endif -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/process_kern.c um/arch/um/kernel/process_kern.c ---- orig/arch/um/kernel/process_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/process_kern.c Wed Apr 16 16:02:09 2003 -@@ -0,0 +1,391 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/interrupt.h" -+#include "linux/mm.h" -+#include "linux/slab.h" -+#include "linux/utsname.h" -+#include "linux/fs.h" -+#include "linux/utime.h" -+#include "linux/smp_lock.h" -+#include "linux/module.h" -+#include "linux/init.h" -+#include "linux/capability.h" -+#include "asm/unistd.h" -+#include "asm/mman.h" -+#include "asm/segment.h" -+#include "asm/stat.h" -+#include "asm/pgtable.h" -+#include "asm/processor.h" -+#include "asm/pgalloc.h" -+#include "asm/spinlock.h" -+#include "asm/uaccess.h" -+#include "asm/user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include 
"signal_kern.h" -+#include "signal_user.h" -+#include "init.h" -+#include "irq_user.h" -+#include "mem_user.h" -+#include "time_user.h" -+#include "tlb.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "2_5compat.h" -+#include "os.h" -+#include "mode.h" -+#include "mode_kern.h" -+#include "choose-mode.h" -+ -+/* This is a per-cpu array. A processor only modifies its entry and it only -+ * cares about its entry, so it's OK if another processor is modifying its -+ * entry. -+ */ -+struct cpu_task cpu_tasks[NR_CPUS] = { [0 ... NR_CPUS - 1] = { -1, NULL } }; -+ -+struct task_struct *get_task(int pid, int require) -+{ -+ struct task_struct *ret; -+ -+ read_lock(&tasklist_lock); -+ ret = find_task_by_pid(pid); -+ read_unlock(&tasklist_lock); -+ -+ if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); -+ return(ret); -+} -+ -+int external_pid(void *t) -+{ -+ struct task_struct *task = t ? t : current; -+ -+ return(CHOOSE_MODE_PROC(external_pid_tt, external_pid_skas, task)); -+} -+ -+int pid_to_processor_id(int pid) -+{ -+ int i; -+ -+ for(i = 0; i < smp_num_cpus; i++){ -+ if(cpu_tasks[i].pid == pid) return(i); -+ } -+ return(-1); -+} -+ -+void free_stack(unsigned long stack, int order) -+{ -+ free_pages(stack, order); -+} -+ -+unsigned long alloc_stack(int order, int atomic) -+{ -+ unsigned long page; -+ int flags = GFP_KERNEL; -+ -+ if(atomic) flags |= GFP_ATOMIC; -+ if((page = __get_free_pages(flags, order)) == 0) -+ return(0); -+ stack_protections(page); -+ return(page); -+} -+ -+int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) -+{ -+ int pid; -+ -+ current->thread.request.u.thread.proc = fn; -+ current->thread.request.u.thread.arg = arg; -+ pid = do_fork(CLONE_VM | flags, 0, NULL, 0); -+ if(pid < 0) panic("do_fork failed in kernel_thread"); -+ return(pid); -+} -+ -+void switch_mm(struct mm_struct *prev, struct mm_struct *next, -+ struct task_struct *tsk, unsigned cpu) -+{ -+ if (prev != next) -+ clear_bit(cpu, 
&prev->cpu_vm_mask); -+ set_bit(cpu, &next->cpu_vm_mask); -+} -+ -+void set_current(void *t) -+{ -+ struct task_struct *task = t; -+ -+ cpu_tasks[task->processor] = ((struct cpu_task) -+ { external_pid(task), task }); -+} -+ -+void *_switch_to(void *prev, void *next) -+{ -+ return(CHOOSE_MODE(_switch_to_tt(prev, next), -+ _switch_to_skas(prev, next))); -+} -+ -+void interrupt_end(void) -+{ -+ if(current->need_resched) schedule(); -+ if(current->sigpending != 0) do_signal(0); -+} -+ -+void release_thread(struct task_struct *task) -+{ -+ CHOOSE_MODE(release_thread_tt(task), release_thread_skas(task)); -+} -+ -+void exit_thread(void) -+{ -+ CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); -+ unprotect_stack((unsigned long) current); -+} -+ -+void *get_current(void) -+{ -+ return(current); -+} -+ -+int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ p->thread = (struct thread_struct) INIT_THREAD; -+ p->thread.kernel_stack = (unsigned long) p + 2 * PAGE_SIZE; -+ -+ return(CHOOSE_MODE_PROC(copy_thread_tt, copy_thread_skas, nr, -+ clone_flags, sp, stack_top, p, regs)); -+} -+ -+void initial_thread_cb(void (*proc)(void *), void *arg) -+{ -+ int save_kmalloc_ok = kmalloc_ok; -+ -+ kmalloc_ok = 0; -+ CHOOSE_MODE_PROC(initial_thread_cb_tt, initial_thread_cb_skas, proc, -+ arg); -+ kmalloc_ok = save_kmalloc_ok; -+} -+ -+unsigned long stack_sp(unsigned long page) -+{ -+ return(page + PAGE_SIZE - sizeof(void *)); -+} -+ -+int current_pid(void) -+{ -+ return(current->pid); -+} -+ -+void cpu_idle(void) -+{ -+ CHOOSE_MODE(init_idle_tt(), init_idle_skas()); -+ -+ atomic_inc(&init_mm.mm_count); -+ current->mm = &init_mm; -+ current->active_mm = &init_mm; -+ -+ while(1){ -+ /* endless idle loop with no priority at all */ -+ SET_PRI(current); -+ -+ /* -+ * although we are an idle CPU, we do not want to -+ * get into the scheduler unnecessarily. 
-+ */ -+ if (current->need_resched) { -+ schedule(); -+ check_pgt_cache(); -+ } -+ idle_sleep(10); -+ } -+} -+ -+int page_size(void) -+{ -+ return(PAGE_SIZE); -+} -+ -+int page_mask(void) -+{ -+ return(PAGE_MASK); -+} -+ -+void *um_virt_to_phys(struct task_struct *task, unsigned long addr, -+ pte_t *pte_out) -+{ -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ -+ if(task->mm == NULL) -+ return(ERR_PTR(-EINVAL)); -+ pgd = pgd_offset(task->mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(!pmd_present(*pmd)) -+ return(ERR_PTR(-EINVAL)); -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte)) -+ return(ERR_PTR(-EINVAL)); -+ if(pte_out != NULL) -+ *pte_out = *pte; -+ return((void *) (pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK)); -+} -+ -+char *current_cmd(void) -+{ -+#if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM) -+ return("(Unknown)"); -+#else -+ void *addr = um_virt_to_phys(current, current->mm->arg_start, NULL); -+ return IS_ERR(addr) ? "(Unknown)": __va((unsigned long) addr); -+#endif -+} -+ -+void force_sigbus(void) -+{ -+ printk(KERN_ERR "Killing pid %d because of a lack of memory\n", -+ current->pid); -+ lock_kernel(); -+ sigaddset(¤t->pending.signal, SIGBUS); -+ recalc_sigpending(current); -+ current->flags |= PF_SIGNALED; -+ do_exit(SIGBUS | 0x80); -+} -+ -+void dump_thread(struct pt_regs *regs, struct user *u) -+{ -+} -+ -+void enable_hlt(void) -+{ -+ panic("enable_hlt"); -+} -+ -+void disable_hlt(void) -+{ -+ panic("disable_hlt"); -+} -+ -+extern int signal_frame_size; -+ -+void *um_kmalloc(int size) -+{ -+ return(kmalloc(size, GFP_KERNEL)); -+} -+ -+void *um_kmalloc_atomic(int size) -+{ -+ return(kmalloc(size, GFP_ATOMIC)); -+} -+ -+unsigned long get_fault_addr(void) -+{ -+ return((unsigned long) current->thread.fault_addr); -+} -+ -+EXPORT_SYMBOL(get_fault_addr); -+ -+void not_implemented(void) -+{ -+ printk(KERN_DEBUG "Something isn't implemented in here\n"); -+} -+ -+EXPORT_SYMBOL(not_implemented); -+ -+int user_context(unsigned long sp) -+{ -+ 
unsigned long stack; -+ -+ stack = sp & (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER); -+ stack += 2 * PAGE_SIZE; -+ return(stack != current->thread.kernel_stack); -+} -+ -+extern void remove_umid_dir(void); -+ -+__uml_exitcall(remove_umid_dir); -+ -+extern exitcall_t __uml_exitcall_begin, __uml_exitcall_end; -+ -+void do_uml_exitcalls(void) -+{ -+ exitcall_t *call; -+ -+ call = &__uml_exitcall_end; -+ while (--call >= &__uml_exitcall_begin) -+ (*call)(); -+} -+ -+char *uml_strdup(char *string) -+{ -+ char *new; -+ -+ new = kmalloc(strlen(string) + 1, GFP_KERNEL); -+ if(new == NULL) return(NULL); -+ strcpy(new, string); -+ return(new); -+} -+ -+void *get_init_task(void) -+{ -+ return(&init_task_union.task); -+} -+ -+int copy_to_user_proc(void *to, void *from, int size) -+{ -+ return(copy_to_user(to, from, size)); -+} -+ -+int copy_from_user_proc(void *to, void *from, int size) -+{ -+ return(copy_from_user(to, from, size)); -+} -+ -+int clear_user_proc(void *buf, int size) -+{ -+ return(clear_user(buf, size)); -+} -+ -+int strlen_user_proc(char *str) -+{ -+ return(strlen_user(str)); -+} -+ -+int smp_sigio_handler(void) -+{ -+#ifdef CONFIG_SMP -+ int cpu = current->processor; -+ -+ IPI_handler(cpu); -+ if(cpu != 0) -+ return(1); -+#endif -+ return(0); -+} -+ -+int um_in_interrupt(void) -+{ -+ return(in_interrupt()); -+} -+ -+int cpu(void) -+{ -+ return(current->processor); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/ptrace.c um/arch/um/kernel/ptrace.c ---- orig/arch/um/kernel/ptrace.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/ptrace.c Sat Dec 28 22:50:21 2002 -@@ -0,0 +1,325 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/mm.h" -+#include "linux/errno.h" -+#include "linux/smp_lock.h" -+#ifdef CONFIG_PROC_MM -+#include "linux/proc_mm.h" -+#endif -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+ -+/* -+ * Called by kernel/ptrace.c when detaching.. -+ */ -+void ptrace_disable(struct task_struct *child) -+{ -+} -+ -+extern long do_mmap2(struct task_struct *task, unsigned long addr, -+ unsigned long len, unsigned long prot, -+ unsigned long flags, unsigned long fd, -+ unsigned long pgoff); -+ -+int sys_ptrace(long request, long pid, long addr, long data) -+{ -+ struct task_struct *child; -+ int i, ret; -+ -+ lock_kernel(); -+ ret = -EPERM; -+ if (request == PTRACE_TRACEME) { -+ /* are we already being traced? */ -+ if (current->ptrace & PT_PTRACED) -+ goto out; -+ /* set the ptrace bit in the process flags. */ -+ current->ptrace |= PT_PTRACED; -+ ret = 0; -+ goto out; -+ } -+ ret = -ESRCH; -+ read_lock(&tasklist_lock); -+ child = find_task_by_pid(pid); -+ if (child) -+ get_task_struct(child); -+ read_unlock(&tasklist_lock); -+ if (!child) -+ goto out; -+ -+ ret = -EPERM; -+ if (pid == 1) /* you may not mess with init */ -+ goto out_tsk; -+ -+ if (request == PTRACE_ATTACH) { -+ ret = ptrace_attach(child); -+ goto out_tsk; -+ } -+ -+ ret = ptrace_check_attach(child, request == PTRACE_KILL); -+ if (ret < 0) -+ goto out_tsk; -+ -+ switch (request) { -+ /* when I and D space are separate, these will need to be fixed. 
*/ -+ case PTRACE_PEEKTEXT: /* read word at location addr. */ -+ case PTRACE_PEEKDATA: { -+ unsigned long tmp; -+ int copied; -+ -+ ret = -EIO; -+ copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); -+ if (copied != sizeof(tmp)) -+ break; -+ ret = put_user(tmp,(unsigned long *) data); -+ break; -+ } -+ -+ /* read the word at location addr in the USER area. */ -+ case PTRACE_PEEKUSR: { -+ unsigned long tmp; -+ -+ ret = -EIO; -+ if ((addr & 3) || addr < 0) -+ break; -+ -+ tmp = 0; /* Default return condition */ -+ if(addr < FRAME_SIZE_OFFSET){ -+ tmp = getreg(child, addr); -+ } -+ else if((addr >= offsetof(struct user, u_debugreg[0])) && -+ (addr <= offsetof(struct user, u_debugreg[7]))){ -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ tmp = child->thread.arch.debugregs[addr]; -+ } -+ ret = put_user(tmp, (unsigned long *) data); -+ break; -+ } -+ -+ /* when I and D space are separate, this will have to be fixed. */ -+ case PTRACE_POKETEXT: /* write the word at location addr. */ -+ case PTRACE_POKEDATA: -+ ret = -EIO; -+ if (access_process_vm(child, addr, &data, sizeof(data), -+ 1) != sizeof(data)) -+ break; -+ ret = 0; -+ break; -+ -+ case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ -+ ret = -EIO; -+ if ((addr & 3) || addr < 0) -+ break; -+ -+ if (addr < FRAME_SIZE_OFFSET) { -+ ret = putreg(child, addr, data); -+ break; -+ } -+ else if((addr >= offsetof(struct user, u_debugreg[0])) && -+ (addr <= offsetof(struct user, u_debugreg[7]))){ -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ if((addr == 4) || (addr == 5)) break; -+ child->thread.arch.debugregs[addr] = data; -+ ret = 0; -+ } -+ -+ break; -+ -+ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ -+ case PTRACE_CONT: { /* restart after signal. 
*/ -+ ret = -EIO; -+ if ((unsigned long) data > _NSIG) -+ break; -+ if (request == PTRACE_SYSCALL) -+ child->ptrace |= PT_TRACESYS; -+ else -+ child->ptrace &= ~PT_TRACESYS; -+ child->exit_code = data; -+ wake_up_process(child); -+ ret = 0; -+ break; -+ } -+ -+/* -+ * make the child exit. Best I can do is send it a sigkill. -+ * perhaps it should be put in the status that it wants to -+ * exit. -+ */ -+ case PTRACE_KILL: { -+ ret = 0; -+ if (child->state == TASK_ZOMBIE) /* already dead */ -+ break; -+ child->exit_code = SIGKILL; -+ wake_up_process(child); -+ break; -+ } -+ -+ case PTRACE_SINGLESTEP: { /* set the trap flag. */ -+ ret = -EIO; -+ if ((unsigned long) data > _NSIG) -+ break; -+ child->ptrace &= ~PT_TRACESYS; -+ child->ptrace |= PT_DTRACE; -+ child->exit_code = data; -+ /* give it a chance to run. */ -+ wake_up_process(child); -+ ret = 0; -+ break; -+ } -+ -+ case PTRACE_DETACH: -+ /* detach a process that was attached. */ -+ ret = ptrace_detach(child, data); -+ break; -+ -+#ifdef PTRACE_GETREGS -+ case PTRACE_GETREGS: { /* Get all gp regs from the child. */ -+ if (!access_ok(VERIFY_WRITE, (unsigned long *)data, -+ FRAME_SIZE_OFFSET)) { -+ ret = -EIO; -+ break; -+ } -+ for ( i = 0; i < FRAME_SIZE_OFFSET; i += sizeof(long) ) { -+ __put_user(getreg(child, i), (unsigned long *) data); -+ data += sizeof(long); -+ } -+ ret = 0; -+ break; -+ } -+#endif -+#ifdef PTRACE_SETREGS -+ case PTRACE_SETREGS: { /* Set all gp regs in the child. */ -+ unsigned long tmp = 0; -+ if (!access_ok(VERIFY_READ, (unsigned *)data, -+ FRAME_SIZE_OFFSET)) { -+ ret = -EIO; -+ break; -+ } -+ for ( i = 0; i < FRAME_SIZE_OFFSET; i += sizeof(long) ) { -+ __get_user(tmp, (unsigned long *) data); -+ putreg(child, i, tmp); -+ data += sizeof(long); -+ } -+ ret = 0; -+ break; -+ } -+#endif -+#ifdef PTRACE_GETFPREGS -+ case PTRACE_GETFPREGS: /* Get the child FPU state. 
*/ -+ ret = get_fpregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_SETFPREGS -+ case PTRACE_SETFPREGS: /* Set the child FPU state. */ -+ ret = set_fpregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_GETFPXREGS -+ case PTRACE_GETFPXREGS: /* Get the child FPU state. */ -+ ret = get_fpxregs(data, child); -+ break; -+#endif -+#ifdef PTRACE_SETFPXREGS -+ case PTRACE_SETFPXREGS: /* Set the child FPU state. */ -+ ret = set_fpxregs(data, child); -+ break; -+#endif -+ case PTRACE_FAULTINFO: { -+ struct ptrace_faultinfo fault; -+ -+ fault = ((struct ptrace_faultinfo) -+ { .is_write = child->thread.err, -+ .addr = child->thread.cr2 }); -+ ret = copy_to_user((unsigned long *) data, &fault, -+ sizeof(fault)); -+ if(ret) -+ break; -+ break; -+ } -+ case PTRACE_SIGPENDING: -+ ret = copy_to_user((unsigned long *) data, -+ &child->pending.signal, -+ sizeof(child->pending.signal)); -+ break; -+ -+ case PTRACE_LDT: { -+ struct ptrace_ldt ldt; -+ -+ if(copy_from_user(&ldt, (unsigned long *) data, -+ sizeof(ldt))){ -+ ret = -EIO; -+ break; -+ } -+ -+ /* This one is confusing, so just punt and return -EIO for -+ * now -+ */ -+ ret = -EIO; -+ break; -+ } -+#ifdef CONFIG_PROC_MM -+ case PTRACE_SWITCH_MM: { -+ struct mm_struct *old = child->mm; -+ struct mm_struct *new = proc_mm_get_mm(data); -+ -+ if(IS_ERR(new)){ -+ ret = PTR_ERR(new); -+ break; -+ } -+ -+ atomic_inc(&new->mm_users); -+ child->mm = new; -+ child->active_mm = new; -+ mmput(old); -+ ret = 0; -+ break; -+ } -+#endif -+ default: -+ ret = -EIO; -+ break; -+ } -+ out_tsk: -+ free_task_struct(child); -+ out: -+ unlock_kernel(); -+ return ret; -+} -+ -+void syscall_trace(void) -+{ -+ if ((current->ptrace & (PT_PTRACED|PT_TRACESYS)) -+ != (PT_PTRACED|PT_TRACESYS)) -+ return; -+ current->exit_code = SIGTRAP; -+ current->state = TASK_STOPPED; -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ /* -+ * this isn't the same as continuing with a signal, but it will do -+ * for normal use. 
strace only continues with a signal if the -+ * stopping signal is not SIGTRAP. -brl -+ */ -+ if (current->exit_code) { -+ send_sig(current->exit_code, current, 1); -+ current->exit_code = 0; -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/reboot.c um/arch/um/kernel/reboot.c ---- orig/arch/um/kernel/reboot.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/reboot.c Mon Dec 30 20:57:42 2002 -@@ -0,0 +1,71 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "os.h" -+#include "mode.h" -+#include "choose-mode.h" -+ -+#ifdef CONFIG_SMP -+static void kill_idlers(int me) -+{ -+ struct task_struct *p; -+ int i; -+ -+ for(i = 0; i < sizeof(init_tasks)/sizeof(init_tasks[0]); i++){ -+ p = init_tasks[i]; -+ if((p != NULL) && (p->thread.mode.tt.extern_pid != me) && -+ (p->thread.mode.tt.extern_pid != -1)) -+ os_kill_process(p->thread.mode.tt.extern_pid, 0); -+ } -+} -+#endif -+ -+static void kill_off_processes(void) -+{ -+ CHOOSE_MODE(kill_off_processes_tt(), kill_off_processes_skas()); -+#ifdef CONFIG_SMP -+ kill_idlers(os_getpid()); -+#endif -+} -+ -+void uml_cleanup(void) -+{ -+ kill_off_processes(); -+ do_uml_exitcalls(); -+} -+ -+void machine_restart(char * __unused) -+{ -+ do_uml_exitcalls(); -+ kill_off_processes(); -+ CHOOSE_MODE(reboot_tt(), reboot_skas()); -+} -+ -+void machine_power_off(void) -+{ -+ do_uml_exitcalls(); -+ kill_off_processes(); -+ CHOOSE_MODE(halt_tt(), halt_skas()); -+} -+ -+void machine_halt(void) 
-+{ -+ machine_power_off(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/resource.c um/arch/um/kernel/resource.c ---- orig/arch/um/kernel/resource.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/resource.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/pci.h" -+ -+unsigned long resource_fixup(struct pci_dev * dev, struct resource * res, -+ unsigned long start, unsigned long size) -+{ -+ return start; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sigio_kern.c um/arch/um/kernel/sigio_kern.c ---- orig/arch/um/kernel/sigio_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/sigio_kern.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,56 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/list.h" -+#include "linux/slab.h" -+#include "asm/irq.h" -+#include "init.h" -+#include "sigio.h" -+#include "irq_user.h" -+ -+/* Protected by sigio_lock() called from write_sigio_workaround */ -+static int sigio_irq_fd = -1; -+ -+void sigio_interrupt(int irq, void *data, struct pt_regs *unused) -+{ -+ read_sigio_fd(sigio_irq_fd); -+ reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); -+} -+ -+int write_sigio_irq(int fd) -+{ -+ if(um_request_irq(SIGIO_WRITE_IRQ, fd, IRQ_READ, sigio_interrupt, -+ SA_INTERRUPT | SA_SAMPLE_RANDOM, "write sigio", -+ NULL)){ -+ printk("write_sigio_irq : um_request_irq failed\n"); -+ return(-1); -+ } -+ sigio_irq_fd = fd; -+ return(0); -+} -+ -+static spinlock_t sigio_spinlock = SPIN_LOCK_UNLOCKED; -+ -+void sigio_lock(void) -+{ -+ spin_lock(&sigio_spinlock); -+} -+ -+void sigio_unlock(void) -+{ -+ spin_unlock(&sigio_spinlock); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sigio_user.c um/arch/um/kernel/sigio_user.c ---- orig/arch/um/kernel/sigio_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/sigio_user.c Sun Dec 29 23:36:35 2002 -@@ -0,0 +1,440 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdlib.h> -+#include <termios.h> -+#include <pty.h> -+#include <fcntl.h> -+#include <signal.h> -+#include <errno.h> -+#include <string.h> -+#include <sched.h> -+#include <sys/socket.h> -+#include <sys/poll.h> -+#include "init.h" -+#include "user.h" -+#include "kern_util.h" -+#include "sigio.h" -+#include "helper.h" -+#include "os.h" -+ -+/* Changed during early boot */ -+int pty_output_sigio = 0; -+int pty_close_sigio = 0; -+ -+/* Used as a flag during SIGIO testing early in boot */ -+static int got_sigio = 0; -+ -+void __init handler(int sig) -+{ -+ got_sigio = 1; -+} -+ -+struct openpty_arg { -+ int master; -+ int slave; -+ int err; -+}; -+ -+static void openpty_cb(void *arg) -+{ -+ struct openpty_arg *info = arg; -+ -+ info->err = 0; -+ if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) -+ info->err = errno; -+} -+ -+void __init check_one_sigio(void (*proc)(int, int)) -+{ -+ struct sigaction old, new; -+ struct termios tt; -+ struct openpty_arg pty = { .master = -1, .slave = -1 }; -+ int master, slave, flags; -+ -+ initial_thread_cb(openpty_cb, &pty); -+ if(pty.err){ -+ printk("openpty failed, errno = %d\n", pty.err); -+ return; -+ } -+ -+ master = pty.master; -+ slave = pty.slave; -+ -+ if((master == -1) || (slave == -1)){ -+ printk("openpty failed to allocate a pty\n"); -+ return; -+ } -+ -+ if(tcgetattr(master, &tt) < 0) -+ panic("check_sigio : tcgetattr failed, errno = %d\n", errno); -+ cfmakeraw(&tt); -+ if(tcsetattr(master, TCSADRAIN, &tt) < 0) 
-+ panic("check_sigio : tcsetattr failed, errno = %d\n", errno); -+ -+ if((flags = fcntl(master, F_GETFL)) < 0) -+ panic("tty_fds : fcntl F_GETFL failed, errno = %d\n", errno); -+ -+ if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || -+ (fcntl(master, F_SETOWN, os_getpid()) < 0)) -+ panic("check_sigio : fcntl F_SETFL or F_SETOWN failed, " -+ "errno = %d\n", errno); -+ -+ if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) -+ panic("check_sigio : fcntl F_SETFL failed, errno = %d\n", -+ errno); -+ -+ if(sigaction(SIGIO, NULL, &old) < 0) -+ panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); -+ new = old; -+ new.sa_handler = handler; -+ if(sigaction(SIGIO, &new, NULL) < 0) -+ panic("check_sigio : sigaction 2 failed, errno = %d\n", errno); -+ -+ got_sigio = 0; -+ (*proc)(master, slave); -+ -+ close(master); -+ close(slave); -+ -+ if(sigaction(SIGIO, &old, NULL) < 0) -+ panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); -+} -+ -+static void tty_output(int master, int slave) -+{ -+ int n; -+ char buf[512]; -+ -+ printk("Checking that host ptys support output SIGIO..."); -+ -+ memset(buf, 0, sizeof(buf)); -+ while(write(master, buf, sizeof(buf)) > 0) ; -+ if(errno != EAGAIN) -+ panic("check_sigio : write failed, errno = %d\n", errno); -+ -+ while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; -+ -+ if(got_sigio){ -+ printk("Yes\n"); -+ pty_output_sigio = 1; -+ } -+ else if(errno == EAGAIN) printk("No, enabling workaround\n"); -+ else panic("check_sigio : read failed, errno = %d\n", errno); -+} -+ -+static void tty_close(int master, int slave) -+{ -+ printk("Checking that host ptys support SIGIO on close..."); -+ -+ close(slave); -+ if(got_sigio){ -+ printk("Yes\n"); -+ pty_close_sigio = 1; -+ } -+ else printk("No, enabling workaround\n"); -+} -+ -+void __init check_sigio(void) -+{ -+ if(access("/dev/ptmx", R_OK) && access("/dev/ptyp0", R_OK)){ -+ printk("No pseudo-terminals available - skipping pty SIGIO " -+ "check\n"); 
-+ return; -+ } -+ check_one_sigio(tty_output); -+ check_one_sigio(tty_close); -+} -+ -+/* Protected by sigio_lock(), also used by sigio_cleanup, which is an -+ * exitcall. -+ */ -+static int write_sigio_pid = -1; -+ -+/* These arrays are initialized before the sigio thread is started, and -+ * the descriptors closed after it is killed. So, it can't see them change. -+ * On the UML side, they are changed under the sigio_lock. -+ */ -+static int write_sigio_fds[2] = { -1, -1 }; -+static int sigio_private[2] = { -1, -1 }; -+ -+struct pollfds { -+ struct pollfd *poll; -+ int size; -+ int used; -+}; -+ -+/* Protected by sigio_lock(). Used by the sigio thread, but the UML thread -+ * synchronizes with it. -+ */ -+struct pollfds current_poll = { -+ .poll = NULL, -+ .size = 0, -+ .used = 0 -+}; -+ -+struct pollfds next_poll = { -+ .poll = NULL, -+ .size = 0, -+ .used = 0 -+}; -+ -+static int write_sigio_thread(void *unused) -+{ -+ struct pollfds *fds, tmp; -+ struct pollfd *p; -+ int i, n, respond_fd; -+ char c; -+ -+ fds = ¤t_poll; -+ while(1){ -+ n = poll(fds->poll, fds->used, -1); -+ if(n < 0){ -+ if(errno == EINTR) continue; -+ printk("write_sigio_thread : poll returned %d, " -+ "errno = %d\n", n, errno); -+ } -+ for(i = 0; i < fds->used; i++){ -+ p = &fds->poll[i]; -+ if(p->revents == 0) continue; -+ if(p->fd == sigio_private[1]){ -+ n = read(sigio_private[1], &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("write_sigio_thread : " -+ "read failed, errno = %d\n", -+ errno); -+ tmp = current_poll; -+ current_poll = next_poll; -+ next_poll = tmp; -+ respond_fd = sigio_private[1]; -+ } -+ else { -+ respond_fd = write_sigio_fds[1]; -+ fds->used--; -+ memmove(&fds->poll[i], &fds->poll[i + 1], -+ (fds->used - i) * sizeof(*fds->poll)); -+ } -+ -+ n = write(respond_fd, &c, sizeof(c)); -+ if(n != sizeof(c)) -+ printk("write_sigio_thread : write failed, " -+ "errno = %d\n", errno); -+ } -+ } -+} -+ -+static int need_poll(int n) -+{ -+ if(n <= next_poll.size){ -+ next_poll.used 
= n; -+ return(0); -+ } -+ if(next_poll.poll != NULL) kfree(next_poll.poll); -+ next_poll.poll = um_kmalloc_atomic(n * sizeof(struct pollfd)); -+ if(next_poll.poll == NULL){ -+ printk("need_poll : failed to allocate new pollfds\n"); -+ next_poll.size = 0; -+ next_poll.used = 0; -+ return(-1); -+ } -+ next_poll.size = n; -+ next_poll.used = n; -+ return(0); -+} -+ -+static void update_thread(void) -+{ -+ unsigned long flags; -+ int n; -+ char c; -+ -+ flags = set_signals(0); -+ n = write(sigio_private[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("update_thread : write failed, errno = %d\n", errno); -+ goto fail; -+ } -+ -+ n = read(sigio_private[0], &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("update_thread : read failed, errno = %d\n", errno); -+ goto fail; -+ } -+ -+ set_signals(flags); -+ return; -+ fail: -+ sigio_lock(); -+ if(write_sigio_pid != -1) -+ os_kill_process(write_sigio_pid, 1); -+ write_sigio_pid = -1; -+ close(sigio_private[0]); -+ close(sigio_private[1]); -+ close(write_sigio_fds[0]); -+ close(write_sigio_fds[1]); -+ sigio_unlock(); -+ set_signals(flags); -+} -+ -+int add_sigio_fd(int fd, int read) -+{ -+ int err = 0, i, n, events; -+ -+ sigio_lock(); -+ for(i = 0; i < current_poll.used; i++){ -+ if(current_poll.poll[i].fd == fd) -+ goto out; -+ } -+ -+ n = current_poll.used + 1; -+ err = need_poll(n); -+ if(err) -+ goto out; -+ -+ for(i = 0; i < current_poll.used; i++) -+ next_poll.poll[i] = current_poll.poll[i]; -+ -+ if(read) events = POLLIN; -+ else events = POLLOUT; -+ -+ next_poll.poll[n - 1] = ((struct pollfd) { .fd = fd, -+ .events = events, -+ .revents = 0 }); -+ update_thread(); -+ out: -+ sigio_unlock(); -+ return(err); -+} -+ -+int ignore_sigio_fd(int fd) -+{ -+ struct pollfd *p; -+ int err = 0, i, n = 0; -+ -+ sigio_lock(); -+ for(i = 0; i < current_poll.used; i++){ -+ if(current_poll.poll[i].fd == fd) break; -+ } -+ if(i == current_poll.used) -+ goto out; -+ -+ err = need_poll(current_poll.used - 1); -+ if(err) -+ goto 
out; -+ -+ for(i = 0; i < current_poll.used; i++){ -+ p = ¤t_poll.poll[i]; -+ if(p->fd != fd) next_poll.poll[n++] = current_poll.poll[i]; -+ } -+ if(n == i){ -+ printk("ignore_sigio_fd : fd %d not found\n", fd); -+ err = -1; -+ goto out; -+ } -+ -+ update_thread(); -+ out: -+ sigio_unlock(); -+ return(err); -+} -+ -+static int setup_initial_poll(int fd) -+{ -+ struct pollfd *p; -+ -+ p = um_kmalloc(sizeof(struct pollfd)); -+ if(p == NULL){ -+ printk("setup_initial_poll : failed to allocate poll\n"); -+ return(-1); -+ } -+ *p = ((struct pollfd) { .fd = fd, -+ .events = POLLIN, -+ .revents = 0 }); -+ current_poll = ((struct pollfds) { .poll = p, -+ .used = 1, -+ .size = 1 }); -+ return(0); -+} -+ -+void write_sigio_workaround(void) -+{ -+ unsigned long stack; -+ int err; -+ -+ sigio_lock(); -+ if(write_sigio_pid != -1) -+ goto out; -+ -+ err = os_pipe(write_sigio_fds, 1, 1); -+ if(err){ -+ printk("write_sigio_workaround - os_pipe 1 failed, " -+ "errno = %d\n", -err); -+ goto out; -+ } -+ err = os_pipe(sigio_private, 1, 1); -+ if(err){ -+ printk("write_sigio_workaround - os_pipe 2 failed, " -+ "errno = %d\n", -err); -+ goto out_close1; -+ } -+ if(setup_initial_poll(sigio_private[1])) -+ goto out_close2; -+ -+ write_sigio_pid = run_helper_thread(write_sigio_thread, NULL, -+ CLONE_FILES | CLONE_VM, &stack, 0); -+ -+ if(write_sigio_pid < 0) goto out_close2; -+ -+ if(write_sigio_irq(write_sigio_fds[0])) -+ goto out_kill; -+ -+ out: -+ sigio_unlock(); -+ return; -+ -+ out_kill: -+ os_kill_process(write_sigio_pid, 1); -+ write_sigio_pid = -1; -+ out_close2: -+ close(sigio_private[0]); -+ close(sigio_private[1]); -+ out_close1: -+ close(write_sigio_fds[0]); -+ close(write_sigio_fds[1]); -+ sigio_unlock(); -+} -+ -+int read_sigio_fd(int fd) -+{ -+ int n; -+ char c; -+ -+ n = read(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("read_sigio_fd - read failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ return(n); -+} -+ -+static void sigio_cleanup(void) -+{ -+ 
if(write_sigio_pid != -1) -+ os_kill_process(write_sigio_pid, 1); -+} -+ -+__uml_exitcall(sigio_cleanup); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/signal_kern.c um/arch/um/kernel/signal_kern.c ---- orig/arch/um/kernel/signal_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/signal_kern.c Sun Dec 8 19:44:13 2002 -@@ -0,0 +1,367 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/stddef.h" -+#include "linux/sys.h" -+#include "linux/sched.h" -+#include "linux/wait.h" -+#include "linux/kernel.h" -+#include "linux/smp_lock.h" -+#include "linux/module.h" -+#include "linux/slab.h" -+#include "asm/signal.h" -+#include "asm/uaccess.h" -+#include "asm/ucontext.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "signal_kern.h" -+#include "signal_user.h" -+#include "kern.h" -+#include "frame_kern.h" -+#include "sigcontext.h" -+#include "mode.h" -+ -+EXPORT_SYMBOL(block_signals); -+EXPORT_SYMBOL(unblock_signals); -+ -+static void force_segv(int sig) -+{ -+ if(sig == SIGSEGV){ -+ struct k_sigaction *ka; -+ -+ ka = ¤t->sig->action[SIGSEGV - 1]; -+ ka->sa.sa_handler = SIG_DFL; -+ } -+ force_sig(SIGSEGV, current); -+} -+ -+#define _S(nr) (1<<((nr)-1)) -+ -+#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP))) -+ -+/* -+ * OK, we're invoking a handler -+ */ -+static int handle_signal(struct pt_regs *regs, unsigned long signr, -+ struct k_sigaction *ka, siginfo_t *info, -+ sigset_t *oldset, int error) -+{ -+ __sighandler_t handler; -+ void (*restorer)(void); -+ unsigned long 
sp; -+ sigset_t save; -+ int err, ret; -+ -+ ret = 0; -+ switch(error){ -+ case -ERESTARTNOHAND: -+ ret = -EINTR; -+ break; -+ -+ case -ERESTARTSYS: -+ if (!(ka->sa.sa_flags & SA_RESTART)) { -+ ret = -EINTR; -+ break; -+ } -+ /* fallthrough */ -+ case -ERESTARTNOINTR: -+ PT_REGS_RESTART_SYSCALL(regs); -+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); -+ -+ /* This is because of the UM_SET_SYSCALL_RETURN and the fact -+ * that on i386 the system call number and return value are -+ * in the same register. When the system call restarts, %eax -+ * had better have the system call number in it. Since the -+ * return value doesn't matter (except that it shouldn't be -+ * -ERESTART*), we'll stick the system call number there. -+ */ -+ ret = PT_REGS_SYSCALL_NR(regs); -+ break; -+ } -+ -+ handler = ka->sa.sa_handler; -+ save = *oldset; -+ -+ if (ka->sa.sa_flags & SA_ONESHOT) -+ ka->sa.sa_handler = SIG_DFL; -+ -+ if (!(ka->sa.sa_flags & SA_NODEFER)) { -+ spin_lock_irq(¤t->sigmask_lock); -+ sigorsets(¤t->blocked, ¤t->blocked, -+ &ka->sa.sa_mask); -+ sigaddset(¤t->blocked, signr); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ } -+ -+ sp = PT_REGS_SP(regs); -+ -+ if((ka->sa.sa_flags & SA_ONSTACK) && (sas_ss_flags(sp) == 0)) -+ sp = current->sas_ss_sp + current->sas_ss_size; -+ -+ if(error != 0) PT_REGS_SET_SYSCALL_RETURN(regs, ret); -+ -+ if (ka->sa.sa_flags & SA_RESTORER) restorer = ka->sa.sa_restorer; -+ else restorer = NULL; -+ -+ if(ka->sa.sa_flags & SA_SIGINFO) -+ err = setup_signal_stack_si(sp, signr, (unsigned long) handler, -+ restorer, regs, info, &save); -+ else -+ err = setup_signal_stack_sc(sp, signr, (unsigned long) handler, -+ restorer, regs, &save); -+ if(err) goto segv; -+ -+ return(0); -+ segv: -+ force_segv(signr); -+ return(1); -+} -+ -+/* -+ * Note that 'init' is a special process: it doesn't get signals it doesn't -+ * want to handle. Thus you cannot kill init even with a SIGKILL even by -+ * mistake. 
-+ */ -+ -+static int kern_do_signal(struct pt_regs *regs, sigset_t *oldset, int error) -+{ -+ siginfo_t info; -+ struct k_sigaction *ka; -+ int err; -+ -+ if (!oldset) -+ oldset = ¤t->blocked; -+ -+ for (;;) { -+ unsigned long signr; -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ signr = dequeue_signal(¤t->blocked, &info); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ if (!signr) -+ break; -+ -+ if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) { -+ /* Let the debugger run. */ -+ current->exit_code = signr; -+ current->state = TASK_STOPPED; -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ -+ /* We're back. Did the debugger cancel the sig? */ -+ if (!(signr = current->exit_code)) -+ continue; -+ current->exit_code = 0; -+ -+ /* The debugger continued. Ignore SIGSTOP. */ -+ if (signr == SIGSTOP) -+ continue; -+ -+ /* Update the siginfo structure. Is this good? */ -+ if (signr != info.si_signo) { -+ info.si_signo = signr; -+ info.si_errno = 0; -+ info.si_code = SI_USER; -+ info.si_pid = current->p_pptr->pid; -+ info.si_uid = current->p_pptr->uid; -+ } -+ -+ /* If the (new) signal is now blocked, requeue it. */ -+ if (sigismember(¤t->blocked, signr)) { -+ send_sig_info(signr, &info, current); -+ continue; -+ } -+ } -+ -+ ka = ¤t->sig->action[signr-1]; -+ if (ka->sa.sa_handler == SIG_IGN) { -+ if (signr != SIGCHLD) -+ continue; -+ /* Check for SIGCHLD: it's special. */ -+ while (sys_wait4(-1, NULL, WNOHANG, NULL) > 0) -+ /* nothing */; -+ continue; -+ } -+ -+ if (ka->sa.sa_handler == SIG_DFL) { -+ int exit_code = signr; -+ -+ /* Init gets no signals it doesn't want. 
*/ -+ if (current->pid == 1) -+ continue; -+ -+ switch (signr) { -+ case SIGCONT: case SIGCHLD: case SIGWINCH: case SIGURG: -+ continue; -+ -+ case SIGTSTP: case SIGTTIN: case SIGTTOU: -+ if (is_orphaned_pgrp(current->pgrp)) -+ continue; -+ /* FALLTHRU */ -+ -+ case SIGSTOP: { -+ struct signal_struct *sig; -+ current->state = TASK_STOPPED; -+ current->exit_code = signr; -+ sig = current->p_pptr->sig; -+ if (sig && !(sig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) -+ notify_parent(current, SIGCHLD); -+ schedule(); -+ continue; -+ } -+ case SIGQUIT: case SIGILL: case SIGTRAP: -+ case SIGABRT: case SIGFPE: case SIGSEGV: -+ case SIGBUS: case SIGSYS: case SIGXCPU: case SIGXFSZ: -+ if (do_coredump(signr, ¤t->thread.regs)) -+ exit_code |= 0x80; -+ /* FALLTHRU */ -+ -+ default: -+ sig_exit(signr, exit_code, &info); -+ /* NOTREACHED */ -+ } -+ } -+ -+ /* Whee! Actually deliver the signal. */ -+ err = handle_signal(regs, signr, ka, &info, oldset, error); -+ if(!err) return(1); -+ } -+ -+ /* Did we come from a system call? */ -+ if(PT_REGS_SYSCALL_NR(regs) >= 0){ -+ /* Restart the system call - no handlers present */ -+ if(PT_REGS_SYSCALL_RET(regs) == -ERESTARTNOHAND || -+ PT_REGS_SYSCALL_RET(regs) == -ERESTARTSYS || -+ PT_REGS_SYSCALL_RET(regs) == -ERESTARTNOINTR){ -+ PT_REGS_ORIG_SYSCALL(regs) = PT_REGS_SYSCALL_NR(regs); -+ PT_REGS_RESTART_SYSCALL(regs); -+ } -+ } -+ -+ /* This closes a way to execute a system call on the host. If -+ * you set a breakpoint on a system call instruction and singlestep -+ * from it, the tracing thread used to PTRACE_SINGLESTEP the process -+ * rather than PTRACE_SYSCALL it, allowing the system call to execute -+ * on the host. The tracing thread will check this flag and -+ * PTRACE_SYSCALL if necessary. 
-+ */ -+ if((current->ptrace & PT_DTRACE) && -+ is_syscall(PT_REGS_IP(¤t->thread.regs))) -+ (void) CHOOSE_MODE(current->thread.mode.tt.singlestep_syscall = 1, 0); -+ -+ return(0); -+} -+ -+int do_signal(int error) -+{ -+ return(kern_do_signal(¤t->thread.regs, NULL, error)); -+} -+ -+/* -+ * Atomically swap in the new signal mask, and wait for a signal. -+ */ -+int sys_sigsuspend(int history0, int history1, old_sigset_t mask) -+{ -+ sigset_t saveset; -+ -+ mask &= _BLOCKABLE; -+ spin_lock_irq(¤t->sigmask_lock); -+ saveset = current->blocked; -+ siginitset(¤t->blocked, mask); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ while (1) { -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ if(kern_do_signal(¤t->thread.regs, &saveset, -EINTR)) -+ return(-EINTR); -+ } -+} -+ -+int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) -+{ -+ sigset_t saveset, newset; -+ -+ /* XXX: Don't preclude handling different sized sigset_t's. */ -+ if (sigsetsize != sizeof(sigset_t)) -+ return -EINVAL; -+ -+ if (copy_from_user(&newset, unewset, sizeof(newset))) -+ return -EFAULT; -+ sigdelsetmask(&newset, ~_BLOCKABLE); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ saveset = current->blocked; -+ current->blocked = newset; -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ -+ while (1) { -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ if (kern_do_signal(¤t->thread.regs, &saveset, -EINTR)) -+ return(-EINTR); -+ } -+} -+ -+static int copy_sc_from_user(struct pt_regs *to, void *from, -+ struct arch_frame_data *arch) -+{ -+ int ret; -+ -+ ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch), -+ copy_sc_from_user_skas(&to->regs, from)); -+ return(ret); -+} -+ -+int sys_sigreturn(struct pt_regs regs) -+{ -+ void *sc = sp_to_sc(PT_REGS_SP(¤t->thread.regs)); -+ void *mask = sp_to_mask(PT_REGS_SP(¤t->thread.regs)); -+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ 
copy_from_user(¤t->blocked.sig[0], sc_sigmask(sc), -+ sizeof(current->blocked.sig[0])); -+ copy_from_user(¤t->blocked.sig[1], mask, sig_size); -+ sigdelsetmask(¤t->blocked, ~_BLOCKABLE); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ copy_sc_from_user(¤t->thread.regs, sc, -+ &signal_frame_sc.common.arch); -+ return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); -+} -+ -+int sys_rt_sigreturn(struct pt_regs regs) -+{ -+ struct ucontext *uc = sp_to_uc(PT_REGS_SP(¤t->thread.regs)); -+ void *fp; -+ int sig_size = _NSIG_WORDS * sizeof(unsigned long); -+ -+ spin_lock_irq(¤t->sigmask_lock); -+ copy_from_user(¤t->blocked, &uc->uc_sigmask, sig_size); -+ sigdelsetmask(¤t->blocked, ~_BLOCKABLE); -+ recalc_sigpending(current); -+ spin_unlock_irq(¤t->sigmask_lock); -+ fp = (void *) (((unsigned long) uc) + sizeof(struct ucontext)); -+ copy_sc_from_user(¤t->thread.regs, &uc->uc_mcontext, -+ &signal_frame_si.common.arch); -+ return(PT_REGS_SYSCALL_RET(¤t->thread.regs)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/signal_user.c um/arch/um/kernel/signal_user.c ---- orig/arch/um/kernel/signal_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/signal_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,142 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stdlib.h> -+#include <signal.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <string.h> -+#include <sys/mman.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "signal_user.h" -+#include "signal_kern.h" -+#include "sysdep/sigcontext.h" -+#include "sigcontext.h" -+ -+void set_sigstack(void *sig_stack, int size) -+{ -+ stack_t stack = ((stack_t) { .ss_flags = 0, -+ .ss_sp = (__ptr_t) sig_stack, -+ .ss_size = size - sizeof(void *) }); -+ -+ if(sigaltstack(&stack, NULL) != 0) -+ panic("enabling signal stack failed, errno = %d\n", errno); -+} -+ -+void set_handler(int sig, void (*handler)(int), int flags, ...) -+{ -+ struct sigaction action; -+ va_list ap; -+ int mask; -+ -+ va_start(ap, flags); -+ action.sa_handler = handler; -+ sigemptyset(&action.sa_mask); -+ while((mask = va_arg(ap, int)) != -1){ -+ sigaddset(&action.sa_mask, mask); -+ } -+ action.sa_flags = flags; -+ action.sa_restorer = NULL; -+ if(sigaction(sig, &action, NULL) < 0) -+ panic("sigaction failed"); -+} -+ -+int change_sig(int signal, int on) -+{ -+ sigset_t sigset, old; -+ -+ sigemptyset(&sigset); -+ sigaddset(&sigset, signal); -+ sigprocmask(on ? 
SIG_UNBLOCK : SIG_BLOCK, &sigset, &old); -+ return(!sigismember(&old, signal)); -+} -+ -+static void change_signals(int type) -+{ -+ sigset_t mask; -+ -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ sigaddset(&mask, SIGIO); -+ sigaddset(&mask, SIGPROF); -+ if(sigprocmask(type, &mask, NULL) < 0) -+ panic("Failed to change signal mask - errno = %d", errno); -+} -+ -+void block_signals(void) -+{ -+ change_signals(SIG_BLOCK); -+} -+ -+void unblock_signals(void) -+{ -+ change_signals(SIG_UNBLOCK); -+} -+ -+#define SIGIO_BIT 0 -+#define SIGVTALRM_BIT 1 -+ -+static int enable_mask(sigset_t *mask) -+{ -+ int sigs; -+ -+ sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT; -+ sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT; -+ sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT; -+ return(sigs); -+} -+ -+int get_signals(void) -+{ -+ sigset_t mask; -+ -+ if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0) -+ panic("Failed to get signal mask"); -+ return(enable_mask(&mask)); -+} -+ -+int set_signals(int enable) -+{ -+ sigset_t mask; -+ int ret; -+ -+ sigemptyset(&mask); -+ if(enable & (1 << SIGIO_BIT)) -+ sigaddset(&mask, SIGIO); -+ if(enable & (1 << SIGVTALRM_BIT)){ -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ } -+ if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0) -+ panic("Failed to enable signals"); -+ ret = enable_mask(&mask); -+ sigemptyset(&mask); -+ if((enable & (1 << SIGIO_BIT)) == 0) -+ sigaddset(&mask, SIGIO); -+ if((enable & (1 << SIGVTALRM_BIT)) == 0){ -+ sigaddset(&mask, SIGVTALRM); -+ sigaddset(&mask, SIGALRM); -+ } -+ if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0) -+ panic("Failed to block signals"); -+ -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/Makefile um/arch/um/kernel/skas/Makefile ---- orig/arch/um/kernel/skas/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/Makefile Fri Nov 1 16:05:44 2002 -@@ -0,0 +1,30 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = skas.o -+ -+obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \ -+ process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o -+ -+subdir-y = sys-$(SUBARCH) -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o -+ -+include $(TOPDIR)/Rules.make -+ -+include/skas_ptregs.h : util/mk_ptregs -+ util/mk_ptregs > $@ -+ -+util/mk_ptregs : -+ $(MAKE) -C util -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -+ $(MAKE) -C util clean -+ $(RM) -f include/skas_ptregs.h -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/exec_kern.c um/arch/um/kernel/skas/exec_kern.c ---- orig/arch/um/kernel/skas/exec_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/exec_kern.c Mon Nov 11 18:57:19 2002 -@@ -0,0 +1,41 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "asm/current.h" -+#include "asm/page.h" -+#include "asm/signal.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/mmu_context.h" -+#include "tlb.h" -+#include "skas.h" -+#include "mmu.h" -+#include "os.h" -+ -+void flush_thread_skas(void) -+{ -+ force_flush_all(); -+ switch_mm_skas(current->mm->context.skas.mm_fd); -+} -+ -+void start_thread_skas(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp) -+{ -+ set_fs(USER_DS); -+ PT_REGS_IP(regs) = eip; -+ PT_REGS_SP(regs) = esp; -+} -+ -+/* -+ * 
Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/exec_user.c um/arch/um/kernel/skas/exec_user.c ---- orig/arch/um/kernel/skas/exec_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/exec_user.c Sun Nov 3 19:23:01 2002 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <signal.h> -+#include <sched.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include "user.h" -+#include "kern_util.h" -+#include "os.h" -+#include "time_user.h" -+ -+static int user_thread_tramp(void *arg) -+{ -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) -+ panic("user_thread_tramp - PTRACE_TRACEME failed, " -+ "errno = %d\n", errno); -+ enable_timer(); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+int user_thread(unsigned long stack, int flags) -+{ -+ int pid, status; -+ -+ pid = clone(user_thread_tramp, (void *) stack_sp(stack), -+ flags | CLONE_FILES | SIGCHLD, NULL); -+ if(pid < 0){ -+ printk("user_thread - clone failed, errno = %d\n", errno); -+ return(pid); -+ } -+ -+ if(waitpid(pid, &status, WUNTRACED) < 0){ -+ printk("user_thread - waitpid failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)){ -+ printk("user_thread - trampoline didn't stop, status = %d\n", -+ status); -+ return(-EINVAL); -+ } -+ -+ return(pid); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/mmu.h um/arch/um/kernel/skas/include/mmu.h ---- orig/arch/um/kernel/skas/include/mmu.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/include/mmu.h Sun Nov 10 21:21:50 2002 -@@ -0,0 +1,27 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_MMU_H -+#define __SKAS_MMU_H -+ -+#include "linux/list.h" -+#include "linux/spinlock.h" -+ -+struct mmu_context_skas { -+ int mm_fd; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/mode.h um/arch/um/kernel/skas/include/mode.h ---- orig/arch/um/kernel/skas/include/mode.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/include/mode.h Wed Mar 26 13:27:46 2003 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_SKAS_H__ -+#define __MODE_SKAS_H__ -+ -+extern unsigned long exec_regs[]; -+extern unsigned long exec_fp_regs[]; -+extern unsigned long exec_fpx_regs[]; -+extern int have_fpx_regs; -+ -+extern void user_time_init_skas(void); -+extern int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr); -+extern int copy_sc_to_user_skas(void *to_ptr, void *fp, -+ union uml_pt_regs *regs, -+ unsigned long fault_addr, int fault_type); -+extern void sig_handler_common_skas(int sig, void *sc_ptr); -+extern void 
halt_skas(void); -+extern void reboot_skas(void); -+extern void kill_off_processes_skas(void); -+extern int is_skas_winch(int pid, int fd, void *data); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/mode_kern.h um/arch/um/kernel/skas/include/mode_kern.h ---- orig/arch/um/kernel/skas/include/mode_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/include/mode_kern.h Mon Dec 16 21:49:11 2002 -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_MODE_KERN_H__ -+#define __SKAS_MODE_KERN_H__ -+ -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/ptrace.h" -+ -+extern void flush_thread_skas(void); -+extern void *_switch_to_skas(void *prev, void *next); -+extern void start_thread_skas(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp); -+extern int copy_thread_skas(int nr, unsigned long clone_flags, -+ unsigned long sp, unsigned long stack_top, -+ struct task_struct *p, struct pt_regs *regs); -+extern void release_thread_skas(struct task_struct *task); -+extern void exit_thread_skas(void); -+extern void initial_thread_cb_skas(void (*proc)(void *), void *arg); -+extern void init_idle_skas(void); -+extern void flush_tlb_kernel_vm_skas(void); -+extern void __flush_tlb_one_skas(unsigned long addr); -+extern void flush_tlb_range_skas(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_mm_skas(struct mm_struct *mm); -+extern void force_flush_all_skas(void); -+extern long execute_syscall_skas(void *r); -+extern void 
before_mem_skas(unsigned long unused); -+extern unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out); -+extern int start_uml_skas(void); -+extern int external_pid_skas(struct task_struct *task); -+extern int thread_pid_skas(struct thread_struct *thread); -+ -+#define kmem_end_skas (host_task_size - 1024 * 1024) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/proc_mm.h um/arch/um/kernel/skas/include/proc_mm.h ---- orig/arch/um/kernel/skas/include/proc_mm.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/include/proc_mm.h Wed Nov 13 11:57:23 2002 -@@ -0,0 +1,55 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_PROC_MM_H -+#define __SKAS_PROC_MM_H -+ -+#define MM_MMAP 54 -+#define MM_MUNMAP 55 -+#define MM_MPROTECT 56 -+#define MM_COPY_SEGMENTS 57 -+ -+struct mm_mmap { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+struct mm_munmap { -+ unsigned long addr; -+ unsigned long len; -+}; -+ -+struct mm_mprotect { -+ unsigned long addr; -+ unsigned long len; -+ unsigned int prot; -+}; -+ -+struct proc_mm_op { -+ int op; -+ union { -+ struct mm_mmap mmap; -+ struct mm_munmap munmap; -+ struct mm_mprotect mprotect; -+ int copy_segments; -+ } u; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/ptrace-skas.h um/arch/um/kernel/skas/include/ptrace-skas.h ---- orig/arch/um/kernel/skas/include/ptrace-skas.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/include/ptrace-skas.h Fri Jan 17 13:22:09 2003 -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_SKAS_H -+#define __PTRACE_SKAS_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_SKAS -+ -+#include "skas_ptregs.h" -+ -+#define HOST_FRAME_SIZE 17 -+ -+#define REGS_IP(r) ((r)[HOST_IP]) -+#define REGS_SP(r) ((r)[HOST_SP]) -+#define REGS_EFLAGS(r) ((r)[HOST_EFLAGS]) -+#define REGS_EAX(r) ((r)[HOST_EAX]) -+#define REGS_EBX(r) ((r)[HOST_EBX]) -+#define REGS_ECX(r) ((r)[HOST_ECX]) -+#define REGS_EDX(r) ((r)[HOST_EDX]) -+#define REGS_ESI(r) ((r)[HOST_ESI]) -+#define REGS_EDI(r) ((r)[HOST_EDI]) -+#define REGS_EBP(r) ((r)[HOST_EBP]) -+#define REGS_CS(r) ((r)[HOST_CS]) -+#define REGS_SS(r) ((r)[HOST_SS]) -+#define REGS_DS(r) ((r)[HOST_DS]) -+#define REGS_ES(r) ((r)[HOST_ES]) -+#define REGS_FS(r) ((r)[HOST_FS]) -+#define REGS_GS(r) ((r)[HOST_GS]) -+ -+#define REGS_SET_SYSCALL_RETURN(r, res) REGS_EAX(r) = (res) -+ -+#define REGS_RESTART_SYSCALL(r) IP_RESTART_SYSCALL(REGS_IP(r)) -+ -+#define REGS_SEGV_IS_FIXABLE(r) SEGV_IS_FIXABLE((r)->trap_type) -+ -+#define REGS_FAULT_ADDR(r) ((r)->fault_addr) -+ -+#define REGS_FAULT_WRITE(r) FAULT_WRITE((r)->fault_type) -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/skas.h um/arch/um/kernel/skas/include/skas.h ---- orig/arch/um/kernel/skas/include/skas.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/include/skas.h Sun Dec 8 21:00:12 2002 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_H -+#define __SKAS_H -+ -+#include "sysdep/ptrace.h" -+ -+extern int userspace_pid; -+ -+extern void switch_threads(void *me, void *next); -+extern void thread_wait(void *sw, void *fb); -+extern void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, -+ void (*handler)(int)); -+extern int start_idle_thread(void *stack, void *switch_buf_ptr, -+ void **fork_buf_ptr); -+extern int user_thread(unsigned long stack, int flags); -+extern void userspace(union uml_pt_regs *regs); -+extern void new_thread_proc(void *stack, void (*handler)(int sig)); -+extern void remove_sigstack(void); -+extern void new_thread_handler(int sig); -+extern void handle_syscall(union uml_pt_regs *regs); -+extern void map(int fd, unsigned long virt, unsigned long phys, -+ unsigned long len, int r, int w, int x); -+extern int unmap(int fd, void *addr, int len); -+extern int protect(int fd, unsigned long addr, unsigned long len, -+ int r, int w, int x, int must_succeed); -+extern void user_signal(int sig, union uml_pt_regs *regs); -+extern int singlestepping_skas(void); -+extern int new_mm(int from); -+extern void save_registers(union uml_pt_regs *regs); -+extern void restore_registers(union uml_pt_regs *regs); -+extern void start_userspace(void); -+extern void init_registers(int pid); -+ -+#endif -+ -+/* 
-+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/include/uaccess.h um/arch/um/kernel/skas/include/uaccess.h ---- orig/arch/um/kernel/skas/include/uaccess.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/include/uaccess.h Fri Jan 31 23:05:56 2003 -@@ -0,0 +1,232 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __SKAS_UACCESS_H -+#define __SKAS_UACCESS_H -+ -+#include "linux/string.h" -+#include "linux/sched.h" -+#include "asm/processor.h" -+#include "asm/pgtable.h" -+#include "asm/errno.h" -+#include "asm/current.h" -+#include "asm/a.out.h" -+#include "kern_util.h" -+ -+#define access_ok_skas(type, addr, size) \ -+ ((segment_eq(get_fs(), KERNEL_DS)) || \ -+ (((unsigned long) (addr) < TASK_SIZE) && \ -+ ((unsigned long) (addr) + (size) <= TASK_SIZE))) -+ -+static inline int verify_area_skas(int type, const void * addr, -+ unsigned long size) -+{ -+ return(access_ok_skas(type, addr, size) ? 
0 : -EFAULT); -+} -+ -+static inline unsigned long maybe_map(unsigned long virt, int is_write) -+{ -+ pte_t pte; -+ -+ void *phys = um_virt_to_phys(current, virt, &pte); -+ int dummy_code; -+ -+ if(IS_ERR(phys) || (is_write && !pte_write(pte))){ -+ if(!handle_page_fault(virt, 0, is_write, 0, &dummy_code)) -+ return(0); -+ phys = um_virt_to_phys(current, virt, NULL); -+ } -+ return((unsigned long) __va((unsigned long) phys)); -+} -+ -+static inline int buffer_op(unsigned long addr, int len, -+ int (*op)(unsigned long addr, int len, void *arg), -+ void *arg) -+{ -+ int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); -+ int remain = len, n; -+ -+ n = (*op)(addr, size, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ -+ addr += size; -+ remain -= size; -+ if(remain == 0) -+ return(0); -+ -+ while(addr < ((addr + remain) & PAGE_MASK)){ -+ n = (*op)(addr, PAGE_SIZE, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ -+ addr += PAGE_SIZE; -+ remain -= PAGE_SIZE; -+ } -+ if(remain == 0) -+ return(0); -+ -+ n = (*op)(addr, remain, arg); -+ if(n != 0) -+ return(n < 0 ? remain : 0); -+ return(0); -+} -+ -+static inline int copy_chunk_from_user(unsigned long from, int len, void *arg) -+{ -+ unsigned long *to_ptr = arg, to = *to_ptr; -+ -+ from = maybe_map(from, 0); -+ if(from == 0) -+ return(-1); -+ -+ memcpy((void *) to, (void *) from, len); -+ *to_ptr += len; -+ return(0); -+} -+ -+static inline int copy_from_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_READ, from, n) ? 
-+ buffer_op((unsigned long) from, n, copy_chunk_from_user, &to) : -+ n); -+} -+ -+static inline int copy_chunk_to_user(unsigned long to, int len, void *arg) -+{ -+ unsigned long *from_ptr = arg, from = *from_ptr; -+ -+ to = maybe_map(to, 1); -+ if(to == 0) -+ return(-1); -+ -+ memcpy((void *) to, (void *) from, len); -+ *from_ptr += len; -+ return(0); -+} -+ -+static inline int copy_to_user_skas(void *to, const void *from, int n) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memcpy(to, from, n); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, to, n) ? -+ buffer_op((unsigned long) to, n, copy_chunk_to_user, &from) : -+ n); -+} -+ -+static inline int strncpy_chunk_from_user(unsigned long from, int len, -+ void *arg) -+{ -+ char **to_ptr = arg, *to = *to_ptr; -+ int n; -+ -+ from = maybe_map(from, 0); -+ if(from == 0) -+ return(-1); -+ -+ strncpy(to, (void *) from, len); -+ n = strnlen(to, len); -+ *to_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+static inline int strncpy_from_user_skas(char *dst, const char *src, int count) -+{ -+ int n; -+ char *ptr = dst; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ strncpy(dst, src, count); -+ return(strnlen(dst, count)); -+ } -+ -+ if(!access_ok_skas(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = buffer_op((unsigned long) src, count, strncpy_chunk_from_user, -+ &ptr); -+ if(n != 0) -+ return(-EFAULT); -+ return(strnlen(dst, count)); -+} -+ -+static inline int clear_chunk(unsigned long addr, int len, void *unused) -+{ -+ addr = maybe_map(addr, 1); -+ if(addr == 0) -+ return(-1); -+ -+ memset((void *) addr, 0, len); -+ return(0); -+} -+ -+static inline int __clear_user_skas(void *mem, int len) -+{ -+ return(buffer_op((unsigned long) mem, len, clear_chunk, NULL)); -+} -+ -+static inline int clear_user_skas(void *mem, int len) -+{ -+ if(segment_eq(get_fs(), KERNEL_DS)){ -+ memset(mem, 0, len); -+ return(0); -+ } -+ -+ return(access_ok_skas(VERIFY_WRITE, mem, len) ? 
-+ buffer_op((unsigned long) mem, len, clear_chunk, NULL) : len); -+} -+ -+static inline int strnlen_chunk(unsigned long str, int len, void *arg) -+{ -+ int *len_ptr = arg, n; -+ -+ str = maybe_map(str, 0); -+ if(str == 0) -+ return(-1); -+ -+ n = strnlen((void *) str, len); -+ *len_ptr += n; -+ -+ if(n < len) -+ return(1); -+ return(0); -+} -+ -+static inline int strnlen_user_skas(const void *str, int len) -+{ -+ int count = 0, n; -+ -+ if(segment_eq(get_fs(), KERNEL_DS)) -+ return(strnlen(str, len) + 1); -+ -+ n = buffer_op((unsigned long) str, len, strnlen_chunk, &count); -+ if(n == 0) -+ return(count + 1); -+ return(-EFAULT); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/mem.c um/arch/um/kernel/skas/mem.c ---- orig/arch/um/kernel/skas/mem.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/mem.c Mon Dec 16 21:49:39 2002 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "mem_user.h" -+ -+unsigned long set_task_sizes_skas(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out) -+{ -+ /* Round up to the nearest 4M */ -+ unsigned long top = ROUND_4M((unsigned long) &arg); -+ -+ *host_size_out = top; -+ *task_size_out = top; -+ return(((unsigned long) set_task_sizes_skas) & ~0xffffff); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/mem_user.c um/arch/um/kernel/skas/mem_user.c ---- orig/arch/um/kernel/skas/mem_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/mem_user.c Tue Dec 31 00:13:18 2002 -@@ -0,0 +1,95 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <sys/mman.h> -+#include <sys/ptrace.h> -+#include "mem_user.h" -+#include "user.h" -+#include "os.h" -+#include "proc_mm.h" -+ -+void map(int fd, unsigned long virt, unsigned long phys, unsigned long len, -+ int r, int w, int x) -+{ -+ struct proc_mm_op map; -+ struct mem_region *region; -+ int prot, n; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ region = phys_region(phys); -+ -+ map = ((struct proc_mm_op) { .op = MM_MMAP, -+ .u = -+ { .mmap = -+ { .addr = virt, -+ .len = len, -+ .prot = prot, -+ .flags = MAP_SHARED | -+ MAP_FIXED, -+ .fd = region->fd, -+ .offset = phys_offset(phys) -+ } } } ); -+ n = os_write_file(fd, &map, sizeof(map)); -+ if(n != sizeof(map)) -+ printk("map : /proc/mm map failed, errno = %d\n", errno); -+} -+ -+int unmap(int fd, void *addr, int len) -+{ -+ struct proc_mm_op unmap; -+ int n; -+ -+ unmap = ((struct proc_mm_op) { .op = MM_MUNMAP, -+ .u = -+ { .munmap = -+ { .addr = (unsigned long) addr, -+ .len = len } } } ); -+ n = os_write_file(fd, &unmap, sizeof(unmap)); -+ if((n != 0) && (n != sizeof(unmap))) -+ return(-errno); -+ return(0); -+} -+ -+int protect(int fd, unsigned long addr, unsigned long len, int r, int w, -+ int x, int must_succeed) -+{ -+ struct proc_mm_op protect; -+ int prot, n; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? 
PROT_EXEC : 0); -+ -+ protect = ((struct proc_mm_op) { .op = MM_MPROTECT, -+ .u = -+ { .mprotect = -+ { .addr = (unsigned long) addr, -+ .len = len, -+ .prot = prot } } } ); -+ -+ n = os_write_file(fd, &protect, sizeof(protect)); -+ if((n != 0) && (n != sizeof(protect))){ -+ if(must_succeed) -+ panic("protect failed, errno = %d", errno); -+ return(-errno); -+ } -+ return(0); -+} -+ -+void before_mem_skas(unsigned long unused) -+{ -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/mmu.c um/arch/um/kernel/skas/mmu.c ---- orig/arch/um/kernel/skas/mmu.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/mmu.c Wed Nov 13 13:09:57 2002 -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/list.h" -+#include "linux/spinlock.h" -+#include "linux/slab.h" -+#include "asm/segment.h" -+#include "asm/mmu.h" -+#include "os.h" -+#include "skas.h" -+ -+int init_new_context_skas(struct task_struct *task, struct mm_struct *mm) -+{ -+ int from; -+ -+ if((current->mm != NULL) && (current->mm != &init_mm)) -+ from = current->mm->context.skas.mm_fd; -+ else from = -1; -+ -+ mm->context.skas.mm_fd = new_mm(from); -+ if(mm->context.skas.mm_fd < 0) -+ panic("init_new_context_skas - new_mm failed, errno = %d\n", -+ mm->context.skas.mm_fd); -+ -+ return(0); -+} -+ -+void destroy_context_skas(struct mm_struct *mm) -+{ -+ os_close_file(mm->context.skas.mm_fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/process.c um/arch/um/kernel/skas/process.c ---- orig/arch/um/kernel/skas/process.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/process.c Wed Mar 26 14:43:19 2003 -@@ -0,0 +1,407 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <signal.h> -+#include <setjmp.h> -+#include <sched.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <sys/mman.h> -+#include <sys/user.h> -+#include <asm/unistd.h> -+#include "user.h" -+#include "ptrace_user.h" -+#include "time_user.h" -+#include "sysdep/ptrace.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "skas.h" -+#include "sysdep/sigcontext.h" -+#include "os.h" -+#include "proc_mm.h" -+#include "skas_ptrace.h" -+#include "chan_user.h" -+ -+int is_skas_winch(int pid, int fd, void *data) -+{ -+ if(pid != getpid()) -+ return(0); -+ -+ register_winch_irq(-1, fd, -1, data); -+ return(1); -+} -+ -+unsigned long exec_regs[FRAME_SIZE]; -+unsigned long exec_fp_regs[HOST_FP_SIZE]; -+unsigned long exec_fpx_regs[HOST_XFP_SIZE]; -+int have_fpx_regs = 1; -+ -+static void handle_segv(int pid) -+{ -+ struct ptrace_faultinfo fault; -+ int err; -+ -+ err = ptrace(PTRACE_FAULTINFO, pid, 0, &fault); -+ if(err) -+ panic("handle_segv - PTRACE_FAULTINFO failed, errno = %d\n", -+ errno); -+ -+ segv(fault.addr, 0, FAULT_WRITE(fault.is_write), 1, NULL); -+} -+ -+static void handle_trap(int pid, union uml_pt_regs *regs) -+{ -+ int err, syscall_nr, status; -+ -+ syscall_nr = PT_SYSCALL_NR(regs->skas.regs); -+ if(syscall_nr < 1){ -+ 
relay_signal(SIGTRAP, regs); -+ return; -+ } -+ UPT_SYSCALL_NR(regs) = syscall_nr; -+ -+ err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); -+ if(err < 0) -+ panic("handle_trap - nullifying syscall failed errno = %d\n", -+ errno); -+ -+ err = ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ if(err < 0) -+ panic("handle_trap - continuing to end of syscall failed, " -+ "errno = %d\n", errno); -+ -+ err = waitpid(pid, &status, WUNTRACED); -+ if((err < 0) || !WIFSTOPPED(status) || (WSTOPSIG(status) != SIGTRAP)) -+ panic("handle_trap - failed to wait at end of syscall, " -+ "errno = %d, status = %d\n", errno, status); -+ -+ handle_syscall(regs); -+} -+ -+static int userspace_tramp(void *arg) -+{ -+ init_new_thread_signals(0); -+ enable_timer(); -+ ptrace(PTRACE_TRACEME, 0, 0, 0); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+int userspace_pid; -+ -+void start_userspace(void) -+{ -+ void *stack; -+ unsigned long sp; -+ int pid, status, n; -+ -+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, -+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -+ if(stack == MAP_FAILED) -+ panic("start_userspace : mmap failed, errno = %d", errno); -+ sp = (unsigned long) stack + PAGE_SIZE - sizeof(void *); -+ -+ pid = clone(userspace_tramp, (void *) sp, -+ CLONE_FILES | CLONE_VM | SIGCHLD, NULL); -+ if(pid < 0) -+ panic("start_userspace : clone failed, errno = %d", errno); -+ -+ do { -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0) -+ panic("start_userspace : wait failed, errno = %d", -+ errno); -+ } while(WIFSTOPPED(status) && (WSTOPSIG(status) == SIGVTALRM)); -+ -+ if(!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) -+ panic("start_userspace : expected SIGSTOP, got status = %d", -+ status); -+ -+ if(munmap(stack, PAGE_SIZE) < 0) -+ panic("start_userspace : munmap failed, errno = %d\n", errno); -+ -+ userspace_pid = pid; -+} -+ -+void userspace(union uml_pt_regs *regs) -+{ -+ int err, status, op; -+ -+ restore_registers(regs); -+ -+ err = 
ptrace(PTRACE_SYSCALL, userspace_pid, 0, 0); -+ if(err) -+ panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", -+ errno); -+ while(1){ -+ err = waitpid(userspace_pid, &status, WUNTRACED); -+ if(err < 0) -+ panic("userspace - waitpid failed, errno = %d\n", -+ errno); -+ -+ regs->skas.is_user = 1; -+ save_registers(regs); -+ -+ if(WIFSTOPPED(status)){ -+ switch(WSTOPSIG(status)){ -+ case SIGSEGV: -+ handle_segv(userspace_pid); -+ break; -+ case SIGTRAP: -+ handle_trap(userspace_pid, regs); -+ break; -+ case SIGIO: -+ case SIGVTALRM: -+ case SIGILL: -+ case SIGBUS: -+ case SIGFPE: -+ case SIGWINCH: -+ user_signal(WSTOPSIG(status), regs); -+ break; -+ default: -+ printk("userspace - child stopped with signal " -+ "%d\n", WSTOPSIG(status)); -+ } -+ interrupt_end(); -+ } -+ -+ restore_registers(regs); -+ -+ op = singlestepping_skas() ? PTRACE_SINGLESTEP : -+ PTRACE_SYSCALL; -+ err = ptrace(op, userspace_pid, 0, 0); -+ if(err) -+ panic("userspace - PTRACE_SYSCALL failed, " -+ "errno = %d\n", errno); -+ } -+} -+ -+void new_thread(void *stack, void **switch_buf_ptr, void **fork_buf_ptr, -+ void (*handler)(int)) -+{ -+ jmp_buf switch_buf, fork_buf; -+ -+ *switch_buf_ptr = &switch_buf; -+ *fork_buf_ptr = &fork_buf; -+ -+ if(setjmp(fork_buf) == 0) -+ new_thread_proc(stack, handler); -+ -+ remove_sigstack(); -+} -+ -+void thread_wait(void *sw, void *fb) -+{ -+ jmp_buf buf, **switch_buf = sw, *fork_buf; -+ -+ *switch_buf = &buf; -+ fork_buf = fb; -+ if(setjmp(buf) == 0) -+ longjmp(*fork_buf, 1); -+} -+ -+static int move_registers(int int_op, int fp_op, union uml_pt_regs *regs, -+ unsigned long *fp_regs) -+{ -+ if(ptrace(int_op, userspace_pid, 0, regs->skas.regs) < 0) -+ return(-errno); -+ if(ptrace(fp_op, userspace_pid, 0, fp_regs) < 0) -+ return(-errno); -+ return(0); -+} -+ -+void save_registers(union uml_pt_regs *regs) -+{ -+ unsigned long *fp_regs; -+ int err, fp_op; -+ -+ if(have_fpx_regs){ -+ fp_op = PTRACE_GETFPXREGS; -+ fp_regs = regs->skas.xfp; -+ } -+ else { -+ 
fp_op = PTRACE_GETFPREGS; -+ fp_regs = regs->skas.fp; -+ } -+ -+ err = move_registers(PTRACE_GETREGS, fp_op, regs, fp_regs); -+ if(err) -+ panic("save_registers - saving registers failed, errno = %d\n", -+ err); -+} -+ -+void restore_registers(union uml_pt_regs *regs) -+{ -+ unsigned long *fp_regs; -+ int err, fp_op; -+ -+ if(have_fpx_regs){ -+ fp_op = PTRACE_SETFPXREGS; -+ fp_regs = regs->skas.xfp; -+ } -+ else { -+ fp_op = PTRACE_SETFPREGS; -+ fp_regs = regs->skas.fp; -+ } -+ -+ err = move_registers(PTRACE_SETREGS, fp_op, regs, fp_regs); -+ if(err) -+ panic("restore_registers - saving registers failed, " -+ "errno = %d\n", err); -+} -+ -+void switch_threads(void *me, void *next) -+{ -+ jmp_buf my_buf, **me_ptr = me, *next_buf = next; -+ -+ *me_ptr = &my_buf; -+ if(setjmp(my_buf) == 0) -+ longjmp(*next_buf, 1); -+} -+ -+static jmp_buf initial_jmpbuf; -+ -+/* XXX Make these percpu */ -+static void (*cb_proc)(void *arg); -+static void *cb_arg; -+static jmp_buf *cb_back; -+ -+int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr) -+{ -+ jmp_buf **switch_buf = switch_buf_ptr; -+ int n; -+ -+ *fork_buf_ptr = &initial_jmpbuf; -+ n = setjmp(initial_jmpbuf); -+ if(n == 0) -+ new_thread_proc((void *) stack, new_thread_handler); -+ else if(n == 1) -+ remove_sigstack(); -+ else if(n == 2){ -+ (*cb_proc)(cb_arg); -+ longjmp(*cb_back, 1); -+ } -+ else if(n == 3){ -+ kmalloc_ok = 0; -+ return(0); -+ } -+ else if(n == 4){ -+ kmalloc_ok = 0; -+ return(1); -+ } -+ longjmp(**switch_buf, 1); -+} -+ -+void remove_sigstack(void) -+{ -+ stack_t stack = ((stack_t) { .ss_flags = SS_DISABLE, -+ .ss_sp = NULL, -+ .ss_size = 0 }); -+ -+ if(sigaltstack(&stack, NULL) != 0) -+ panic("disabling signal stack failed, errno = %d\n", errno); -+} -+ -+void initial_thread_cb_skas(void (*proc)(void *), void *arg) -+{ -+ jmp_buf here; -+ -+ cb_proc = proc; -+ cb_arg = arg; -+ cb_back = &here; -+ -+ block_signals(); -+ if(setjmp(here) == 0) -+ longjmp(initial_jmpbuf, 2); -+ 
unblock_signals(); -+ -+ cb_proc = NULL; -+ cb_arg = NULL; -+ cb_back = NULL; -+} -+ -+void halt_skas(void) -+{ -+ block_signals(); -+ longjmp(initial_jmpbuf, 3); -+} -+ -+void reboot_skas(void) -+{ -+ block_signals(); -+ longjmp(initial_jmpbuf, 4); -+} -+ -+int new_mm(int from) -+{ -+ struct proc_mm_op copy; -+ int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0); -+ -+ if(fd < 0) -+ return(-errno); -+ -+ if(from != -1){ -+ copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, -+ .u = -+ { .copy_segments = from } } ); -+ n = os_write_file(fd, &copy, sizeof(copy)); -+ if(n != sizeof(copy)) -+ printk("new_mm : /proc/mm copy_segments failed, " -+ "errno = %d\n", errno); -+ } -+ return(fd); -+} -+ -+void switch_mm_skas(int mm_fd) -+{ -+ int err; -+ -+ err = ptrace(PTRACE_SWITCH_MM, userspace_pid, 0, mm_fd); -+ if(err) -+ panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", -+ errno); -+} -+ -+void kill_off_processes_skas(void) -+{ -+ os_kill_process(userspace_pid, 1); -+} -+ -+void init_registers(int pid) -+{ -+ int err; -+ -+ if(ptrace(PTRACE_GETREGS, pid, 0, exec_regs) < 0) -+ panic("check_ptrace : PTRACE_GETREGS failed, errno = %d", -+ errno); -+ -+ err = ptrace(PTRACE_GETFPXREGS, pid, 0, exec_fpx_regs); -+ if(!err) -+ return; -+ -+ have_fpx_regs = 0; -+ if(errno != EIO) -+ panic("check_ptrace : PTRACE_GETFPXREGS failed, errno = %d", -+ errno); -+ -+ err = ptrace(PTRACE_GETFPREGS, pid, 0, exec_fp_regs); -+ if(err) -+ panic("check_ptrace : PTRACE_GETFPREGS failed, errno = %d", -+ errno); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file.
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/process_kern.c um/arch/um/kernel/skas/process_kern.c ---- orig/arch/um/kernel/skas/process_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/process_kern.c Sun Dec 22 20:37:39 2002 -@@ -0,0 +1,191 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/slab.h" -+#include "kern_util.h" -+#include "time_user.h" -+#include "signal_user.h" -+#include "skas.h" -+#include "os.h" -+#include "user_util.h" -+#include "tlb.h" -+#include "frame.h" -+#include "kern.h" -+#include "mode.h" -+ -+int singlestepping_skas(void) -+{ -+ int ret = current->ptrace & PT_DTRACE; -+ -+ current->ptrace &= ~PT_DTRACE; -+ return(ret); -+} -+ -+void *_switch_to_skas(void *prev, void *next) -+{ -+ struct task_struct *from, *to; -+ -+ from = prev; -+ to = next; -+ -+ /* XXX need to check runqueues[cpu].idle */ -+ if(current->pid == 0) -+ switch_timers(0); -+ -+ to->thread.prev_sched = from; -+ set_current(to); -+ -+ switch_threads(&from->thread.mode.skas.switch_buf, -+ to->thread.mode.skas.switch_buf); -+ -+ if(current->pid == 0) -+ switch_timers(1); -+ -+ return(current->thread.prev_sched); -+} -+ -+extern void schedule_tail(struct task_struct *prev); -+ -+void new_thread_handler(int sig) -+{ -+ int (*fn)(void *), n; -+ void *arg; -+ -+ fn = current->thread.request.u.thread.proc; -+ arg = current->thread.request.u.thread.arg; -+ change_sig(SIGUSR1, 1); -+ thread_wait(&current->thread.mode.skas.switch_buf, -+ current->thread.mode.skas.fork_buf); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ n = run_kernel_thread(fn, arg, &current->thread.exec_buf); -+ if(n == 1) -+ userspace(&current->thread.regs.regs); -+ else do_exit(0); -+} -+
-+void new_thread_proc(void *stack, void (*handler)(int sig)) -+{ -+ init_new_thread_stack(stack, handler); -+ os_usr1_process(os_getpid()); -+} -+ -+void release_thread_skas(struct task_struct *task) -+{ -+} -+ -+void exit_thread_skas(void) -+{ -+} -+ -+void fork_handler(int sig) -+{ -+ change_sig(SIGUSR1, 1); -+ thread_wait(&current->thread.mode.skas.switch_buf, -+ current->thread.mode.skas.fork_buf); -+ -+ force_flush_all(); -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ unblock_signals(); -+ -+ userspace(&current->thread.regs.regs); -+} -+ -+int copy_thread_skas(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ void (*handler)(int); -+ -+ if(current->thread.forking){ -+ memcpy(&p->thread.regs.regs.skas, -+ &current->thread.regs.regs.skas, -+ sizeof(p->thread.regs.regs.skas)); -+ REGS_SET_SYSCALL_RETURN(p->thread.regs.regs.skas.regs, 0); -+ if(sp != 0) REGS_SP(p->thread.regs.regs.skas.regs) = sp; -+ -+ handler = fork_handler; -+ } -+ else { -+ memcpy(p->thread.regs.regs.skas.regs, exec_regs, -+ sizeof(p->thread.regs.regs.skas.regs)); -+ memcpy(p->thread.regs.regs.skas.fp, exec_fp_regs, -+ sizeof(p->thread.regs.regs.skas.fp)); -+ memcpy(p->thread.regs.regs.skas.xfp, exec_fpx_regs, -+ sizeof(p->thread.regs.regs.skas.xfp)); -+ p->thread.request.u.thread = current->thread.request.u.thread; -+ handler = new_thread_handler; -+ } -+ -+ new_thread((void *) p->thread.kernel_stack, -+ &p->thread.mode.skas.switch_buf, -+ &p->thread.mode.skas.fork_buf, handler); -+ return(0); -+} -+ -+void init_idle_skas(void) -+{ -+ cpu_tasks[current->processor].pid = os_getpid(); -+} -+ -+extern void start_kernel(void); -+ -+static int start_kernel_proc(void *unused) -+{ -+ int pid; -+ -+ block_signals(); -+ pid = os_getpid(); -+ -+ cpu_tasks[0].pid = pid; -+ cpu_tasks[0].task = current; -+#ifdef CONFIG_SMP -+ cpu_online_map = 1; -+#endif
-+ start_kernel(); -+ return(0); -+} -+ -+int start_uml_skas(void) -+{ -+ start_userspace(); -+ capture_signal_stack(); -+ -+ init_new_thread_signals(1); -+ idle_timer(); -+ -+ init_task.thread.request.u.thread.proc = start_kernel_proc; -+ init_task.thread.request.u.thread.arg = NULL; -+ return(start_idle_thread((void *) init_task.thread.kernel_stack, -+ &init_task.thread.mode.skas.switch_buf, -+ &init_task.thread.mode.skas.fork_buf)); -+} -+ -+int external_pid_skas(struct task_struct *task) -+{ -+ return(userspace_pid); -+} -+ -+int thread_pid_skas(struct thread_struct *thread) -+{ -+ return(userspace_pid); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/sys-i386/Makefile um/arch/um/kernel/skas/sys-i386/Makefile ---- orig/arch/um/kernel/skas/sys-i386/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/sys-i386/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = sys-i386.o -+ -+obj-y = sigcontext.o -+ -+USER_OBJS = sigcontext.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/sys-i386/sigcontext.c um/arch/um/kernel/skas/sys-i386/sigcontext.c ---- orig/arch/um/kernel/skas/sys-i386/sigcontext.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/sys-i386/sigcontext.c Sun Dec 8 20:38:46 2002 -@@ -0,0 +1,115 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> 
-+#include <asm/sigcontext.h> -+#include <sys/ptrace.h> -+#include <linux/ptrace.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/ptrace_user.h" -+#include "kern_util.h" -+#include "user.h" -+#include "sigcontext.h" -+ -+extern int userspace_pid; -+ -+int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr) -+{ -+ struct sigcontext sc, *from = from_ptr; -+ unsigned long fpregs[FP_FRAME_SIZE]; -+ int err; -+ -+ err = copy_from_user_proc(&sc, from, sizeof(sc)); -+ err |= copy_from_user_proc(fpregs, sc.fpstate, sizeof(fpregs)); -+ if(err) -+ return(err); -+ -+ regs->skas.regs[GS] = sc.gs; -+ regs->skas.regs[FS] = sc.fs; -+ regs->skas.regs[ES] = sc.es; -+ regs->skas.regs[DS] = sc.ds; -+ regs->skas.regs[EDI] = sc.edi; -+ regs->skas.regs[ESI] = sc.esi; -+ regs->skas.regs[EBP] = sc.ebp; -+ regs->skas.regs[UESP] = sc.esp; -+ regs->skas.regs[EBX] = sc.ebx; -+ regs->skas.regs[EDX] = sc.edx; -+ regs->skas.regs[ECX] = sc.ecx; -+ regs->skas.regs[EAX] = sc.eax; -+ regs->skas.regs[EIP] = sc.eip; -+ regs->skas.regs[CS] = sc.cs; -+ regs->skas.regs[EFL] = sc.eflags; -+ regs->skas.regs[UESP] = sc.esp_at_signal; -+ regs->skas.regs[SS] = sc.ss; -+ regs->skas.fault_addr = sc.cr2; -+ regs->skas.fault_type = FAULT_WRITE(sc.err); -+ regs->skas.trap_type = sc.trapno; -+ -+ err = ptrace(PTRACE_SETFPREGS, userspace_pid, 0, fpregs); -+ if(err < 0){ -+ printk("copy_sc_to_user - PTRACE_SETFPREGS failed, " -+ "errno = %d\n", errno); -+ return(1); -+ } -+ -+ return(0); -+} -+ -+int copy_sc_to_user_skas(void *to_ptr, void *fp, union uml_pt_regs *regs, -+ unsigned long fault_addr, int fault_type) -+{ -+ struct sigcontext sc, *to = to_ptr; -+ struct _fpstate *to_fp; -+ unsigned long fpregs[FP_FRAME_SIZE]; -+ int err; -+ -+ sc.gs = regs->skas.regs[GS]; -+ sc.fs = regs->skas.regs[FS]; -+ sc.es = regs->skas.regs[ES]; -+ sc.ds = regs->skas.regs[DS]; -+ sc.edi = regs->skas.regs[EDI]; -+ sc.esi = regs->skas.regs[ESI]; -+ sc.ebp = regs->skas.regs[EBP]; -+ sc.esp = regs->skas.regs[UESP]; -+ 
sc.ebx = regs->skas.regs[EBX]; -+ sc.edx = regs->skas.regs[EDX]; -+ sc.ecx = regs->skas.regs[ECX]; -+ sc.eax = regs->skas.regs[EAX]; -+ sc.eip = regs->skas.regs[EIP]; -+ sc.cs = regs->skas.regs[CS]; -+ sc.eflags = regs->skas.regs[EFL]; -+ sc.esp_at_signal = regs->skas.regs[UESP]; -+ sc.ss = regs->skas.regs[SS]; -+ sc.cr2 = fault_addr; -+ sc.err = TO_SC_ERR(fault_type); -+ sc.trapno = regs->skas.trap_type; -+ -+ err = ptrace(PTRACE_GETFPREGS, userspace_pid, 0, fpregs); -+ if(err < 0){ -+ printk("copy_sc_to_user - PTRACE_GETFPREGS failed, " -+ "errno = %d\n", errno); -+ return(1); -+ } -+ to_fp = (struct _fpstate *) -+ (fp ? (unsigned long) fp : ((unsigned long) to + sizeof(*to))); -+ sc.fpstate = to_fp; -+ -+ if(err) -+ return(err); -+ -+ return(copy_to_user_proc(to, &sc, sizeof(sc)) || -+ copy_to_user_proc(to_fp, fpregs, sizeof(fpregs))); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/syscall_kern.c um/arch/um/kernel/skas/syscall_kern.c ---- orig/arch/um/kernel/skas/syscall_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/syscall_kern.c Sun Dec 8 21:01:44 2002 -@@ -0,0 +1,42 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sys.h" -+#include "asm/errno.h" -+#include "asm/unistd.h" -+#include "asm/ptrace.h" -+#include "asm/current.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+extern syscall_handler_t *sys_call_table[]; -+ -+long execute_syscall_skas(void *r) -+{ -+ struct pt_regs *regs = r; -+ long res; -+ int syscall; -+ -+ current->thread.nsyscalls++; -+ nsyscalls++; -+ syscall = UPT_SYSCALL_NR(&regs->regs); -+ -+ if((syscall >= NR_syscalls) || (syscall < 0)) -+ res = -ENOSYS; -+ else res = EXECUTE_SYSCALL(syscall, regs); -+ -+ return(res); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file.
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/syscall_user.c um/arch/um/kernel/skas/syscall_user.c ---- orig/arch/um/kernel/skas/syscall_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/syscall_user.c Sun Dec 8 21:00:12 2002 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <signal.h> -+#include "kern_util.h" -+#include "syscall_user.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+ -+/* XXX Bogus */ -+#define ERESTARTSYS 512 -+#define ERESTARTNOINTR 513 -+#define ERESTARTNOHAND 514 -+ -+void handle_syscall(union uml_pt_regs *regs) -+{ -+ long result; -+ int index; -+ -+ index = record_syscall_start(UPT_SYSCALL_NR(regs)); -+ -+ syscall_trace(); -+ result = execute_syscall(regs); -+ -+ REGS_SET_SYSCALL_RETURN(regs->skas.regs, result); -+ if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) || -+ (result == -ERESTARTNOINTR)) -+ do_signal(result); -+ -+ syscall_trace(); -+ record_syscall_end(index, result); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/time.c um/arch/um/kernel/skas/time.c ---- orig/arch/um/kernel/skas/time.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/time.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <sys/signal.h> -+#include <sys/time.h> -+#include "time_user.h" -+#include "process.h" -+#include "user.h" -+ -+void user_time_init_skas(void) -+{ -+ if(signal(SIGALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGALRM handler"); -+ if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/tlb.c um/arch/um/kernel/skas/tlb.c ---- orig/arch/um/kernel/skas/tlb.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/tlb.c Sun Dec 22 18:30:35 2002 -@@ -0,0 +1,153 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/mmu.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "skas.h" -+#include "os.h" -+ -+static void fix_range(struct mm_struct *mm, unsigned long start_addr, -+ unsigned long end_addr, int force) -+{ -+ pgd_t *npgd; -+ pmd_t *npmd; -+ pte_t *npte; -+ unsigned long addr; -+ int r, w, x, err, fd; -+ -+ if(mm == NULL) return; -+ fd = mm->context.skas.mm_fd; -+ for(addr = start_addr; addr < end_addr;){ -+ npgd = pgd_offset(mm, addr); -+ npmd = pmd_offset(npgd, addr); -+ if(pmd_present(*npmd)){ -+ npte = pte_offset(npmd, addr); -+ r = pte_read(*npte); -+ w = pte_write(*npte); -+ x = pte_exec(*npte); -+ if(!pte_dirty(*npte)) w = 0; -+ if(!pte_young(*npte)){ -+ r = 0; -+ w = 0; -+ } -+ if(force || pte_newpage(*npte)){ -+ err = unmap(fd, (void *) addr, PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*npte)) -+ map(fd, addr, -+ pte_val(*npte) & PAGE_MASK, -+ PAGE_SIZE, r, w, x); -+ } -+ else if(pte_newprot(*npte)){ -+ protect(fd, addr, PAGE_SIZE, r, w, x, 1); -+ } -+ *npte = pte_mkuptodate(*npte); -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(force || pmd_newpage(*npmd)){ -+ err = unmap(fd, (void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ pmd_mkuptodate(*npmd); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+static void flush_kernel_vm_range(unsigned long start, unsigned long end) -+{ -+ struct 
mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ int updated = 0, err; -+ -+ mm = &init_mm; -+ for(addr = start_vm; addr < end_vm;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte) || pte_newpage(*pte)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*pte)) -+ map_memory(addr, -+ pte_val(*pte) & PAGE_MASK, -+ PAGE_SIZE, 1, 1, 1); -+ } -+ else if(pte_newprot(*pte)){ -+ updated = 1; -+ protect_memory(addr, PAGE_SIZE, 1, 1, 1, 1); -+ } -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(pmd_newpage(*pmd)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+void flush_tlb_kernel_vm_skas(void) -+{ -+ flush_kernel_vm_range(start_vm, end_vm); -+} -+ -+void __flush_tlb_one_skas(unsigned long addr) -+{ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE); -+} -+ -+void flush_tlb_range_skas(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ if(mm == NULL) -+ flush_kernel_vm_range(start, end); -+ else fix_range(mm, start, end, 0); -+} -+ -+void flush_tlb_mm_skas(struct mm_struct *mm) -+{ -+ flush_tlb_kernel_vm_skas(); -+ fix_range(mm, 0, host_task_size, 0); -+} -+ -+void force_flush_all_skas(void) -+{ -+ fix_range(current->mm, 0, host_task_size, 1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/trap_user.c um/arch/um/kernel/skas/trap_user.c ---- orig/arch/um/kernel/skas/trap_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/trap_user.c Sun Dec 15 13:28:41 2002 -@@ -0,0 +1,65 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include <errno.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "task.h" -+#include "sigcontext.h" -+ -+void sig_handler_common_skas(int sig, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct skas_regs *r; -+ struct signal_info *info; -+ int save_errno = errno; -+ -+ r = &TASK_REGS(get_current())->skas; -+ r->is_user = 0; -+ r->fault_addr = SC_FAULT_ADDR(sc); -+ r->fault_type = SC_FAULT_TYPE(sc); -+ r->trap_type = SC_TRAP_TYPE(sc); -+ -+ change_sig(SIGUSR1, 1); -+ info = &sig_info[sig]; -+ if(!info->is_irq) unblock_signals(); -+ -+ (*info->handler)(sig, (union uml_pt_regs *) r); -+ -+ errno = save_errno; -+} -+ -+extern int missed_ticks[]; -+ -+void user_signal(int sig, union uml_pt_regs *regs) -+{ -+ struct signal_info *info; -+ -+ if(sig == SIGVTALRM) -+ missed_ticks[cpu()]++; -+ regs->skas.is_user = 1; -+ regs->skas.fault_addr = 0; -+ regs->skas.fault_type = 0; -+ regs->skas.trap_type = 0; -+ info = &sig_info[sig]; -+ (*info->handler)(sig, regs); -+ -+ unblock_signals(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/util/Makefile um/arch/um/kernel/skas/util/Makefile ---- orig/arch/um/kernel/skas/util/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/util/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,10 @@ -+all: mk_ptregs -+ -+mk_ptregs : mk_ptregs.o -+ $(CC) -o mk_ptregs mk_ptregs.o -+ -+mk_ptregs.o : mk_ptregs.c -+ $(CC) -c $< -+ -+clean : -+ $(RM) -f mk_ptregs *.o *~ -diff -Naur -X ../exclude-files orig/arch/um/kernel/skas/util/mk_ptregs.c um/arch/um/kernel/skas/util/mk_ptregs.c ---- orig/arch/um/kernel/skas/util/mk_ptregs.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/skas/util/mk_ptregs.c Mon Nov 11 12:10:06 2002 -@@ -0,0 +1,50 @@ -+#include <asm/ptrace.h> -+#include <asm/user.h> -+ -+#define PRINT_REG(name, val) printf("#define HOST_%s %d\n", (name), (val)) -+ -+int main(int argc, char **argv) -+{ -+ printf("/* Automatically generated by " -+ "arch/um/kernel/skas/util/mk_ptregs */\n"); -+ printf("\n"); -+ printf("#ifndef __SKAS_PT_REGS_\n"); -+ printf("#define __SKAS_PT_REGS_\n"); -+ printf("\n"); -+ printf("#define HOST_FRAME_SIZE %d\n", FRAME_SIZE); -+ printf("#define HOST_FP_SIZE %d\n", -+ sizeof(struct user_i387_struct) / sizeof(unsigned long)); -+ printf("#define HOST_XFP_SIZE %d\n", -+ sizeof(struct user_fxsr_struct) / sizeof(unsigned long)); -+ -+ PRINT_REG("IP", EIP); -+ PRINT_REG("SP", UESP); -+ PRINT_REG("EFLAGS", EFL); -+ PRINT_REG("EAX", EAX); -+ PRINT_REG("EBX", EBX); -+ PRINT_REG("ECX", ECX); -+ PRINT_REG("EDX", EDX); -+ PRINT_REG("ESI", ESI); -+ PRINT_REG("EDI", EDI); -+ PRINT_REG("EBP", EBP); -+ PRINT_REG("CS", CS); -+ PRINT_REG("SS", SS); -+ PRINT_REG("DS", DS); -+ PRINT_REG("FS", FS); -+ PRINT_REG("ES", ES); -+ PRINT_REG("GS", GS); -+ printf("\n"); -+ printf("#endif\n"); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow 
Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/smp.c um/arch/um/kernel/smp.c ---- orig/arch/um/kernel/smp.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/smp.c Sat Feb 22 14:28:45 2003 -@@ -0,0 +1,324 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+ -+ -+#ifdef CONFIG_SMP -+ -+#include "linux/sched.h" -+#include "linux/threads.h" -+#include "linux/interrupt.h" -+#include "asm/smp.h" -+#include "asm/processor.h" -+#include "asm/spinlock.h" -+#include "asm/softirq.h" -+#include "asm/hardirq.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "kern.h" -+#include "os.h" -+ -+/* Total count of live CPUs, set by smp_boot_cpus */ -+int smp_num_cpus = 1; -+ -+/* The 'big kernel lock' */ -+spinlock_cacheline_t kernel_flag_cacheline = {SPIN_LOCK_UNLOCKED}; -+ -+/* Per CPU bogomips and other parameters */ -+ -+/* The only piece used here is the ipi pipe, which is set before SMP is -+ * started and never changed. 
-+ */ -+struct cpuinfo_um cpu_data[NR_CPUS]; -+ -+/* CPU online map, set by smp_boot_cpus */ -+unsigned long cpu_online_map; -+ -+atomic_t global_bh_count; -+ -+/* Set when the idlers are all forked */ -+int smp_threads_ready = 0; -+ -+/* Not used by UML */ -+unsigned char global_irq_holder = 0; -+unsigned volatile long global_irq_lock; -+ -+/* A statistic, can be a little off */ -+static int num_reschedules_sent = 0; -+ -+void smp_send_reschedule(int cpu) -+{ -+ write(cpu_data[cpu].ipi_pipe[1], "R", 1); -+ num_reschedules_sent++; -+} -+ -+static void show(char * str) -+{ -+ int cpu = smp_processor_id(); -+ -+ printk(KERN_INFO "\n%s, CPU %d:\n", str, cpu); -+} -+ -+#define MAXCOUNT 100000000 -+ -+static inline void wait_on_bh(void) -+{ -+ int count = MAXCOUNT; -+ do { -+ if (!--count) { -+ show("wait_on_bh"); -+ count = ~0; -+ } -+ /* nothing .. wait for the other bh's to go away */ -+ } while (atomic_read(&global_bh_count) != 0); -+} -+ -+/* -+ * This is called when we want to synchronize with -+ * bottom half handlers. We need to wait until -+ * no other CPU is executing any bottom half handler. -+ * -+ * Don't wait if we're already running in an interrupt -+ * context or are inside a bh handler. 
-+ */ -+void synchronize_bh(void) -+{ -+ if (atomic_read(&global_bh_count) && !in_interrupt()) -+ wait_on_bh(); -+} -+ -+void smp_send_stop(void) -+{ -+ int i; -+ -+ printk(KERN_INFO "Stopping all CPUs..."); -+ for(i = 0; i < ncpus; i++){ -+ if(i == current->processor) -+ continue; -+ write(cpu_data[i].ipi_pipe[1], "S", 1); -+ } -+ printk("done\n"); -+} -+ -+ -+static atomic_t smp_commenced = ATOMIC_INIT(0); -+static volatile unsigned long smp_callin_map = 0; -+ -+void smp_commence(void) -+{ -+ printk("All CPUs are go!\n"); -+ -+ wmb(); -+ atomic_set(&smp_commenced, 1); -+} -+ -+static int idle_proc(void *unused) -+{ -+ int cpu, err; -+ -+ set_current(current); -+ del_from_runqueue(current); -+ unhash_process(current); -+ -+ cpu = current->processor; -+ err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); -+ if(err) -+ panic("CPU#%d failed to create IPI pipe, errno = %d", cpu, -+ -err); -+ -+ activate_ipi(cpu_data[cpu].ipi_pipe[0], -+ current->thread.mode.tt.extern_pid); -+ -+ wmb(); -+ if (test_and_set_bit(current->processor, &smp_callin_map)) { -+ printk("huh, CPU#%d already present??\n", current->processor); -+ BUG(); -+ } -+ -+ while (!atomic_read(&smp_commenced)) -+ cpu_relax(); -+ -+ init_idle(); -+ cpu_idle(); -+ return(0); -+} -+ -+static int idle_thread(int (*fn)(void *), int cpu) -+{ -+ struct task_struct *new_task; -+ int pid; -+ unsigned char c; -+ -+ current->thread.request.u.thread.proc = fn; -+ current->thread.request.u.thread.arg = NULL; -+ pid = do_fork(CLONE_VM | CLONE_PID, 0, NULL, 0); -+ if(pid < 0) panic("do_fork failed in idle_thread"); -+ new_task = get_task(pid, 1); -+ -+ cpu_tasks[cpu].pid = new_task->thread.mode.tt.extern_pid; -+ cpu_tasks[cpu].task = new_task; -+ init_tasks[cpu] = new_task; -+ new_task->processor = cpu; -+ new_task->cpus_allowed = 1 << cpu; -+ new_task->cpus_runnable = new_task->cpus_allowed; -+ CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c, -+ sizeof(c)), -+ ({ panic("skas mode doesn't support SMP"); })); -+ 
return(new_task->thread.mode.tt.extern_pid); -+} -+ -+void smp_boot_cpus(void) -+{ -+ int err; -+ -+ set_bit(0, &cpu_online_map); -+ set_bit(0, &smp_callin_map); -+ -+ err = os_pipe(cpu_data[0].ipi_pipe, 1, 1); -+ if(err) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); -+ -+ activate_ipi(cpu_data[0].ipi_pipe[0], -+ current->thread.mode.tt.extern_pid); -+ -+ if(ncpus < 1){ -+ printk(KERN_INFO "ncpus set to 1\n"); -+ ncpus = 1; -+ } -+ else if(ncpus > NR_CPUS){ -+ printk(KERN_INFO -+ "ncpus can't be greater than NR_CPUS, set to %d\n", -+ NR_CPUS); -+ ncpus = NR_CPUS; -+ } -+ -+ if(ncpus > 1){ -+ int i, pid; -+ -+ printk(KERN_INFO "Starting up other processors:\n"); -+ for(i=1;i<ncpus;i++){ -+ int waittime; -+ -+ /* Do this early, for hard_smp_processor_id() */ -+ cpu_tasks[i].pid = -1; -+ set_bit(i, &cpu_online_map); -+ smp_num_cpus++; -+ -+ pid = idle_thread(idle_proc, i); -+ printk(KERN_INFO "\t#%d - idle thread pid = %d.. ", -+ i, pid); -+ -+ waittime = 200000000; -+ while (waittime-- && !test_bit(i, &smp_callin_map)) -+ cpu_relax(); -+ -+ if (test_bit(i, &smp_callin_map)) -+ printk("online\n"); -+ else { -+ printk("failed\n"); -+ clear_bit(i, &cpu_online_map); -+ } -+ } -+ } -+} -+ -+int setup_profiling_timer(unsigned int multiplier) -+{ -+ printk(KERN_INFO "setup_profiling_timer\n"); -+ return(0); -+} -+ -+void smp_call_function_slave(int cpu); -+ -+void IPI_handler(int cpu) -+{ -+ unsigned char c; -+ int fd; -+ -+ fd = cpu_data[cpu].ipi_pipe[0]; -+ while (read(fd, &c, 1) == 1) { -+ switch (c) { -+ case 'C': -+ smp_call_function_slave(cpu); -+ break; -+ -+ case 'R': -+ current->need_resched = 1; -+ break; -+ -+ case 'S': -+ printk("CPU#%d stopping\n", cpu); -+ while(1) -+ pause(); -+ break; -+ -+ default: -+ printk("CPU#%d received unknown IPI [%c]!\n", cpu, c); -+ break; -+ } -+ } -+} -+ -+int hard_smp_processor_id(void) -+{ -+ return(pid_to_processor_id(os_getpid())); -+} -+ -+static spinlock_t call_lock = SPIN_LOCK_UNLOCKED; -+static atomic_t 
scf_started; -+static atomic_t scf_finished; -+static void (*func)(void *info); -+static void *info; -+ -+void smp_call_function_slave(int cpu) -+{ -+ atomic_inc(&scf_started); -+ (*func)(info); -+ atomic_inc(&scf_finished); -+} -+ -+int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, -+ int wait) -+{ -+ int cpus = smp_num_cpus - 1; -+ int i; -+ -+ if (!cpus) -+ return 0; -+ -+ spin_lock_bh(&call_lock); -+ atomic_set(&scf_started, 0); -+ atomic_set(&scf_finished, 0); -+ func = _func; -+ info = _info; -+ -+ for (i=0;i<NR_CPUS;i++) -+ if (i != current->processor && test_bit(i, &cpu_online_map)) -+ write(cpu_data[i].ipi_pipe[1], "C", 1); -+ -+ while (atomic_read(&scf_started) != cpus) -+ barrier(); -+ -+ if (wait) -+ while (atomic_read(&scf_finished) != cpus) -+ barrier(); -+ -+ spin_unlock_bh(&call_lock); -+ return 0; -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sys_call_table.c um/arch/um/kernel/sys_call_table.c ---- orig/arch/um/kernel/sys_call_table.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/sys_call_table.c Thu Feb 27 13:33:23 2003 -@@ -0,0 +1,485 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/unistd.h" -+#include "linux/version.h" -+#include "linux/sys.h" -+#include "asm/signal.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_exit; -+extern syscall_handler_t sys_fork; -+extern syscall_handler_t sys_creat; -+extern syscall_handler_t sys_link; -+extern syscall_handler_t sys_unlink; -+extern syscall_handler_t sys_chdir; -+extern syscall_handler_t sys_mknod; -+extern syscall_handler_t sys_chmod; -+extern syscall_handler_t sys_lchown16; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_stat; -+extern syscall_handler_t sys_getpid; -+extern syscall_handler_t sys_oldumount; -+extern syscall_handler_t sys_setuid16; -+extern syscall_handler_t sys_getuid16; -+extern syscall_handler_t sys_ptrace; -+extern syscall_handler_t sys_alarm; -+extern syscall_handler_t sys_fstat; -+extern syscall_handler_t sys_pause; -+extern syscall_handler_t sys_utime; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_access; -+extern syscall_handler_t sys_nice; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_sync; -+extern syscall_handler_t sys_kill; -+extern syscall_handler_t sys_rename; -+extern syscall_handler_t sys_mkdir; -+extern syscall_handler_t sys_rmdir; -+extern syscall_handler_t sys_pipe; -+extern syscall_handler_t sys_times; -+extern syscall_handler_t sys_ni_syscall; 
-+extern syscall_handler_t sys_brk; -+extern syscall_handler_t sys_setgid16; -+extern syscall_handler_t sys_getgid16; -+extern syscall_handler_t sys_signal; -+extern syscall_handler_t sys_geteuid16; -+extern syscall_handler_t sys_getegid16; -+extern syscall_handler_t sys_acct; -+extern syscall_handler_t sys_umount; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ioctl; -+extern syscall_handler_t sys_fcntl; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_setpgid; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_olduname; -+extern syscall_handler_t sys_umask; -+extern syscall_handler_t sys_chroot; -+extern syscall_handler_t sys_ustat; -+extern syscall_handler_t sys_dup2; -+extern syscall_handler_t sys_getppid; -+extern syscall_handler_t sys_getpgrp; -+extern syscall_handler_t sys_sigaction; -+extern syscall_handler_t sys_sgetmask; -+extern syscall_handler_t sys_ssetmask; -+extern syscall_handler_t sys_setreuid16; -+extern syscall_handler_t sys_setregid16; -+extern syscall_handler_t sys_sigsuspend; -+extern syscall_handler_t sys_sigpending; -+extern syscall_handler_t sys_sethostname; -+extern syscall_handler_t sys_setrlimit; -+extern syscall_handler_t sys_old_getrlimit; -+extern syscall_handler_t sys_getrusage; -+extern syscall_handler_t sys_gettimeofday; -+extern syscall_handler_t sys_settimeofday; -+extern syscall_handler_t sys_getgroups16; -+extern syscall_handler_t sys_setgroups16; -+extern syscall_handler_t sys_symlink; -+extern syscall_handler_t sys_lstat; -+extern syscall_handler_t sys_readlink; -+extern syscall_handler_t sys_uselib; -+extern syscall_handler_t sys_swapon; -+extern syscall_handler_t sys_reboot; -+extern syscall_handler_t old_readdir; -+extern syscall_handler_t sys_munmap; -+extern syscall_handler_t sys_truncate; -+extern syscall_handler_t sys_ftruncate; -+extern syscall_handler_t sys_fchmod; -+extern syscall_handler_t sys_fchown16; -+extern syscall_handler_t 
sys_getpriority; -+extern syscall_handler_t sys_setpriority; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_statfs; -+extern syscall_handler_t sys_fstatfs; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_socketcall; -+extern syscall_handler_t sys_syslog; -+extern syscall_handler_t sys_setitimer; -+extern syscall_handler_t sys_getitimer; -+extern syscall_handler_t sys_newstat; -+extern syscall_handler_t sys_newlstat; -+extern syscall_handler_t sys_newfstat; -+extern syscall_handler_t sys_uname; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_vhangup; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_swapoff; -+extern syscall_handler_t sys_sysinfo; -+extern syscall_handler_t sys_ipc; -+extern syscall_handler_t sys_fsync; -+extern syscall_handler_t sys_sigreturn; -+extern syscall_handler_t sys_rt_sigreturn; -+extern syscall_handler_t sys_clone; -+extern syscall_handler_t sys_setdomainname; -+extern syscall_handler_t sys_newuname; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_adjtimex; -+extern syscall_handler_t sys_mprotect; -+extern syscall_handler_t sys_sigprocmask; -+extern syscall_handler_t sys_create_module; -+extern syscall_handler_t sys_init_module; -+extern syscall_handler_t sys_delete_module; -+extern syscall_handler_t sys_get_kernel_syms; -+extern syscall_handler_t sys_quotactl; -+extern syscall_handler_t sys_getpgid; -+extern syscall_handler_t sys_fchdir; -+extern syscall_handler_t sys_bdflush; -+extern syscall_handler_t sys_sysfs; -+extern syscall_handler_t sys_personality; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_setfsuid16; -+extern syscall_handler_t sys_setfsgid16; -+extern syscall_handler_t sys_llseek; -+extern syscall_handler_t sys_getdents; -+extern syscall_handler_t sys_flock; -+extern syscall_handler_t sys_msync; -+extern syscall_handler_t 
sys_readv; -+extern syscall_handler_t sys_writev; -+extern syscall_handler_t sys_getsid; -+extern syscall_handler_t sys_fdatasync; -+extern syscall_handler_t sys_sysctl; -+extern syscall_handler_t sys_mlock; -+extern syscall_handler_t sys_munlock; -+extern syscall_handler_t sys_mlockall; -+extern syscall_handler_t sys_munlockall; -+extern syscall_handler_t sys_sched_setparam; -+extern syscall_handler_t sys_sched_getparam; -+extern syscall_handler_t sys_sched_setscheduler; -+extern syscall_handler_t sys_sched_getscheduler; -+extern syscall_handler_t sys_sched_get_priority_max; -+extern syscall_handler_t sys_sched_get_priority_min; -+extern syscall_handler_t sys_sched_rr_get_interval; -+extern syscall_handler_t sys_nanosleep; -+extern syscall_handler_t sys_mremap; -+extern syscall_handler_t sys_setresuid16; -+extern syscall_handler_t sys_getresuid16; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_query_module; -+extern syscall_handler_t sys_poll; -+extern syscall_handler_t sys_nfsservctl; -+extern syscall_handler_t sys_setresgid16; -+extern syscall_handler_t sys_getresgid16; -+extern syscall_handler_t sys_prctl; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_rt_sigaction; -+extern syscall_handler_t sys_rt_sigprocmask; -+extern syscall_handler_t sys_rt_sigpending; -+extern syscall_handler_t sys_rt_sigtimedwait; -+extern syscall_handler_t sys_rt_sigqueueinfo; -+extern syscall_handler_t sys_rt_sigsuspend; -+extern syscall_handler_t sys_pread; -+extern syscall_handler_t sys_pwrite; -+extern syscall_handler_t sys_chown16; -+extern syscall_handler_t sys_getcwd; -+extern syscall_handler_t sys_capget; -+extern syscall_handler_t sys_capset; -+extern syscall_handler_t sys_sigaltstack; -+extern syscall_handler_t sys_sendfile; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_ni_syscall; -+extern syscall_handler_t sys_vfork; -+extern syscall_handler_t sys_getrlimit; -+extern syscall_handler_t 
sys_mmap2; -+extern syscall_handler_t sys_truncate64; -+extern syscall_handler_t sys_ftruncate64; -+extern syscall_handler_t sys_stat64; -+extern syscall_handler_t sys_lstat64; -+extern syscall_handler_t sys_fstat64; -+extern syscall_handler_t sys_lchown; -+extern syscall_handler_t sys_getuid; -+extern syscall_handler_t sys_getgid; -+extern syscall_handler_t sys_geteuid; -+extern syscall_handler_t sys_getegid; -+extern syscall_handler_t sys_setreuid; -+extern syscall_handler_t sys_setregid; -+extern syscall_handler_t sys_getgroups; -+extern syscall_handler_t sys_setgroups; -+extern syscall_handler_t sys_fchown; -+extern syscall_handler_t sys_setresuid; -+extern syscall_handler_t sys_getresuid; -+extern syscall_handler_t sys_setresgid; -+extern syscall_handler_t sys_getresgid; -+extern syscall_handler_t sys_chown; -+extern syscall_handler_t sys_setuid; -+extern syscall_handler_t sys_setgid; -+extern syscall_handler_t sys_setfsuid; -+extern syscall_handler_t sys_setfsgid; -+extern syscall_handler_t sys_pivot_root; -+extern syscall_handler_t sys_mincore; -+extern syscall_handler_t sys_madvise; -+extern syscall_handler_t sys_fcntl64; -+extern syscall_handler_t sys_getdents64; -+extern syscall_handler_t sys_gettid; -+extern syscall_handler_t sys_readahead; -+extern syscall_handler_t sys_tkill; -+extern syscall_handler_t sys_setxattr; -+extern syscall_handler_t sys_lsetxattr; -+extern syscall_handler_t sys_fsetxattr; -+extern syscall_handler_t sys_getxattr; -+extern syscall_handler_t sys_lgetxattr; -+extern syscall_handler_t sys_fgetxattr; -+extern syscall_handler_t sys_listxattr; -+extern syscall_handler_t sys_llistxattr; -+extern syscall_handler_t sys_flistxattr; -+extern syscall_handler_t sys_removexattr; -+extern syscall_handler_t sys_lremovexattr; -+extern syscall_handler_t sys_fremovexattr; -+ -+extern syscall_handler_t um_mount; -+extern syscall_handler_t um_time; -+extern syscall_handler_t um_stime; -+ -+#define LAST_GENERIC_SYSCALL __NR_sched_getaffinity -+ 
-+#if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL -+#define LAST_SYSCALL LAST_GENERIC_SYSCALL -+#else -+#define LAST_SYSCALL LAST_ARCH_SYSCALL -+#endif -+ -+syscall_handler_t *sys_call_table[] = { -+ [ 0 ] = sys_ni_syscall, -+ [ __NR_exit ] = sys_exit, -+ [ __NR_fork ] = sys_fork, -+ [ __NR_read ] = (syscall_handler_t *) sys_read, -+ [ __NR_write ] = (syscall_handler_t *) sys_write, -+ -+ /* These three are declared differently in asm/unistd.h */ -+ [ __NR_open ] = (syscall_handler_t *) sys_open, -+ [ __NR_close ] = (syscall_handler_t *) sys_close, -+ [ __NR_waitpid ] = (syscall_handler_t *) sys_waitpid, -+ [ __NR_creat ] = sys_creat, -+ [ __NR_link ] = sys_link, -+ [ __NR_unlink ] = sys_unlink, -+ -+ /* declared differently in kern_util.h */ -+ [ __NR_execve ] = (syscall_handler_t *) sys_execve, -+ [ __NR_chdir ] = sys_chdir, -+ [ __NR_time ] = um_time, -+ [ __NR_mknod ] = sys_mknod, -+ [ __NR_chmod ] = sys_chmod, -+ [ __NR_lchown ] = sys_lchown16, -+ [ __NR_break ] = sys_ni_syscall, -+ [ __NR_oldstat ] = sys_stat, -+ [ __NR_lseek ] = (syscall_handler_t *) sys_lseek, -+ [ __NR_getpid ] = sys_getpid, -+ [ __NR_mount ] = um_mount, -+ [ __NR_umount ] = sys_oldumount, -+ [ __NR_setuid ] = sys_setuid16, -+ [ __NR_getuid ] = sys_getuid16, -+ [ __NR_stime ] = um_stime, -+ [ __NR_ptrace ] = sys_ptrace, -+ [ __NR_alarm ] = sys_alarm, -+ [ __NR_oldfstat ] = sys_fstat, -+ [ __NR_pause ] = sys_pause, -+ [ __NR_utime ] = sys_utime, -+ [ __NR_stty ] = sys_ni_syscall, -+ [ __NR_gtty ] = sys_ni_syscall, -+ [ __NR_access ] = sys_access, -+ [ __NR_nice ] = sys_nice, -+ [ __NR_ftime ] = sys_ni_syscall, -+ [ __NR_sync ] = sys_sync, -+ [ __NR_kill ] = sys_kill, -+ [ __NR_rename ] = sys_rename, -+ [ __NR_mkdir ] = sys_mkdir, -+ [ __NR_rmdir ] = sys_rmdir, -+ -+ /* Declared differently in asm/unistd.h */ -+ [ __NR_dup ] = (syscall_handler_t *) sys_dup, -+ [ __NR_pipe ] = sys_pipe, -+ [ __NR_times ] = sys_times, -+ [ __NR_prof ] = sys_ni_syscall, -+ [ __NR_brk ] = sys_brk, -+ [ __NR_setgid 
] = sys_setgid16, -+ [ __NR_getgid ] = sys_getgid16, -+ [ __NR_signal ] = sys_signal, -+ [ __NR_geteuid ] = sys_geteuid16, -+ [ __NR_getegid ] = sys_getegid16, -+ [ __NR_acct ] = sys_acct, -+ [ __NR_umount2 ] = sys_umount, -+ [ __NR_lock ] = sys_ni_syscall, -+ [ __NR_ioctl ] = sys_ioctl, -+ [ __NR_fcntl ] = sys_fcntl, -+ [ __NR_mpx ] = sys_ni_syscall, -+ [ __NR_setpgid ] = sys_setpgid, -+ [ __NR_ulimit ] = sys_ni_syscall, -+ [ __NR_oldolduname ] = sys_olduname, -+ [ __NR_umask ] = sys_umask, -+ [ __NR_chroot ] = sys_chroot, -+ [ __NR_ustat ] = sys_ustat, -+ [ __NR_dup2 ] = sys_dup2, -+ [ __NR_getppid ] = sys_getppid, -+ [ __NR_getpgrp ] = sys_getpgrp, -+ [ __NR_setsid ] = (syscall_handler_t *) sys_setsid, -+ [ __NR_sigaction ] = sys_sigaction, -+ [ __NR_sgetmask ] = sys_sgetmask, -+ [ __NR_ssetmask ] = sys_ssetmask, -+ [ __NR_setreuid ] = sys_setreuid16, -+ [ __NR_setregid ] = sys_setregid16, -+ [ __NR_sigsuspend ] = sys_sigsuspend, -+ [ __NR_sigpending ] = sys_sigpending, -+ [ __NR_sethostname ] = sys_sethostname, -+ [ __NR_setrlimit ] = sys_setrlimit, -+ [ __NR_getrlimit ] = sys_old_getrlimit, -+ [ __NR_getrusage ] = sys_getrusage, -+ [ __NR_gettimeofday ] = sys_gettimeofday, -+ [ __NR_settimeofday ] = sys_settimeofday, -+ [ __NR_getgroups ] = sys_getgroups16, -+ [ __NR_setgroups ] = sys_setgroups16, -+ [ __NR_symlink ] = sys_symlink, -+ [ __NR_oldlstat ] = sys_lstat, -+ [ __NR_readlink ] = sys_readlink, -+ [ __NR_uselib ] = sys_uselib, -+ [ __NR_swapon ] = sys_swapon, -+ [ __NR_reboot ] = sys_reboot, -+ [ __NR_readdir ] = old_readdir, -+ [ __NR_munmap ] = sys_munmap, -+ [ __NR_truncate ] = sys_truncate, -+ [ __NR_ftruncate ] = sys_ftruncate, -+ [ __NR_fchmod ] = sys_fchmod, -+ [ __NR_fchown ] = sys_fchown16, -+ [ __NR_getpriority ] = sys_getpriority, -+ [ __NR_setpriority ] = sys_setpriority, -+ [ __NR_profil ] = sys_ni_syscall, -+ [ __NR_statfs ] = sys_statfs, -+ [ __NR_fstatfs ] = sys_fstatfs, -+ [ __NR_ioperm ] = sys_ni_syscall, -+ [ __NR_socketcall ] = 
sys_socketcall, -+ [ __NR_syslog ] = sys_syslog, -+ [ __NR_setitimer ] = sys_setitimer, -+ [ __NR_getitimer ] = sys_getitimer, -+ [ __NR_stat ] = sys_newstat, -+ [ __NR_lstat ] = sys_newlstat, -+ [ __NR_fstat ] = sys_newfstat, -+ [ __NR_olduname ] = sys_uname, -+ [ __NR_iopl ] = sys_ni_syscall, -+ [ __NR_vhangup ] = sys_vhangup, -+ [ __NR_idle ] = sys_ni_syscall, -+ [ __NR_wait4 ] = (syscall_handler_t *) sys_wait4, -+ [ __NR_swapoff ] = sys_swapoff, -+ [ __NR_sysinfo ] = sys_sysinfo, -+ [ __NR_ipc ] = sys_ipc, -+ [ __NR_fsync ] = sys_fsync, -+ [ __NR_sigreturn ] = sys_sigreturn, -+ [ __NR_clone ] = sys_clone, -+ [ __NR_setdomainname ] = sys_setdomainname, -+ [ __NR_uname ] = sys_newuname, -+ [ __NR_adjtimex ] = sys_adjtimex, -+ [ __NR_mprotect ] = sys_mprotect, -+ [ __NR_sigprocmask ] = sys_sigprocmask, -+ [ __NR_create_module ] = sys_create_module, -+ [ __NR_init_module ] = sys_init_module, -+ [ __NR_delete_module ] = sys_delete_module, -+ [ __NR_get_kernel_syms ] = sys_get_kernel_syms, -+ [ __NR_quotactl ] = sys_quotactl, -+ [ __NR_getpgid ] = sys_getpgid, -+ [ __NR_fchdir ] = sys_fchdir, -+ [ __NR_bdflush ] = sys_bdflush, -+ [ __NR_sysfs ] = sys_sysfs, -+ [ __NR_personality ] = sys_personality, -+ [ __NR_afs_syscall ] = sys_ni_syscall, -+ [ __NR_setfsuid ] = sys_setfsuid16, -+ [ __NR_setfsgid ] = sys_setfsgid16, -+ [ __NR__llseek ] = sys_llseek, -+ [ __NR_getdents ] = sys_getdents, -+ [ __NR__newselect ] = (syscall_handler_t *) sys_select, -+ [ __NR_flock ] = sys_flock, -+ [ __NR_msync ] = sys_msync, -+ [ __NR_readv ] = sys_readv, -+ [ __NR_writev ] = sys_writev, -+ [ __NR_getsid ] = sys_getsid, -+ [ __NR_fdatasync ] = sys_fdatasync, -+ [ __NR__sysctl ] = sys_sysctl, -+ [ __NR_mlock ] = sys_mlock, -+ [ __NR_munlock ] = sys_munlock, -+ [ __NR_mlockall ] = sys_mlockall, -+ [ __NR_munlockall ] = sys_munlockall, -+ [ __NR_sched_setparam ] = sys_sched_setparam, -+ [ __NR_sched_getparam ] = sys_sched_getparam, -+ [ __NR_sched_setscheduler ] = sys_sched_setscheduler, 
-+ [ __NR_sched_getscheduler ] = sys_sched_getscheduler, -+ [ __NR_sched_yield ] = (syscall_handler_t *) yield, -+ [ __NR_sched_get_priority_max ] = sys_sched_get_priority_max, -+ [ __NR_sched_get_priority_min ] = sys_sched_get_priority_min, -+ [ __NR_sched_rr_get_interval ] = sys_sched_rr_get_interval, -+ [ __NR_nanosleep ] = sys_nanosleep, -+ [ __NR_mremap ] = sys_mremap, -+ [ __NR_setresuid ] = sys_setresuid16, -+ [ __NR_getresuid ] = sys_getresuid16, -+ [ __NR_vm86 ] = sys_ni_syscall, -+ [ __NR_query_module ] = sys_query_module, -+ [ __NR_poll ] = sys_poll, -+ [ __NR_nfsservctl ] = sys_nfsservctl, -+ [ __NR_setresgid ] = sys_setresgid16, -+ [ __NR_getresgid ] = sys_getresgid16, -+ [ __NR_prctl ] = sys_prctl, -+ [ __NR_rt_sigreturn ] = sys_rt_sigreturn, -+ [ __NR_rt_sigaction ] = sys_rt_sigaction, -+ [ __NR_rt_sigprocmask ] = sys_rt_sigprocmask, -+ [ __NR_rt_sigpending ] = sys_rt_sigpending, -+ [ __NR_rt_sigtimedwait ] = sys_rt_sigtimedwait, -+ [ __NR_rt_sigqueueinfo ] = sys_rt_sigqueueinfo, -+ [ __NR_rt_sigsuspend ] = sys_rt_sigsuspend, -+ [ __NR_pread ] = sys_pread, -+ [ __NR_pwrite ] = sys_pwrite, -+ [ __NR_chown ] = sys_chown16, -+ [ __NR_getcwd ] = sys_getcwd, -+ [ __NR_capget ] = sys_capget, -+ [ __NR_capset ] = sys_capset, -+ [ __NR_sigaltstack ] = sys_sigaltstack, -+ [ __NR_sendfile ] = sys_sendfile, -+ [ __NR_getpmsg ] = sys_ni_syscall, -+ [ __NR_putpmsg ] = sys_ni_syscall, -+ [ __NR_vfork ] = sys_vfork, -+ [ __NR_ugetrlimit ] = sys_getrlimit, -+ [ __NR_mmap2 ] = sys_mmap2, -+ [ __NR_truncate64 ] = sys_truncate64, -+ [ __NR_ftruncate64 ] = sys_ftruncate64, -+ [ __NR_stat64 ] = sys_stat64, -+ [ __NR_lstat64 ] = sys_lstat64, -+ [ __NR_fstat64 ] = sys_fstat64, -+ [ __NR_fcntl64 ] = sys_fcntl64, -+ [ __NR_getdents64 ] = sys_getdents64, -+ [ __NR_security ] = sys_ni_syscall, -+ [ __NR_gettid ] = sys_gettid, -+ [ __NR_readahead ] = sys_readahead, -+ [ __NR_setxattr ] = sys_setxattr, -+ [ __NR_lsetxattr ] = sys_lsetxattr, -+ [ __NR_fsetxattr ] = sys_fsetxattr, 
-+ [ __NR_getxattr ] = sys_getxattr, -+ [ __NR_lgetxattr ] = sys_lgetxattr, -+ [ __NR_fgetxattr ] = sys_fgetxattr, -+ [ __NR_listxattr ] = sys_listxattr, -+ [ __NR_llistxattr ] = sys_llistxattr, -+ [ __NR_flistxattr ] = sys_flistxattr, -+ [ __NR_removexattr ] = sys_removexattr, -+ [ __NR_lremovexattr ] = sys_lremovexattr, -+ [ __NR_fremovexattr ] = sys_fremovexattr, -+ [ __NR_tkill ] = sys_tkill, -+ [ __NR_sendfile64 ] = sys_ni_syscall, -+ [ __NR_futex ] = sys_ni_syscall, -+ [ __NR_sched_setaffinity ] = sys_ni_syscall, -+ [ __NR_sched_getaffinity ] = sys_ni_syscall, -+ -+ ARCH_SYSCALLS -+ [ LAST_SYSCALL + 1 ... NR_syscalls ] = -+ (syscall_handler_t *) sys_ni_syscall -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/syscall_kern.c um/arch/um/kernel/syscall_kern.c ---- orig/arch/um/kernel/syscall_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/syscall_kern.c Fri Nov 8 14:04:10 2002 -@@ -0,0 +1,343 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/file.h" -+#include "linux/smp_lock.h" -+#include "linux/mm.h" -+#include "linux/utsname.h" -+#include "linux/msg.h" -+#include "linux/shm.h" -+#include "linux/sys.h" -+#include "linux/unistd.h" -+#include "linux/slab.h" -+#include "linux/utime.h" -+#include "asm/mman.h" -+#include "asm/uaccess.h" -+#include "asm/ipc.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "sysdep/syscalls.h" -+#include "mode_kern.h" -+#include "choose-mode.h" -+ -+/* Unlocked, I don't care if this is a bit off */ -+int nsyscalls = 0; -+ -+long 
um_mount(char * dev_name, char * dir_name, char * type, -+ unsigned long new_flags, void * data) -+{ -+ if(type == NULL) type = ""; -+ return(sys_mount(dev_name, dir_name, type, new_flags, data)); -+} -+ -+long sys_fork(void) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(SIGCHLD, 0, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+long sys_clone(unsigned long clone_flags, unsigned long newsp) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(clone_flags, newsp, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+long sys_vfork(void) -+{ -+ long ret; -+ -+ current->thread.forking = 1; -+ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0); -+ current->thread.forking = 0; -+ return(ret); -+} -+ -+/* common code for old and new mmaps */ -+long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, unsigned long fd, -+ unsigned long pgoff) -+{ -+ int error = -EBADF; -+ struct file * file = NULL; -+ -+ flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); -+ if (!(flags & MAP_ANONYMOUS)) { -+ file = fget(fd); -+ if (!file) -+ goto out; -+ } -+ -+ down_write(&mm->mmap_sem); -+ error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff); -+ up_write(&mm->mmap_sem); -+ -+ if (file) -+ fput(file); -+ out: -+ return error; -+} -+ -+long sys_mmap2(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long pgoff) -+{ -+ return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); -+} -+ -+/* -+ * Perform the select(nd, in, out, ex, tv) and mmap() system -+ * calls. Linux/i386 didn't use to be able to handle more than -+ * 4 system call parameters, so these system calls used a memory -+ * block for parameter passing.. 
-+ */ -+ -+struct mmap_arg_struct { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset) -+{ -+ int err = -EINVAL; -+ if (offset & ~PAGE_MASK) -+ goto out; -+ -+ err = do_mmap2(current->mm, addr, len, prot, flags, fd, -+ offset >> PAGE_SHIFT); -+ out: -+ return err; -+} -+/* -+ * sys_pipe() is the normal C calling standard for creating -+ * a pipe. It's not the way unix traditionally does this, though. -+ */ -+int sys_pipe(unsigned long * fildes) -+{ -+ int fd[2]; -+ int error; -+ -+ error = do_pipe(fd); -+ if (!error) { -+ if (copy_to_user(fildes, fd, 2*sizeof(int))) -+ error = -EFAULT; -+ } -+ return error; -+} -+ -+int sys_pause(void) -+{ -+ current->state = TASK_INTERRUPTIBLE; -+ schedule(); -+ return -ERESTARTNOHAND; -+} -+ -+int sys_sigaction(int sig, const struct old_sigaction *act, -+ struct old_sigaction *oact) -+{ -+ struct k_sigaction new_ka, old_ka; -+ int ret; -+ -+ if (act) { -+ old_sigset_t mask; -+ if (verify_area(VERIFY_READ, act, sizeof(*act)) || -+ __get_user(new_ka.sa.sa_handler, &act->sa_handler) || -+ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) -+ return -EFAULT; -+ __get_user(new_ka.sa.sa_flags, &act->sa_flags); -+ __get_user(mask, &act->sa_mask); -+ siginitset(&new_ka.sa.sa_mask, mask); -+ } -+ -+ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); -+ -+ if (!ret && oact) { -+ if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || -+ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || -+ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) -+ return -EFAULT; -+ __put_user(old_ka.sa.sa_flags, &oact->sa_flags); -+ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); -+ } -+ -+ return ret; -+} -+ -+/* -+ * sys_ipc() is the de-multiplexer for the SysV IPC calls.. 
-+ * -+ * This is really horribly ugly. -+ */ -+int sys_ipc (uint call, int first, int second, -+ int third, void *ptr, long fifth) -+{ -+ int version, ret; -+ -+ version = call >> 16; /* hack for backward compatibility */ -+ call &= 0xffff; -+ -+ switch (call) { -+ case SEMOP: -+ return sys_semop (first, (struct sembuf *)ptr, second); -+ case SEMGET: -+ return sys_semget (first, second, third); -+ case SEMCTL: { -+ union semun fourth; -+ if (!ptr) -+ return -EINVAL; -+ if (get_user(fourth.__pad, (void **) ptr)) -+ return -EFAULT; -+ return sys_semctl (first, second, third, fourth); -+ } -+ -+ case MSGSND: -+ return sys_msgsnd (first, (struct msgbuf *) ptr, -+ second, third); -+ case MSGRCV: -+ switch (version) { -+ case 0: { -+ struct ipc_kludge tmp; -+ if (!ptr) -+ return -EINVAL; -+ -+ if (copy_from_user(&tmp, -+ (struct ipc_kludge *) ptr, -+ sizeof (tmp))) -+ return -EFAULT; -+ return sys_msgrcv (first, tmp.msgp, second, -+ tmp.msgtyp, third); -+ } -+ default: -+ panic("msgrcv with version != 0"); -+ return sys_msgrcv (first, -+ (struct msgbuf *) ptr, -+ second, fifth, third); -+ } -+ case MSGGET: -+ return sys_msgget ((key_t) first, second); -+ case MSGCTL: -+ return sys_msgctl (first, second, (struct msqid_ds *) ptr); -+ -+ case SHMAT: -+ switch (version) { -+ default: { -+ ulong raddr; -+ ret = sys_shmat (first, (char *) ptr, second, &raddr); -+ if (ret) -+ return ret; -+ return put_user (raddr, (ulong *) third); -+ } -+ case 1: /* iBCS2 emulator entry point */ -+ if (!segment_eq(get_fs(), get_ds())) -+ return -EINVAL; -+ return sys_shmat (first, (char *) ptr, second, (ulong *) third); -+ } -+ case SHMDT: -+ return sys_shmdt ((char *)ptr); -+ case SHMGET: -+ return sys_shmget (first, second, third); -+ case SHMCTL: -+ return sys_shmctl (first, second, -+ (struct shmid_ds *) ptr); -+ default: -+ return -EINVAL; -+ } -+} -+ -+int sys_uname(struct old_utsname * name) -+{ -+ int err; -+ if (!name) -+ return -EFAULT; -+ down_read(&uts_sem); -+ 
err=copy_to_user(name, &system_utsname, sizeof (*name)); -+ up_read(&uts_sem); -+ return err?-EFAULT:0; -+} -+ -+int sys_olduname(struct oldold_utsname * name) -+{ -+ int error; -+ -+ if (!name) -+ return -EFAULT; -+ if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname))) -+ return -EFAULT; -+ -+ down_read(&uts_sem); -+ -+ error = __copy_to_user(&name->sysname,&system_utsname.sysname, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->sysname+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->nodename,&system_utsname.nodename, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->nodename+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->release,&system_utsname.release, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->release+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->version,&system_utsname.version, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->version+__OLD_UTS_LEN); -+ error |= __copy_to_user(&name->machine,&system_utsname.machine, -+ __OLD_UTS_LEN); -+ error |= __put_user(0,name->machine+__OLD_UTS_LEN); -+ -+ up_read(&uts_sem); -+ -+ error = error ? -EFAULT : 0; -+ -+ return error; -+} -+ -+int sys_sigaltstack(const stack_t *uss, stack_t *uoss) -+{ -+ return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs))); -+} -+ -+long execute_syscall(void *r) -+{ -+ return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r)); -+} -+ -+spinlock_t syscall_lock = SPIN_LOCK_UNLOCKED; -+ -+static int syscall_index = 0; -+ -+int next_syscall_index(int limit) -+{ -+ int ret; -+ -+ spin_lock(&syscall_lock); -+ ret = syscall_index; -+ if(++syscall_index == limit) -+ syscall_index = 0; -+ spin_unlock(&syscall_lock); -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file.
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/syscall_user.c um/arch/um/kernel/syscall_user.c ---- orig/arch/um/kernel/syscall_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/syscall_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <sys/time.h> -+#include "kern_util.h" -+#include "syscall_user.h" -+ -+struct { -+ int syscall; -+ int pid; -+ int result; -+ struct timeval start; -+ struct timeval end; -+} syscall_record[1024]; -+ -+int record_syscall_start(int syscall) -+{ -+ int max, index; -+ -+ max = sizeof(syscall_record)/sizeof(syscall_record[0]); -+ index = next_syscall_index(max); -+ -+ syscall_record[index].syscall = syscall; -+ syscall_record[index].pid = current_pid(); -+ syscall_record[index].result = 0xdeadbeef; -+ gettimeofday(&syscall_record[index].start, NULL); -+ return(index); -+} -+ -+void record_syscall_end(int index, int result) -+{ -+ syscall_record[index].result = result; -+ gettimeofday(&syscall_record[index].end, NULL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/sysrq.c um/arch/um/kernel/sysrq.c ---- orig/arch/um/kernel/sysrq.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/sysrq.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,98 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/kernel.h" -+#include "linux/module.h" -+#include "asm/page.h" -+#include "asm/processor.h" -+#include "sysrq.h" -+#include "user_util.h" -+ -+ /* -+ * If the address is either in the .text section of the -+ * kernel, or in the vmalloc'ed module regions, it *may* -+ * be the address of a calling routine -+ */ -+ -+#ifdef CONFIG_MODULES -+ -+extern struct module *module_list; -+extern struct module kernel_module; -+ -+static inline int kernel_text_address(unsigned long addr) -+{ -+ int retval = 0; -+ struct module *mod; -+ -+ if (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext) -+ return 1; -+ -+ for (mod = module_list; mod != &kernel_module; mod = mod->next) { -+ /* mod_bound tests for addr being inside the vmalloc'ed -+ * module area. Of course it'd be better to test only -+ * for the .text subset... 
*/ -+ if (mod_bound(addr, 0, mod)) { -+ retval = 1; -+ break; -+ } -+ } -+ -+ return retval; -+} -+ -+#else -+ -+static inline int kernel_text_address(unsigned long addr) -+{ -+ return (addr >= (unsigned long) &_stext && -+ addr <= (unsigned long) &_etext); -+} -+ -+#endif -+ -+void show_trace(unsigned long * stack) -+{ -+ int i; -+ unsigned long addr; -+ -+ if (!stack) -+ stack = (unsigned long*) &stack; -+ -+ printk("Call Trace: "); -+ i = 1; -+ while (((long) stack & (THREAD_SIZE-1)) != 0) { -+ addr = *stack++; -+ if (kernel_text_address(addr)) { -+ if (i && ((i % 6) == 0)) -+ printk("\n "); -+ printk("[<%08lx>] ", addr); -+ i++; -+ } -+ } -+ printk("\n"); -+} -+ -+void show_trace_task(struct task_struct *tsk) -+{ -+ unsigned long esp = PT_REGS_SP(&tsk->thread.regs); -+ -+ /* User space on another CPU? */ -+ if ((esp ^ (unsigned long)tsk) & (PAGE_MASK<<1)) -+ return; -+ show_trace((unsigned long *)esp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tempfile.c um/arch/um/kernel/tempfile.c ---- orig/arch/um/kernel/tempfile.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tempfile.c Fri Jan 17 23:16:19 2003 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/param.h> -+#include "init.h" -+ -+/* Modified from create_mem_file and start_debugger */ -+static char *tempdir = NULL; -+ -+static void __init find_tempdir(void) -+{ -+ char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL }; -+ int i; -+ char *dir = NULL; -+ -+ if(tempdir != NULL) return; /* We've already been called */ -+ for(i = 0; dirs[i]; i++){ -+ dir = getenv(dirs[i]); -+ if((dir != NULL) && (*dir != '\0')) -+ break; -+ } -+ if((dir == NULL) || (*dir == '\0')) -+ dir = "/tmp"; -+ -+ tempdir = malloc(strlen(dir) + 2); -+ if(tempdir == NULL){ -+ fprintf(stderr, "Failed to malloc tempdir, " -+ "errno = %d\n", errno); -+ return; -+ } -+ strcpy(tempdir, dir); -+ strcat(tempdir, "/"); -+} -+ -+int make_tempfile(const char *template, char **out_tempname, int do_unlink) -+{ -+ char tempname[MAXPATHLEN]; -+ int fd; -+ -+ find_tempdir(); -+ if (*template != '/') -+ strcpy(tempname, tempdir); -+ else -+ *tempname = 0; -+ strcat(tempname, template); -+ if((fd = mkstemp(tempname)) < 0){ -+ fprintf(stderr, "open - cannot create %s: %s\n", tempname, -+ strerror(errno)); -+ return -1; -+ } -+ if(do_unlink && (unlink(tempname) < 0)){ -+ perror("unlink"); -+ return -1; -+ } -+ if(out_tempname){ -+ if((*out_tempname = strdup(tempname)) == NULL){ -+ perror("strdup"); -+ return -1; -+ } -+ } -+ return(fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/time.c um/arch/um/kernel/time.c ---- orig/arch/um/kernel/time.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/time.c Wed Apr 23 20:45:19 2003 -@@ -0,0 +1,127 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <time.h> -+#include <sys/time.h> -+#include <signal.h> -+#include <errno.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "process.h" -+#include "signal_user.h" -+#include "time_user.h" -+ -+extern struct timeval xtime; -+ -+struct timeval local_offset = { 0, 0 }; -+ -+void timer(void) -+{ -+ gettimeofday(&xtime, NULL); -+ timeradd(&xtime, &local_offset, &xtime); -+} -+ -+void set_interval(int timer_type) -+{ -+ int usec = 1000000/hz(); -+ struct itimerval interval = ((struct itimerval) { { 0, usec }, -+ { 0, usec } }); -+ -+ if(setitimer(timer_type, &interval, NULL) == -1) -+ panic("setitimer failed - errno = %d\n", errno); -+} -+ -+void enable_timer(void) -+{ -+ int usec = 1000000/hz(); -+ struct itimerval enable = ((struct itimerval) { { 0, usec }, -+ { 0, usec }}); -+ if(setitimer(ITIMER_VIRTUAL, &enable, NULL)) -+ printk("enable_timer - setitimer failed, errno = %d\n", -+ errno); -+} -+ -+void switch_timers(int to_real) -+{ -+ struct itimerval disable = ((struct itimerval) { { 0, 0 }, { 0, 0 }}); -+ struct itimerval enable = ((struct itimerval) { { 0, 1000000/hz() }, -+ { 0, 1000000/hz() }}); -+ int old, new; -+ -+ if(to_real){ -+ old = ITIMER_VIRTUAL; -+ new = ITIMER_REAL; -+ } -+ else { -+ old = ITIMER_REAL; -+ new = ITIMER_VIRTUAL; -+ } -+ -+ 
if((setitimer(old, &disable, NULL) < 0) || -+ (setitimer(new, &enable, NULL))) -+ printk("switch_timers - setitimer failed, errno = %d\n", -+ errno); -+} -+ -+void idle_timer(void) -+{ -+ if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR) -+ panic("Couldn't unset SIGVTALRM handler"); -+ -+ set_handler(SIGALRM, (__sighandler_t) alarm_handler, -+ SA_RESTART, SIGUSR1, SIGIO, SIGWINCH, SIGVTALRM, -1); -+ set_interval(ITIMER_REAL); -+} -+ -+void time_init(void) -+{ -+ if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+void do_gettimeofday(struct timeval *tv) -+{ -+ unsigned long flags; -+ -+ flags = time_lock(); -+ gettimeofday(tv, NULL); -+ timeradd(tv, &local_offset, tv); -+ time_unlock(flags); -+} -+ -+void do_settimeofday(struct timeval *tv) -+{ -+ struct timeval now; -+ unsigned long flags; -+ -+ flags = time_lock(); -+ gettimeofday(&now, NULL); -+ timersub(tv, &now, &local_offset); -+ time_unlock(flags); -+} -+ -+void idle_sleep(int secs) -+{ -+ struct timespec ts; -+ -+ ts.tv_sec = secs; -+ ts.tv_nsec = 0; -+ nanosleep(&ts, NULL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/time_kern.c um/arch/um/kernel/time_kern.c ---- orig/arch/um/kernel/time_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/time_kern.c Wed Apr 23 22:19:08 2003 -@@ -0,0 +1,172 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/unistd.h" -+#include "linux/stddef.h" -+#include "linux/spinlock.h" -+#include "linux/sched.h" -+#include "linux/interrupt.h" -+#include "linux/init.h" -+#include "linux/delay.h" -+#include "asm/irq.h" -+#include "asm/param.h" -+#include "asm/current.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "time_user.h" -+#include "mode.h" -+ -+extern rwlock_t xtime_lock; -+ -+int hz(void) -+{ -+ return(HZ); -+} -+ -+/* Changed at early boot */ -+int timer_irq_inited = 0; -+ -+/* missed_ticks will be modified after kernel memory has been -+ * write-protected, so this puts it in a section which will be left -+ * write-enabled. 
-+ */ -+int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS]; -+ -+void timer_irq(union uml_pt_regs *regs) -+{ -+ int cpu = current->processor, ticks = missed_ticks[cpu]; -+ -+ if(!timer_irq_inited) return; -+ missed_ticks[cpu] = 0; -+ while(ticks--) do_IRQ(TIMER_IRQ, regs); -+} -+ -+void boot_timer_handler(int sig) -+{ -+ struct pt_regs regs; -+ -+ CHOOSE_MODE((void) -+ (UPT_SC(&regs.regs) = (struct sigcontext *) (&sig + 1)), -+ (void) (regs.regs.skas.is_user = 0)); -+ do_timer(&regs); -+} -+ -+void um_timer(int irq, void *dev, struct pt_regs *regs) -+{ -+ do_timer(regs); -+ write_lock(&xtime_lock); -+ vxtime_lock(); -+ timer(); -+ vxtime_unlock(); -+ write_unlock(&xtime_lock); -+} -+ -+long um_time(int * tloc) -+{ -+ struct timeval now; -+ -+ do_gettimeofday(&now); -+ if (tloc) { -+ if (put_user(now.tv_sec,tloc)) -+ now.tv_sec = -EFAULT; -+ } -+ return now.tv_sec; -+} -+ -+long um_stime(int * tptr) -+{ -+ int value; -+ struct timeval new; -+ -+ if (get_user(value, tptr)) -+ return -EFAULT; -+ new.tv_sec = value; -+ new.tv_usec = 0; -+ do_settimeofday(&new); -+ return 0; -+} -+ -+/* XXX Needs to be moved under sys-i386 */ -+void __delay(um_udelay_t time) -+{ -+ /* Stolen from the i386 __loop_delay */ -+ int d0; -+ __asm__ __volatile__( -+ "\tjmp 1f\n" -+ ".align 16\n" -+ "1:\tjmp 2f\n" -+ ".align 16\n" -+ "2:\tdecl %0\n\tjns 2b" -+ :"=&a" (d0) -+ :"0" (time)); -+} -+ -+void __udelay(um_udelay_t usecs) -+{ -+ int i, n; -+ -+ n = (loops_per_jiffy * HZ * usecs) / 1000000; -+ for(i=0;i<n;i++) ; -+} -+ -+void __const_udelay(um_udelay_t usecs) -+{ -+ int i, n; -+ -+ n = (loops_per_jiffy * HZ * usecs) / 1000000; -+ for(i=0;i<n;i++) ; -+} -+ -+void timer_handler(int sig, union uml_pt_regs *regs) -+{ -+#ifdef CONFIG_SMP -+ update_process_times(user_context(UPT_SP(regs))); -+#endif -+ if(current->processor == 0) -+ timer_irq(regs); -+} -+ -+static spinlock_t timer_spinlock = SPIN_LOCK_UNLOCKED; -+ -+unsigned long time_lock(void) -+{ -+ unsigned long flags; -+
-+ spin_lock_irqsave(&timer_spinlock, flags); -+ return(flags); -+} -+ -+void time_unlock(unsigned long flags) -+{ -+ spin_unlock_irqrestore(&timer_spinlock, flags); -+} -+ -+int __init timer_init(void) -+{ -+ int err; -+ -+ CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); -+ if((err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", -+ NULL)) != 0) -+ printk(KERN_ERR "timer_init : request_irq failed - " -+ "errno = %d\n", -err); -+ timer_irq_inited = 1; -+ return(0); -+} -+ -+__initcall(timer_init); -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tlb.c um/arch/um/kernel/tlb.c ---- orig/arch/um/kernel/tlb.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tlb.c Wed Oct 23 22:15:51 2002 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/mm.h" -+#include "asm/page.h" -+#include "asm/pgalloc.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+ -+void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) -+{ -+ address &= PAGE_MASK; -+ flush_tlb_range(vma->vm_mm, address, address + PAGE_SIZE); -+} -+ -+void flush_tlb_all(void) -+{ -+ flush_tlb_mm(current->mm); -+} -+ -+void flush_tlb_kernel_vm(void) -+{ -+ CHOOSE_MODE(flush_tlb_kernel_vm_tt(), flush_tlb_kernel_vm_skas()); -+} -+ -+void __flush_tlb_one(unsigned long addr) -+{ -+ CHOOSE_MODE_PROC(__flush_tlb_one_tt, __flush_tlb_one_skas, addr); -+} -+ -+void flush_tlb_range(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ CHOOSE_MODE_PROC(flush_tlb_range_tt, flush_tlb_range_skas, mm, start, 
-+ end); -+} -+ -+void flush_tlb_mm(struct mm_struct *mm) -+{ -+ CHOOSE_MODE_PROC(flush_tlb_mm_tt, flush_tlb_mm_skas, mm); -+} -+ -+void force_flush_all(void) -+{ -+ CHOOSE_MODE(force_flush_all_tt(), force_flush_all_skas()); -+} -+ -+ -+pgd_t *pgd_offset_proc(struct mm_struct *mm, unsigned long address) -+{ -+ return(pgd_offset(mm, address)); -+} -+ -+pmd_t *pmd_offset_proc(pgd_t *pgd, unsigned long address) -+{ -+ return(pmd_offset(pgd, address)); -+} -+ -+pte_t *pte_offset_proc(pmd_t *pmd, unsigned long address) -+{ -+ return(pte_offset(pmd, address)); -+} -+ -+pte_t *addr_pte(struct task_struct *task, unsigned long addr) -+{ -+ return(pte_offset(pmd_offset(pgd_offset(task->mm, addr), addr), addr)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/trap_kern.c um/arch/um/kernel/trap_kern.c ---- orig/arch/um/kernel/trap_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/trap_kern.c Wed Mar 26 13:26:00 2003 -@@ -0,0 +1,192 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/mm.h" -+#include "linux/spinlock.h" -+#include "linux/config.h" -+#include "linux/init.h" -+#include "asm/semaphore.h" -+#include "asm/pgtable.h" -+#include "asm/pgalloc.h" -+#include "asm/a.out.h" -+#include "asm/current.h" -+#include "asm/irq.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "chan_kern.h" -+#include "mconsole_kern.h" -+#include "2_5compat.h" -+ -+unsigned long handle_page_fault(unsigned long address, unsigned long ip, -+ int 
is_write, int is_user, int *code_out) -+{ -+ struct mm_struct *mm = current->mm; -+ struct vm_area_struct *vma; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long page; -+ int handled = 0; -+ -+ *code_out = SEGV_MAPERR; -+ down_read(&mm->mmap_sem); -+ vma = find_vma(mm, address); -+ if(!vma) -+ goto out; -+ else if(vma->vm_start <= address) -+ goto good_area; -+ else if(!(vma->vm_flags & VM_GROWSDOWN)) -+ goto out; -+ else if(expand_stack(vma, address)) -+ goto out; -+ -+ good_area: -+ *code_out = SEGV_ACCERR; -+ if(is_write && !(vma->vm_flags & VM_WRITE)) -+ goto out; -+ page = address & PAGE_MASK; -+ if(page == (unsigned long) current + PAGE_SIZE) -+ panic("Kernel stack overflow"); -+ pgd = pgd_offset(mm, page); -+ pmd = pmd_offset(pgd, page); -+ do { -+ survive: -+ switch (handle_mm_fault(mm, vma, address, is_write)) { -+ case 1: -+ current->min_flt++; -+ break; -+ case 2: -+ current->maj_flt++; -+ break; -+ default: -+ if (current->pid == 1) { -+ up_read(&mm->mmap_sem); -+ yield(); -+ down_read(&mm->mmap_sem); -+ goto survive; -+ } -+ /* Fall through to bad area case */ -+ case 0: -+ goto out; -+ } -+ pte = pte_offset(pmd, page); -+ } while(!pte_present(*pte)); -+ handled = 1; -+ *pte = pte_mkyoung(*pte); -+ if(pte_write(*pte)) *pte = pte_mkdirty(*pte); -+ flush_tlb_page(vma, page); -+ out: -+ up_read(&mm->mmap_sem); -+ return(handled); -+} -+ -+unsigned long segv(unsigned long address, unsigned long ip, int is_write, -+ int is_user, void *sc) -+{ -+ struct siginfo si; -+ void *catcher; -+ int handled; -+ -+ if(!is_user && (address >= start_vm) && (address < end_vm)){ -+ flush_tlb_kernel_vm(); -+ return(0); -+ } -+ if(current->mm == NULL) -+ panic("Segfault with no mm"); -+ -+ handled = handle_page_fault(address, ip, is_write, is_user, -+ &si.si_code); -+ -+ catcher = current->thread.fault_catcher; -+ if(handled) -+ return(0); -+ else if(catcher != NULL){ -+ current->thread.fault_addr = (void *) address; -+ do_longjmp(catcher, 1); -+ } -+ else 
if(current->thread.fault_addr != NULL){ -+ panic("fault_addr set but no fault catcher"); -+ } -+ else if(arch_fixup(ip, sc)) -+ return(0); -+ -+ if(!is_user) -+ panic("Kernel mode fault at addr 0x%lx, ip 0x%lx", -+ address, ip); -+ si.si_signo = SIGSEGV; -+ si.si_addr = (void *) address; -+ current->thread.cr2 = address; -+ current->thread.err = is_write; -+ force_sig_info(SIGSEGV, &si, current); -+ return(0); -+} -+ -+void bad_segv(unsigned long address, unsigned long ip, int is_write) -+{ -+ struct siginfo si; -+ -+ printk(KERN_ERR "Unfixable SEGV in '%s' (pid %d) at 0x%lx " -+ "(ip 0x%lx)\n", current->comm, current->pid, address, ip); -+ si.si_signo = SIGSEGV; -+ si.si_code = SEGV_ACCERR; -+ si.si_addr = (void *) address; -+ current->thread.cr2 = address; -+ current->thread.err = is_write; -+ force_sig_info(SIGSEGV, &si, current); -+} -+ -+void relay_signal(int sig, union uml_pt_regs *regs) -+{ -+ if(arch_handle_signal(sig, regs)) return; -+ if(!UPT_IS_USER(regs)) -+ panic("Kernel mode signal %d", sig); -+ force_sig(sig, current); -+} -+ -+void bus_handler(int sig, union uml_pt_regs *regs) -+{ -+ if(current->thread.fault_catcher != NULL) -+ do_longjmp(current->thread.fault_catcher, 1); -+ else relay_signal(sig, regs); -+} -+ -+void winch(int sig, union uml_pt_regs *regs) -+{ -+ do_IRQ(WINCH_IRQ, regs); -+} -+ -+void trap_init(void) -+{ -+} -+ -+spinlock_t trap_lock = SPIN_LOCK_UNLOCKED; -+ -+static int trap_index = 0; -+ -+int next_trap_index(int limit) -+{ -+ int ret; -+ -+ spin_lock(&trap_lock); -+ ret = trap_index; -+ if(++trap_index == limit) -+ trap_index = 0; -+ spin_unlock(&trap_lock); -+ return(ret); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/trap_user.c um/arch/um/kernel/trap_user.c ---- orig/arch/um/kernel/trap_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/trap_user.c Wed Mar 26 13:25:50 2003 -@@ -0,0 +1,140 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <fcntl.h> -+#include <setjmp.h> -+#include <signal.h> -+#include <sys/time.h> -+#include <sys/ioctl.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <asm/page.h> -+#include <asm/unistd.h> -+#include <asm/ptrace.h> -+#include "init.h" -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "sysdep/sigcontext.h" -+#include "irq_user.h" -+#include "frame_user.h" -+#include "signal_user.h" -+#include "time_user.h" -+#include "task.h" -+#include "mode.h" -+#include "choose-mode.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+ -+void kill_child_dead(int pid) -+{ -+ kill(pid, SIGKILL); -+ kill(pid, SIGCONT); -+ while(waitpid(pid, NULL, 0) > 0) kill(pid, SIGCONT); -+} -+ -+/* Unlocked - don't care if this is a bit off */ -+int nsegfaults = 0; -+ -+struct { -+ unsigned long address; -+ int is_write; -+ int pid; -+ unsigned long sp; -+ int is_user; -+} segfault_record[1024]; -+ -+void segv_handler(int sig, union uml_pt_regs *regs) -+{ -+ int index, max; -+ -+ if(UPT_IS_USER(regs) && !UPT_SEGV_IS_FIXABLE(regs)){ -+ bad_segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), -+ UPT_FAULT_WRITE(regs)); -+ return; -+ } -+ max = sizeof(segfault_record)/sizeof(segfault_record[0]); -+ index = next_trap_index(max); -+ -+ nsegfaults++; -+ segfault_record[index].address = UPT_FAULT_ADDR(regs); -+ segfault_record[index].pid = os_getpid(); -+ segfault_record[index].is_write = UPT_FAULT_WRITE(regs); -+ 
segfault_record[index].sp = UPT_SP(regs); -+ segfault_record[index].is_user = UPT_IS_USER(regs); -+ segv(UPT_FAULT_ADDR(regs), UPT_IP(regs), UPT_FAULT_WRITE(regs), -+ UPT_IS_USER(regs), regs); -+} -+ -+void usr2_handler(int sig, union uml_pt_regs *regs) -+{ -+ CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0); -+} -+ -+struct signal_info sig_info[] = { -+ [ SIGTRAP ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGFPE ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGILL ] { .handler = relay_signal, -+ .is_irq = 0 }, -+ [ SIGWINCH ] { .handler = winch, -+ .is_irq = 1 }, -+ [ SIGBUS ] { .handler = bus_handler, -+ .is_irq = 0 }, -+ [ SIGSEGV] { .handler = segv_handler, -+ .is_irq = 0 }, -+ [ SIGIO ] { .handler = sigio_handler, -+ .is_irq = 1 }, -+ [ SIGVTALRM ] { .handler = timer_handler, -+ .is_irq = 1 }, -+ [ SIGALRM ] { .handler = timer_handler, -+ .is_irq = 1 }, -+ [ SIGUSR2 ] { .handler = usr2_handler, -+ .is_irq = 0 }, -+}; -+ -+void sig_handler(int sig, struct sigcontext sc) -+{ -+ CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, -+ sig, &sc); -+} -+ -+extern int timer_irq_inited, missed_ticks[]; -+ -+void alarm_handler(int sig, struct sigcontext sc) -+{ -+ if(!timer_irq_inited) return; -+ missed_ticks[cpu()]++; -+ -+ if(sig == SIGALRM) -+ switch_timers(0); -+ -+ CHOOSE_MODE_PROC(sig_handler_common_tt, sig_handler_common_skas, -+ sig, &sc); -+ -+ if(sig == SIGALRM) -+ switch_timers(1); -+} -+ -+void do_longjmp(void *b, int val) -+{ -+ jmp_buf *buf = b; -+ -+ longjmp(*buf, val); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/Makefile um/arch/um/kernel/tt/Makefile ---- orig/arch/um/kernel/tt/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/Makefile Fri Dec 20 23:29:42 2002 -@@ -0,0 +1,39 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = tt.o -+ -+obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ -+ syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ -+ uaccess_user.o -+ -+obj-$(CONFIG_PT_PROXY) += gdb_kern.o -+ -+subdir-y = sys-$(SUBARCH) -+subdir-$(CONFIG_PT_PROXY) += ptproxy -+ -+obj-y += $(join $(subdir-y),$(subdir-y:%=/%.o)) -+ -+export-objs = ksyms.o -+ -+USER_OBJS = $(filter %_user.o,$(obj-y)) gdb.o time.o tracer.o -+ -+UNMAP_CFLAGS := $(patsubst -pg -DPROFILING,,$(USER_CFLAGS)) -+UNMAP_CFLAGS := $(patsubst -fprofile-arcs -ftest-coverage,,$(UNMAP_CFLAGS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+$(O_TARGET) : unmap_fin.o -+ -+unmap.o: unmap.c -+ $(CC) $(UNMAP_CFLAGS) -c -o $@ $< -+ -+unmap_fin.o : unmap.o -+ ld -r -o $@ $< -lc -L/usr/lib -+ -+clean : -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/exec_kern.c um/arch/um/kernel/tt/exec_kern.c ---- orig/arch/um/kernel/tt/exec_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/exec_kern.c Thu Oct 24 19:22:17 2002 -@@ -0,0 +1,83 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/mm.h" -+#include "asm/signal.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "asm/pgalloc.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "irq_user.h" -+#include "time_user.h" -+#include "mem_user.h" -+#include "os.h" -+#include "tlb.h" -+ 
-+static int exec_tramp(void *sig_stack) -+{ -+ init_new_thread_stack(sig_stack, NULL); -+ init_new_thread_signals(1); -+ os_stop_process(os_getpid()); -+ return(0); -+} -+ -+void flush_thread_tt(void) -+{ -+ unsigned long stack; -+ int new_pid; -+ -+ stack = alloc_stack(0, 0); -+ if(stack == 0){ -+ printk(KERN_ERR -+ "flush_thread : failed to allocate temporary stack\n"); -+ do_exit(SIGKILL); -+ } -+ -+ new_pid = start_fork_tramp((void *) current->thread.kernel_stack, -+ stack, 0, exec_tramp); -+ if(new_pid < 0){ -+ printk(KERN_ERR -+ "flush_thread : new thread failed, errno = %d\n", -+ -new_pid); -+ do_exit(SIGKILL); -+ } -+ -+ if(current->processor == 0) -+ forward_interrupts(new_pid); -+ current->thread.request.op = OP_EXEC; -+ current->thread.request.u.exec.pid = new_pid; -+ unprotect_stack((unsigned long) current); -+ os_usr1_process(os_getpid()); -+ -+ enable_timer(); -+ free_page(stack); -+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); -+ task_protections((unsigned long) current); -+ force_flush_all(); -+ unblock_signals(); -+} -+ -+void start_thread_tt(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp) -+{ -+ set_fs(USER_DS); -+ flush_tlb_mm(current->mm); -+ PT_REGS_IP(regs) = eip; -+ PT_REGS_SP(regs) = esp; -+ PT_FIX_EXEC_STACK(esp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/exec_user.c um/arch/um/kernel/tt/exec_user.c ---- orig/arch/um/kernel/tt/exec_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/exec_user.c Thu Dec 5 19:36:57 2002 -@@ -0,0 +1,49 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stdlib.h> -+#include <sched.h> -+#include <errno.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <signal.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "ptrace_user.h" -+ -+void do_exec(int old_pid, int new_pid) -+{ -+ unsigned long regs[FRAME_SIZE]; -+ -+ if((ptrace(PTRACE_ATTACH, new_pid, 0, 0) < 0) || -+ (ptrace(PTRACE_CONT, new_pid, 0, 0) < 0) || -+ (waitpid(new_pid, 0, WUNTRACED) < 0)) -+ tracer_panic("do_exec failed to attach proc - errno = %d", -+ errno); -+ -+ if(ptrace_getregs(old_pid, regs) < 0) -+ tracer_panic("do_exec failed to get registers - errno = %d", -+ errno); -+ -+ kill(old_pid, SIGKILL); -+ -+ if(ptrace_setregs(new_pid, regs) < 0) -+ tracer_panic("do_exec failed to start new proc - errno = %d", -+ errno); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/gdb.c um/arch/um/kernel/tt/gdb.c ---- orig/arch/um/kernel/tt/gdb.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/gdb.c Fri Jan 17 13:23:31 2003 -@@ -0,0 +1,278 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <errno.h> -+#include <string.h> -+#include <signal.h> -+#include <sys/ptrace.h> -+#include <sys/types.h> -+#include "uml-config.h" -+#include "kern_constants.h" -+#include "chan_user.h" -+#include "init.h" -+#include "user.h" -+#include "debug.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "tt.h" -+#include "sysdep/thread.h" -+ -+extern int debugger_pid; -+extern int debugger_fd; -+extern int debugger_parent; -+ -+int detach(int pid, int sig) -+{ -+ return(ptrace(PTRACE_DETACH, pid, 0, sig)); -+} -+ -+int attach(int pid) -+{ -+ int err; -+ -+ err = ptrace(PTRACE_ATTACH, pid, 0, 0); -+ if(err < 0) return(-errno); -+ else return(err); -+} -+ -+int cont(int pid) -+{ -+ return(ptrace(PTRACE_CONT, pid, 0, 0)); -+} -+ -+#ifdef UML_CONFIG_PT_PROXY -+ -+int debugger_signal(int status, pid_t pid) -+{ -+ return(debugger_proxy(status, pid)); -+} -+ -+void child_signal(pid_t pid, int status) -+{ -+ child_proxy(pid, status); -+} -+ -+static void gdb_announce(char *dev_name, int dev) -+{ -+ printf("gdb assigned device '%s'\n", dev_name); -+} -+ -+static struct chan_opts opts = { -+ .announce = gdb_announce, -+ .xterm_title = "UML kernel debugger", -+ .raw = 0, -+ .tramp_stack = 0, -+ .in_kernel = 0, -+}; -+ -+/* Accessed by the tracing thread, which automatically serializes access */ -+static void *xterm_data; -+static int xterm_fd; -+ -+extern void *xterm_init(char *, int, struct chan_opts *); -+extern int xterm_open(int, int, int, void *, char **); 
-+extern void xterm_close(int, void *); -+ -+int open_gdb_chan(void) -+{ -+ char stack[UM_KERN_PAGE_SIZE], *dummy; -+ -+ opts.tramp_stack = (unsigned long) stack; -+ xterm_data = xterm_init("", 0, &opts); -+ xterm_fd = xterm_open(1, 1, 1, xterm_data, &dummy); -+ return(xterm_fd); -+} -+ -+static void exit_debugger_cb(void *unused) -+{ -+ if(debugger_pid != -1){ -+ if(gdb_pid != -1){ -+ fake_child_exit(); -+ gdb_pid = -1; -+ } -+ else kill_child_dead(debugger_pid); -+ debugger_pid = -1; -+ if(debugger_parent != -1) -+ detach(debugger_parent, SIGINT); -+ } -+ if(xterm_data != NULL) xterm_close(xterm_fd, xterm_data); -+} -+ -+static void exit_debugger(void) -+{ -+ initial_thread_cb(exit_debugger_cb, NULL); -+} -+ -+__uml_exitcall(exit_debugger); -+ -+struct gdb_data { -+ char *str; -+ int err; -+}; -+ -+static void config_gdb_cb(void *arg) -+{ -+ struct gdb_data *data = arg; -+ void *task; -+ int pid; -+ -+ data->err = -1; -+ if(debugger_pid != -1) exit_debugger_cb(NULL); -+ if(!strncmp(data->str, "pid,", strlen("pid,"))){ -+ data->str += strlen("pid,"); -+ pid = strtoul(data->str, NULL, 0); -+ task = cpu_tasks[0].task; -+ debugger_pid = attach_debugger(TASK_EXTERN_PID(task), pid, 0); -+ if(debugger_pid != -1){ -+ data->err = 0; -+ gdb_pid = pid; -+ } -+ return; -+ } -+ data->err = 0; -+ debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); -+ init_proxy(debugger_pid, 0, 0); -+} -+ -+int gdb_config(char *str) -+{ -+ struct gdb_data data; -+ -+ if(*str++ != '=') return(-1); -+ data.str = str; -+ initial_thread_cb(config_gdb_cb, &data); -+ return(data.err); -+} -+ -+void remove_gdb_cb(void *unused) -+{ -+ exit_debugger_cb(NULL); -+} -+ -+int gdb_remove(char *unused) -+{ -+ initial_thread_cb(remove_gdb_cb, NULL); -+ return(0); -+} -+ -+void signal_usr1(int sig) -+{ -+ if(debugger_pid != -1){ -+ printk(UM_KERN_ERR "The debugger is already running\n"); -+ return; -+ } -+ debugger_pid = start_debugger(linux_prog, 0, 0, &debugger_fd); -+ init_proxy(debugger_pid, 0, 
0); -+} -+ -+int init_ptrace_proxy(int idle_pid, int startup, int stop) -+{ -+ int pid, status; -+ -+ pid = start_debugger(linux_prog, startup, stop, &debugger_fd); -+ status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL); -+ if(pid < 0){ -+ cont(idle_pid); -+ return(-1); -+ } -+ init_proxy(pid, 1, status); -+ return(pid); -+} -+ -+int attach_debugger(int idle_pid, int pid, int stop) -+{ -+ int status = 0, err; -+ -+ err = attach(pid); -+ if(err < 0){ -+ printf("Failed to attach pid %d, errno = %d\n", pid, -err); -+ return(-1); -+ } -+ if(stop) status = wait_for_stop(idle_pid, SIGSTOP, PTRACE_CONT, NULL); -+ init_proxy(pid, 1, status); -+ return(pid); -+} -+ -+#ifdef notdef /* Put this back in when it does something useful */ -+static int __init uml_gdb_init_setup(char *line, int *add) -+{ -+ gdb_init = uml_strdup(line); -+ return 0; -+} -+ -+__uml_setup("gdb=", uml_gdb_init_setup, -+"gdb=<channel description>\n\n" -+); -+#endif -+ -+static int __init uml_gdb_pid_setup(char *line, int *add) -+{ -+ gdb_pid = strtoul(line, NULL, 0); -+ *add = 0; -+ return 0; -+} -+ -+__uml_setup("gdb-pid=", uml_gdb_pid_setup, -+"gdb-pid=<pid>\n" -+" gdb-pid is used to attach an external debugger to UML. 
This may be\n" -+" an already-running gdb or a debugger-like process like strace.\n\n" -+); -+ -+#else -+ -+int debugger_signal(int status, pid_t pid){ return(0); } -+void child_signal(pid_t pid, int status){ } -+int init_ptrace_proxy(int idle_pid, int startup, int stop) -+{ -+ printk(UM_KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); -+ kill_child_dead(idle_pid); -+ exit(1); -+} -+ -+void signal_usr1(int sig) -+{ -+ printk(UM_KERN_ERR "debug requested when CONFIG_PT_PROXY is off\n"); -+} -+ -+int attach_debugger(int idle_pid, int pid, int stop) -+{ -+ printk(UM_KERN_ERR "attach_debugger called when CONFIG_PT_PROXY " -+ "is off\n"); -+ return(-1); -+} -+ -+int config_gdb(char *str) -+{ -+ return(-1); -+} -+ -+int remove_gdb(void) -+{ -+ return(-1); -+} -+ -+int init_parent_proxy(int pid) -+{ -+ return(-1); -+} -+ -+void debugger_parent_signal(int status, int pid) -+{ -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/gdb_kern.c um/arch/um/kernel/tt/gdb_kern.c ---- orig/arch/um/kernel/tt/gdb_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/gdb_kern.c Sun Dec 15 21:16:17 2002 -@@ -0,0 +1,40 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/config.h" -+#include "mconsole_kern.h" -+ -+#ifdef CONFIG_MCONSOLE -+ -+extern int gdb_config(char *str); -+extern int gdb_remove(char *unused); -+ -+static struct mc_device gdb_mc = { -+ .name = "gdb", -+ .config = gdb_config, -+ .remove = gdb_remove, -+}; -+ -+int gdb_mc_init(void) -+{ -+ mconsole_register_dev(&gdb_mc); -+ return(0); -+} -+ -+__initcall(gdb_mc_init); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/debug.h um/arch/um/kernel/tt/include/debug.h ---- orig/arch/um/kernel/tt/include/debug.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/include/debug.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,29 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) and -+ * Lars Brinkhoff. 
-+ * Licensed under the GPL -+ */ -+ -+#ifndef __DEBUG_H -+#define __DEBUG_H -+ -+extern int debugger_proxy(int status, pid_t pid); -+extern void child_proxy(pid_t pid, int status); -+extern void init_proxy (pid_t pid, int waiting, int status); -+extern int start_debugger(char *prog, int startup, int stop, int *debugger_fd); -+extern void fake_child_exit(void); -+extern int gdb_config(char *str); -+extern int gdb_remove(char *unused); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/mmu.h um/arch/um/kernel/tt/include/mmu.h ---- orig/arch/um/kernel/tt/include/mmu.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/include/mmu.h Sat Nov 9 12:51:32 2002 -@@ -0,0 +1,23 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_MMU_H -+#define __TT_MMU_H -+ -+struct mmu_context_tt { -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/mode.h um/arch/um/kernel/tt/include/mode.h ---- orig/arch/um/kernel/tt/include/mode.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/include/mode.h Mon Dec 9 00:34:40 2002 -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MODE_TT_H__ -+#define __MODE_TT_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int tracing_pid; -+ -+extern int tracer(int (*init_proc)(void *), void *sp); -+extern void user_time_init_tt(void); -+extern int copy_sc_from_user_tt(void *to_ptr, void *from_ptr, void *data); -+extern int copy_sc_to_user_tt(void *to_ptr, void *fp, void *from_ptr, -+ void *data); -+extern void sig_handler_common_tt(int sig, void *sc); -+extern void syscall_handler_tt(int sig, union uml_pt_regs *regs); -+extern void reboot_tt(void); -+extern void halt_tt(void); -+extern int is_tracer_winch(int pid, int fd, void *data); -+extern void kill_off_processes_tt(void); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/mode_kern.h um/arch/um/kernel/tt/include/mode_kern.h ---- orig/arch/um/kernel/tt/include/mode_kern.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/include/mode_kern.h Mon Dec 16 21:49:18 2002 -@@ -0,0 +1,52 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_MODE_KERN_H__ -+#define __TT_MODE_KERN_H__ -+ -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+ -+extern void *_switch_to_tt(void *prev, void *next); -+extern void flush_thread_tt(void); -+extern void start_thread_tt(struct pt_regs *regs, unsigned long eip, -+ unsigned long esp); -+extern int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct *p, -+ struct pt_regs *regs); -+extern void release_thread_tt(struct task_struct *task); -+extern void exit_thread_tt(void); -+extern void initial_thread_cb_tt(void (*proc)(void *), void *arg); -+extern void init_idle_tt(void); -+extern void flush_tlb_kernel_vm_tt(void); -+extern void __flush_tlb_one_tt(unsigned long addr); -+extern void flush_tlb_range_tt(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_mm_tt(struct mm_struct *mm); -+extern void force_flush_all_tt(void); -+extern long execute_syscall_tt(void *r); -+extern void before_mem_tt(unsigned long brk_start); -+extern unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out); -+extern int start_uml_tt(void); -+extern int external_pid_tt(struct task_struct *task); -+extern int thread_pid_tt(struct thread_struct *thread); -+ -+#define kmem_end_tt (host_task_size - ABOVE_KMEM) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow 
Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/ptrace-tt.h um/arch/um/kernel/tt/include/ptrace-tt.h ---- orig/arch/um/kernel/tt/include/ptrace-tt.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/include/ptrace-tt.h Fri Jan 17 13:23:30 2003 -@@ -0,0 +1,26 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PTRACE_TT_H -+#define __PTRACE_TT_H -+ -+#include "uml-config.h" -+ -+#ifdef UML_CONFIG_MODE_TT -+#include "sysdep/sc.h" -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/tt.h um/arch/um/kernel/tt/include/tt.h ---- orig/arch/um/kernel/tt/include/tt.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/include/tt.h Fri Dec 20 23:29:11 2002 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_H__ -+#define __TT_H__ -+ -+#include "sysdep/ptrace.h" -+ -+extern int gdb_pid; -+extern int debug; -+extern int debug_stop; -+extern int debug_trace; -+ -+extern int honeypot; -+ -+extern int fork_tramp(void *sig_stack); -+extern int do_proc_op(void *t, int proc_id); -+extern int tracer(int (*init_proc)(void *), void *sp); -+extern void attach_process(int pid); -+extern void tracer_panic(char *format, ...); -+extern void set_init_pid(int pid); -+extern int set_user_mode(void *task); -+extern void set_tracing(void *t, int tracing); -+extern int is_tracing(void *task); -+extern int singlestepping_tt(void *t); -+extern void clear_singlestep(void *t); -+extern void syscall_handler(int sig, union uml_pt_regs *regs); -+extern void exit_kernel(int pid, void *task); -+extern int do_syscall(void *task, int pid); -+extern int is_valid_pid(int pid); -+extern void remap_data(void *segment_start, void *segment_end, int w); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/include/uaccess.h um/arch/um/kernel/tt/include/uaccess.h ---- orig/arch/um/kernel/tt/include/uaccess.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/include/uaccess.h Tue Mar 25 16:58:42 2003 -@@ -0,0 +1,122 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __TT_UACCESS_H -+#define __TT_UACCESS_H -+ -+#include "linux/string.h" -+#include "linux/sched.h" -+#include "asm/processor.h" -+#include "asm/errno.h" -+#include "asm/current.h" -+#include "asm/a.out.h" -+#include "uml_uaccess.h" -+ -+#define ABOVE_KMEM (16 * 1024 * 1024) -+ -+extern unsigned long end_vm; -+extern unsigned long uml_physmem; -+ -+#define under_task_size(addr, size) \ -+ (((unsigned long) (addr) < TASK_SIZE) && \ -+ (((unsigned long) (addr) + (size)) < TASK_SIZE)) -+ -+#define is_stack(addr, size) \ -+ (((unsigned long) (addr) < STACK_TOP) && \ -+ ((unsigned long) (addr) >= STACK_TOP - ABOVE_KMEM) && \ -+ (((unsigned long) (addr) + (size)) <= STACK_TOP)) -+ -+#define access_ok_tt(type, addr, size) \ -+ ((type == VERIFY_READ) || (segment_eq(get_fs(), KERNEL_DS)) || \ -+ (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \ -+ (under_task_size(addr, size) || is_stack(addr, size)))) -+ -+static inline int verify_area_tt(int type, const void * addr, -+ unsigned long size) -+{ -+ return(access_ok_tt(type, addr, size) ? 
0 : -EFAULT); -+} -+ -+extern unsigned long get_fault_addr(void); -+ -+extern int __do_copy_from_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher); -+ -+static inline int copy_from_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_READ, from, n)) -+ return(n); -+ -+ return(__do_copy_from_user(to, from, n, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+static inline int copy_to_user_tt(void *to, const void *from, int n) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, to, n)) -+ return(n); -+ -+ return(__do_copy_to_user(to, from, n, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+extern int __do_strncpy_from_user(char *dst, const char *src, size_t n, -+ void **fault_addr, void **fault_catcher); -+ -+static inline int strncpy_from_user_tt(char *dst, const char *src, int count) -+{ -+ int n; -+ -+ if(!access_ok_tt(VERIFY_READ, src, 1)) -+ return(-EFAULT); -+ -+ n = __do_strncpy_from_user(dst, src, count, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher); -+ if(n < 0) return(-EFAULT); -+ return(n); -+} -+ -+extern int __do_clear_user(void *mem, size_t len, void **fault_addr, -+ void **fault_catcher); -+ -+static inline int __clear_user_tt(void *mem, int len) -+{ -+ return(__do_clear_user(mem, len, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+static inline int clear_user_tt(void *mem, int len) -+{ -+ if(!access_ok_tt(VERIFY_WRITE, mem, len)) -+ return(len); -+ -+ return(__do_clear_user(mem, len, ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+extern int __do_strnlen_user(const char *str, unsigned long n, -+ void **fault_addr, void **fault_catcher); -+ -+static inline int strnlen_user_tt(const void *str, int len) -+{ -+ return(__do_strnlen_user(str, len, -+ ¤t->thread.fault_addr, -+ ¤t->thread.fault_catcher)); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ksyms.c um/arch/um/kernel/tt/ksyms.c ---- orig/arch/um/kernel/tt/ksyms.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ksyms.c Sun Oct 27 17:01:56 2002 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/module.h" -+#include "asm/uaccess.h" -+#include "mode.h" -+ -+EXPORT_SYMBOL(__do_copy_from_user); -+EXPORT_SYMBOL(__do_copy_to_user); -+EXPORT_SYMBOL(__do_strncpy_from_user); -+EXPORT_SYMBOL(__do_strnlen_user); -+EXPORT_SYMBOL(__do_clear_user); -+ -+EXPORT_SYMBOL(tracing_pid); -+EXPORT_SYMBOL(honeypot); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/mem.c um/arch/um/kernel/tt/mem.c ---- orig/arch/um/kernel/tt/mem.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/mem.c Mon Dec 16 21:49:51 2002 -@@ -0,0 +1,51 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "asm/uaccess.h" -+#include "mem_user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "kern.h" -+#include "tt.h" -+ -+void before_mem_tt(unsigned long brk_start) -+{ -+ if(!jail || debug) -+ remap_data(UML_ROUND_DOWN(&_stext), UML_ROUND_UP(&_etext), 1); -+ remap_data(UML_ROUND_DOWN(&_sdata), UML_ROUND_UP(&_edata), 1); -+ remap_data(UML_ROUND_DOWN(&__bss_start), UML_ROUND_UP(brk_start), 1); -+} -+ -+#ifdef CONFIG_HOST_2G_2G -+#define TOP 0x80000000 -+#else -+#define TOP 0xc0000000 -+#endif -+ -+#define SIZE ((CONFIG_NEST_LEVEL + CONFIG_KERNEL_HALF_GIGS) * 0x20000000) -+#define START (TOP - SIZE) -+ -+unsigned long set_task_sizes_tt(int arg, unsigned long *host_size_out, -+ unsigned long *task_size_out) -+{ -+ /* Round up to the nearest 4M */ -+ *host_size_out = ROUND_4M((unsigned long) &arg); -+ *task_size_out = START; -+ return(START); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/mem_user.c um/arch/um/kernel/tt/mem_user.c ---- orig/arch/um/kernel/tt/mem_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/mem_user.c Fri Jan 17 22:07:31 2003 -@@ -0,0 +1,50 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <stdio.h> -+#include <unistd.h> -+#include <string.h> -+#include <errno.h> -+#include <sys/mman.h> -+#include "tt.h" -+#include "mem_user.h" -+#include "user_util.h" -+ -+void remap_data(void *segment_start, void *segment_end, int w) -+{ -+ void *addr; -+ unsigned long size; -+ int data, prot; -+ -+ if(w) prot = PROT_WRITE; -+ else prot = 0; -+ prot |= PROT_READ | PROT_EXEC; -+ size = (unsigned long) segment_end - -+ (unsigned long) segment_start; -+ data = create_mem_file(size); -+ if((addr = mmap(NULL, size, PROT_WRITE | PROT_READ, -+ MAP_SHARED, data, 0)) == MAP_FAILED){ -+ perror("mapping new data segment"); -+ exit(1); -+ } -+ memcpy(addr, segment_start, size); -+ if(switcheroo(data, prot, addr, segment_start, -+ size) < 0){ -+ printf("switcheroo failed\n"); -+ exit(1); -+ } -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/process_kern.c um/arch/um/kernel/tt/process_kern.c ---- orig/arch/um/kernel/tt/process_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/process_kern.c Sun Feb 16 21:34:23 2003 -@@ -0,0 +1,516 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "linux/signal.h" -+#include "linux/kernel.h" -+#include "asm/system.h" -+#include "asm/pgalloc.h" -+#include "asm/ptrace.h" -+#include "irq_user.h" -+#include "signal_user.h" -+#include "kern_util.h" -+#include "user_util.h" -+#include "os.h" -+#include "kern.h" -+#include "sigcontext.h" -+#include "time_user.h" -+#include "mem_user.h" -+#include "tlb.h" -+#include "mode.h" -+#include "init.h" -+#include "tt.h" -+ -+void *_switch_to_tt(void *prev, void *next) -+{ -+ struct task_struct *from, *to; -+ unsigned long flags; -+ int err, vtalrm, alrm, prof, cpu; -+ char c; -+ /* jailing and SMP are incompatible, so this doesn't need to be -+ * made per-cpu -+ */ -+ static int reading; -+ -+ from = prev; -+ to = next; -+ -+ to->thread.prev_sched = from; -+ -+ cpu = from->processor; -+ if(cpu == 0) -+ forward_interrupts(to->thread.mode.tt.extern_pid); -+#ifdef CONFIG_SMP -+ forward_ipi(cpu_data[cpu].ipi_pipe[0], to->thread.mode.tt.extern_pid); -+#endif -+ local_irq_save(flags); -+ -+ vtalrm = change_sig(SIGVTALRM, 0); -+ alrm = change_sig(SIGALRM, 0); -+ prof = change_sig(SIGPROF, 0); -+ -+ c = 0; -+ set_current(to); -+ -+ reading = 0; -+ err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); -+ if(err != sizeof(c)) -+ panic("write of switch_pipe failed, errno = %d", -err); -+ -+ reading = 1; -+ if(from->state == TASK_ZOMBIE) -+ os_kill_process(os_getpid(), 0); -+ -+ err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, 
sizeof(c)); -+ if(err != sizeof(c)) -+ panic("read of switch_pipe failed, errno = %d", -err); -+ -+ /* This works around a nasty race with 'jail'. If we are switching -+ * between two threads of a threaded app and the incoming process -+ * runs before the outgoing process reaches the read, and it makes -+ * it all the way out to userspace, then it will have write-protected -+ * the outgoing process stack. Then, when the outgoing process -+ * returns from the write, it will segfault because it can no longer -+ * write its own stack. So, in order to avoid that, the incoming -+ * thread sits in a loop yielding until 'reading' is set. This -+ * isn't entirely safe, since there may be a reschedule from a timer -+ * happening between setting 'reading' and sleeping in read. But, -+ * it should get a whole quantum in which to reach the read and sleep, -+ * which should be enough. -+ */ -+ -+ if(jail){ -+ while(!reading) sched_yield(); -+ } -+ -+ change_sig(SIGVTALRM, vtalrm); -+ change_sig(SIGALRM, alrm); -+ change_sig(SIGPROF, prof); -+ -+ arch_switch(); -+ -+ flush_tlb_all(); -+ local_irq_restore(flags); -+ -+ return(current->thread.prev_sched); -+} -+ -+void release_thread_tt(struct task_struct *task) -+{ -+ os_kill_process(task->thread.mode.tt.extern_pid, 0); -+} -+ -+void exit_thread_tt(void) -+{ -+ close(current->thread.mode.tt.switch_pipe[0]); -+ close(current->thread.mode.tt.switch_pipe[1]); -+} -+ -+extern void schedule_tail(struct task_struct *prev); -+ -+static void new_thread_handler(int sig) -+{ -+ int (*fn)(void *); -+ void *arg; -+ -+ fn = current->thread.request.u.thread.proc; -+ arg = current->thread.request.u.thread.arg; -+ UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); -+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); -+ -+ init_new_thread_signals(1); -+ enable_timer(); -+ free_page(current->thread.temp_stack); -+ set_cmdline("(kernel thread)"); -+ force_flush_all(); -+ -+ if(current->thread.prev_sched != NULL) -+ 
schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ change_sig(SIGUSR1, 1); -+ change_sig(SIGVTALRM, 1); -+ change_sig(SIGPROF, 1); -+ sti(); -+ if(!run_kernel_thread(fn, arg, ¤t->thread.exec_buf)) -+ do_exit(0); -+} -+ -+static int new_thread_proc(void *stack) -+{ -+ cli(); -+ init_new_thread_stack(stack, new_thread_handler); -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+/* Signal masking - signals are blocked at the start of fork_tramp. They -+ * are re-enabled when finish_fork_handler is entered by fork_tramp hitting -+ * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off, -+ * so it is blocked before it's called. They are re-enabled on sigreturn -+ * despite the fact that they were blocked when the SIGUSR1 was issued because -+ * copy_thread copies the parent's signcontext, including the signal mask -+ * onto the signal frame. -+ */ -+ -+static void finish_fork_handler(int sig) -+{ -+ UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1); -+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); -+ -+ init_new_thread_signals(1); -+ enable_timer(); -+ sti(); -+ force_flush_all(); -+ if(current->mm != current->p_pptr->mm) -+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1, -+ 1, 0, 1); -+ task_protections((unsigned long) current); -+ -+ if(current->thread.prev_sched != NULL) -+ schedule_tail(current->thread.prev_sched); -+ current->thread.prev_sched = NULL; -+ -+ free_page(current->thread.temp_stack); -+ cli(); -+ change_sig(SIGUSR1, 0); -+ set_user_mode(current); -+} -+ -+int fork_tramp(void *stack) -+{ -+ cli(); -+ init_new_thread_stack(stack, finish_fork_handler); -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+int copy_thread_tt(int nr, unsigned long clone_flags, unsigned long sp, -+ unsigned long stack_top, struct task_struct * p, -+ struct pt_regs *regs) -+{ -+ int (*tramp)(void *); -+ int new_pid, err; -+ unsigned long stack; -+ -+ if(current->thread.forking) -+ tramp = 
fork_tramp; -+ else { -+ tramp = new_thread_proc; -+ p->thread.request.u.thread = current->thread.request.u.thread; -+ } -+ -+ err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1); -+ if(err){ -+ printk("copy_thread : pipe failed, errno = %d\n", -err); -+ return(err); -+ } -+ -+ stack = alloc_stack(0, 0); -+ if(stack == 0){ -+ printk(KERN_ERR "copy_thread : failed to allocate " -+ "temporary stack\n"); -+ return(-ENOMEM); -+ } -+ -+ clone_flags &= CLONE_VM; -+ p->thread.temp_stack = stack; -+ new_pid = start_fork_tramp((void *) p->thread.kernel_stack, stack, -+ clone_flags, tramp); -+ if(new_pid < 0){ -+ printk(KERN_ERR "copy_thread : clone failed - errno = %d\n", -+ -new_pid); -+ return(new_pid); -+ } -+ -+ if(current->thread.forking){ -+ sc_to_sc(UPT_SC(&p->thread.regs.regs), -+ UPT_SC(¤t->thread.regs.regs)); -+ SC_SET_SYSCALL_RETURN(UPT_SC(&p->thread.regs.regs), 0); -+ if(sp != 0) SC_SP(UPT_SC(&p->thread.regs.regs)) = sp; -+ } -+ p->thread.mode.tt.extern_pid = new_pid; -+ -+ current->thread.request.op = OP_FORK; -+ current->thread.request.u.fork.pid = new_pid; -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+void reboot_tt(void) -+{ -+ current->thread.request.op = OP_REBOOT; -+ os_usr1_process(os_getpid()); -+ os_kill_process(os_getpid(), 0); -+} -+ -+void halt_tt(void) -+{ -+ current->thread.request.op = OP_HALT; -+ os_usr1_process(os_getpid()); -+ os_kill_process(os_getpid(), 0); -+} -+ -+void kill_off_processes_tt(void) -+{ -+ struct task_struct *p; -+ int me; -+ -+ me = os_getpid(); -+ for_each_task(p){ -+ int pid = p->thread.mode.tt.extern_pid; -+ if((pid != me) && (pid != -1)) -+ os_kill_process(p->thread.mode.tt.extern_pid, 0); -+ } -+ if((init_task.thread.mode.tt.extern_pid != me) && -+ (init_task.thread.mode.tt.extern_pid != -1)) -+ os_kill_process(init_task.thread.mode.tt.extern_pid, 0); -+} -+ -+void initial_thread_cb_tt(void (*proc)(void *), void *arg) -+{ -+ if(os_getpid() == tracing_pid){ -+ (*proc)(arg); -+ } -+ else { -+ 
current->thread.request.op = OP_CB; -+ current->thread.request.u.cb.proc = proc; -+ current->thread.request.u.cb.arg = arg; -+ os_usr1_process(os_getpid()); -+ } -+} -+ -+int do_proc_op(void *t, int proc_id) -+{ -+ struct task_struct *task; -+ struct thread_struct *thread; -+ int op, pid; -+ -+ task = t; -+ thread = &task->thread; -+ op = thread->request.op; -+ switch(op){ -+ case OP_NONE: -+ case OP_TRACE_ON: -+ break; -+ case OP_EXEC: -+ pid = thread->request.u.exec.pid; -+ do_exec(thread->mode.tt.extern_pid, pid); -+ thread->mode.tt.extern_pid = pid; -+ cpu_tasks[task->processor].pid = pid; -+ break; -+ case OP_FORK: -+ attach_process(thread->request.u.fork.pid); -+ break; -+ case OP_CB: -+ (*thread->request.u.cb.proc)(thread->request.u.cb.arg); -+ break; -+ case OP_REBOOT: -+ case OP_HALT: -+ break; -+ default: -+ tracer_panic("Bad op in do_proc_op"); -+ break; -+ } -+ thread->request.op = OP_NONE; -+ return(op); -+} -+ -+void init_idle_tt(void) -+{ -+ idle_timer(); -+} -+ -+/* Changed by jail_setup, which is a setup */ -+int jail = 0; -+ -+int __init jail_setup(char *line, int *add) -+{ -+ int ok = 1; -+ -+ if(jail) return(0); -+#ifdef CONFIG_SMP -+ printf("'jail' may not used used in a kernel with CONFIG_SMP " -+ "enabled\n"); -+ ok = 0; -+#endif -+#ifdef CONFIG_HOSTFS -+ printf("'jail' may not used used in a kernel with CONFIG_HOSTFS " -+ "enabled\n"); -+ ok = 0; -+#endif -+#ifdef CONFIG_MODULES -+ printf("'jail' may not used used in a kernel with CONFIG_MODULES " -+ "enabled\n"); -+ ok = 0; -+#endif -+ if(!ok) exit(1); -+ -+ /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem. -+ * Removing it from the bounding set eliminates the ability of anything -+ * to acquire it, and thus read or write kernel memory. 
-+ */ -+ cap_lower(cap_bset, CAP_SYS_RAWIO); -+ jail = 1; -+ return(0); -+} -+ -+__uml_setup("jail", jail_setup, -+"jail\n" -+" Enables the protection of kernel memory from processes.\n\n" -+); -+ -+static void mprotect_kernel_mem(int w) -+{ -+ unsigned long start, end; -+ int pages; -+ -+ if(!jail || (current == &init_task)) return; -+ -+ pages = (1 << CONFIG_KERNEL_STACK_ORDER); -+ -+ start = (unsigned long) current + PAGE_SIZE; -+ end = (unsigned long) current + PAGE_SIZE * pages; -+ protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1); -+ protect_memory(end, high_physmem - end, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&_stext); -+ end = (unsigned long) UML_ROUND_UP(&_etext); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&_unprotected_end); -+ end = (unsigned long) UML_ROUND_UP(&_edata); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ start = (unsigned long) UML_ROUND_DOWN(&__bss_start); -+ end = (unsigned long) UML_ROUND_UP(brk_start); -+ protect_memory(start, end - start, 1, w, 1, 1); -+ -+ mprotect_kernel_vm(w); -+} -+ -+void unprotect_kernel_mem(void) -+{ -+ mprotect_kernel_mem(1); -+} -+ -+void protect_kernel_mem(void) -+{ -+ mprotect_kernel_mem(0); -+} -+ -+extern void start_kernel(void); -+ -+static int start_kernel_proc(void *unused) -+{ -+ int pid; -+ -+ block_signals(); -+ pid = os_getpid(); -+ -+ cpu_tasks[0].pid = pid; -+ cpu_tasks[0].task = current; -+#ifdef CONFIG_SMP -+ cpu_online_map = 1; -+#endif -+ if(debug) os_stop_process(pid); -+ start_kernel(); -+ return(0); -+} -+ -+void set_tracing(void *task, int tracing) -+{ -+ ((struct task_struct *) task)->thread.mode.tt.tracing = tracing; -+} -+ -+int is_tracing(void *t) -+{ -+ return (((struct task_struct *) t)->thread.mode.tt.tracing); -+} -+ -+int set_user_mode(void *t) -+{ -+ struct task_struct *task; -+ -+ task = t ? 
t : current; -+ if(task->thread.mode.tt.tracing) -+ return(1); -+ task->thread.request.op = OP_TRACE_ON; -+ os_usr1_process(os_getpid()); -+ return(0); -+} -+ -+void set_init_pid(int pid) -+{ -+ int err; -+ -+ init_task.thread.mode.tt.extern_pid = pid; -+ err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1); -+ if(err) panic("Can't create switch pipe for init_task, errno = %d", -+ err); -+} -+ -+int singlestepping_tt(void *t) -+{ -+ struct task_struct *task = t; -+ -+ if(task->thread.mode.tt.singlestep_syscall) -+ return(0); -+ return(task->ptrace & PT_DTRACE); -+} -+ -+void clear_singlestep(void *t) -+{ -+ struct task_struct *task = t; -+ -+ task->ptrace &= ~PT_DTRACE; -+} -+ -+int start_uml_tt(void) -+{ -+ void *sp; -+ int pages; -+ -+ pages = (1 << CONFIG_KERNEL_STACK_ORDER) - 2; -+ sp = (void *) init_task.thread.kernel_stack + pages * PAGE_SIZE - -+ sizeof(unsigned long); -+ return(tracer(start_kernel_proc, sp)); -+} -+ -+int external_pid_tt(struct task_struct *task) -+{ -+ return(task->thread.mode.tt.extern_pid); -+} -+ -+int thread_pid_tt(struct thread_struct *thread) -+{ -+ return(thread->mode.tt.extern_pid); -+} -+ -+int is_valid_pid(int pid) -+{ -+ struct task_struct *task; -+ -+ read_lock(&tasklist_lock); -+ for_each_task(task){ -+ if(task->thread.mode.tt.extern_pid == pid){ -+ read_unlock(&tasklist_lock); -+ return(1); -+ } -+ } -+ read_unlock(&tasklist_lock); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/Makefile um/arch/um/kernel/tt/ptproxy/Makefile ---- orig/arch/um/kernel/tt/ptproxy/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,12 @@ -+O_TARGET = ptproxy.o -+ -+obj-y = proxy.o ptrace.o sysdep.o wait.o -+ -+USER_OBJS = $(obj-y) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean: -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/proxy.c um/arch/um/kernel/tt/ptproxy/proxy.c ---- orig/arch/um/kernel/tt/ptproxy/proxy.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/proxy.c Wed Apr 16 14:01:03 2003 -@@ -0,0 +1,370 @@ -+/********************************************************************** -+proxy.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. 
-+ -+Jeff Dike (jdike@karaya.com) : Modified for integration into uml -+**********************************************************************/ -+ -+/* XXX This file shouldn't refer to CONFIG_* */ -+ -+#include <errno.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <signal.h> -+#include <string.h> -+#include <fcntl.h> -+#include <termios.h> -+#include <sys/wait.h> -+#include <sys/types.h> -+#include <sys/ptrace.h> -+#include <sys/ioctl.h> -+#include <asm/unistd.h> -+ -+#include "ptproxy.h" -+#include "sysdep.h" -+#include "wait.h" -+ -+#include "user_util.h" -+#include "user.h" -+#include "os.h" -+#include "tempfile.h" -+ -+static int debugger_wait(debugger_state *debugger, int *status, int options, -+ int (*syscall)(debugger_state *debugger, pid_t child), -+ int (*normal_return)(debugger_state *debugger, -+ pid_t unused), -+ int (*wait_return)(debugger_state *debugger, -+ pid_t unused)) -+{ -+ if(debugger->real_wait){ -+ debugger->handle_trace = normal_return; -+ syscall_continue(debugger->pid); -+ debugger->real_wait = 0; -+ return(1); -+ } -+ debugger->wait_status_ptr = status; -+ debugger->wait_options = options; -+ if((debugger->debugee != NULL) && debugger->debugee->event){ -+ syscall_continue(debugger->pid); -+ wait_for_stop(debugger->pid, SIGTRAP, PTRACE_SYSCALL, -+ NULL); -+ (*wait_return)(debugger, -1); -+ return(0); -+ } -+ else if(debugger->wait_options & WNOHANG){ -+ syscall_cancel(debugger->pid, 0); -+ debugger->handle_trace = syscall; -+ return(0); -+ } -+ else { -+ syscall_pause(debugger->pid); -+ debugger->handle_trace = wait_return; -+ debugger->waiting = 1; -+ } -+ return(1); -+} -+ -+/* -+ * Handle debugger trap, i.e. syscall. 
-+ */ -+ -+int debugger_syscall(debugger_state *debugger, pid_t child) -+{ -+ long arg1, arg2, arg3, arg4, arg5, result; -+ int syscall, ret = 0; -+ -+ syscall = get_syscall(debugger->pid, &arg1, &arg2, &arg3, &arg4, -+ &arg5); -+ -+ switch(syscall){ -+ case __NR_execve: -+ /* execve never returns */ -+ debugger->handle_trace = debugger_syscall; -+ break; -+ -+ case __NR_ptrace: -+ if(debugger->debugee->pid != 0) arg2 = debugger->debugee->pid; -+ if(!debugger->debugee->in_context) -+ child = debugger->debugee->pid; -+ result = proxy_ptrace(debugger, arg1, arg2, arg3, arg4, child, -+ &ret); -+ syscall_cancel(debugger->pid, result); -+ debugger->handle_trace = debugger_syscall; -+ return(ret); -+ -+ case __NR_waitpid: -+ case __NR_wait4: -+ if(!debugger_wait(debugger, (int *) arg2, arg3, -+ debugger_syscall, debugger_normal_return, -+ proxy_wait_return)) -+ return(0); -+ break; -+ -+ case __NR_kill: -+ if(!debugger->debugee->in_context) -+ child = debugger->debugee->pid; -+ if(arg1 == debugger->debugee->pid){ -+ result = kill(child, arg2); -+ syscall_cancel(debugger->pid, result); -+ debugger->handle_trace = debugger_syscall; -+ return(0); -+ } -+ else debugger->handle_trace = debugger_normal_return; -+ break; -+ -+ default: -+ debugger->handle_trace = debugger_normal_return; -+ } -+ -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+/* Used by the tracing thread */ -+static debugger_state parent; -+static int parent_syscall(debugger_state *debugger, int pid); -+ -+int init_parent_proxy(int pid) -+{ -+ parent = ((debugger_state) { .pid = pid, -+ .wait_options = 0, -+ .wait_status_ptr = NULL, -+ .waiting = 0, -+ .real_wait = 0, -+ .expecting_child = 0, -+ .handle_trace = parent_syscall, -+ .debugee = NULL } ); -+ return(0); -+} -+ -+int parent_normal_return(debugger_state *debugger, pid_t unused) -+{ -+ debugger->handle_trace = parent_syscall; -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+static int parent_syscall(debugger_state *debugger, int 
pid) -+{ -+ long arg1, arg2, arg3, arg4, arg5; -+ int syscall; -+ -+ syscall = get_syscall(pid, &arg1, &arg2, &arg3, &arg4, &arg5); -+ -+ if((syscall == __NR_waitpid) || (syscall == __NR_wait4)){ -+ debugger_wait(&parent, (int *) arg2, arg3, parent_syscall, -+ parent_normal_return, parent_wait_return); -+ } -+ else ptrace(PTRACE_SYSCALL, pid, 0, 0); -+ return(0); -+} -+ -+int debugger_normal_return(debugger_state *debugger, pid_t unused) -+{ -+ debugger->handle_trace = debugger_syscall; -+ syscall_continue(debugger->pid); -+ return(0); -+} -+ -+void debugger_cancelled_return(debugger_state *debugger, int result) -+{ -+ debugger->handle_trace = debugger_syscall; -+ syscall_set_result(debugger->pid, result); -+ syscall_continue(debugger->pid); -+} -+ -+/* Used by the tracing thread */ -+static debugger_state debugger; -+static debugee_state debugee; -+ -+void init_proxy (pid_t debugger_pid, int stopped, int status) -+{ -+ debugger.pid = debugger_pid; -+ debugger.handle_trace = debugger_syscall; -+ debugger.debugee = &debugee; -+ debugger.waiting = 0; -+ debugger.real_wait = 0; -+ debugger.expecting_child = 0; -+ -+ debugee.pid = 0; -+ debugee.traced = 0; -+ debugee.stopped = stopped; -+ debugee.event = 0; -+ debugee.zombie = 0; -+ debugee.died = 0; -+ debugee.wait_status = status; -+ debugee.in_context = 1; -+} -+ -+int debugger_proxy(int status, int pid) -+{ -+ int ret = 0, sig; -+ -+ if(WIFSTOPPED(status)){ -+ sig = WSTOPSIG(status); -+ if (sig == SIGTRAP) -+ ret = (*debugger.handle_trace)(&debugger, pid); -+ -+ else if(sig == SIGCHLD){ -+ if(debugger.expecting_child){ -+ ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ debugger.expecting_child = 0; -+ } -+ else if(debugger.waiting) -+ real_wait_return(&debugger); -+ else { -+ ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ debugger.real_wait = 1; -+ } -+ } -+ else ptrace(PTRACE_SYSCALL, debugger.pid, 0, sig); -+ } -+ else if(WIFEXITED(status)){ -+ tracer_panic("debugger (pid %d) exited with status %d", -+ 
debugger.pid, WEXITSTATUS(status)); -+ } -+ else if(WIFSIGNALED(status)){ -+ tracer_panic("debugger (pid %d) exited with signal %d", -+ debugger.pid, WTERMSIG(status)); -+ } -+ else { -+ tracer_panic("proxy got unknown status (0x%x) on debugger " -+ "(pid %d)", status, debugger.pid); -+ } -+ return(ret); -+} -+ -+void child_proxy(pid_t pid, int status) -+{ -+ debugee.event = 1; -+ debugee.wait_status = status; -+ -+ if(WIFSTOPPED(status)){ -+ debugee.stopped = 1; -+ debugger.expecting_child = 1; -+ kill(debugger.pid, SIGCHLD); -+ } -+ else if(WIFEXITED(status) || WIFSIGNALED(status)){ -+ debugee.zombie = 1; -+ debugger.expecting_child = 1; -+ kill(debugger.pid, SIGCHLD); -+ } -+ else panic("proxy got unknown status (0x%x) on child (pid %d)", -+ status, pid); -+} -+ -+void debugger_parent_signal(int status, int pid) -+{ -+ int sig; -+ -+ if(WIFSTOPPED(status)){ -+ sig = WSTOPSIG(status); -+ if(sig == SIGTRAP) (*parent.handle_trace)(&parent, pid); -+ else ptrace(PTRACE_SYSCALL, pid, 0, sig); -+ } -+} -+ -+void fake_child_exit(void) -+{ -+ int status, pid; -+ -+ child_proxy(1, W_EXITCODE(0, 0)); -+ while(debugger.waiting == 1){ -+ pid = waitpid(debugger.pid, &status, WUNTRACED); -+ if(pid != debugger.pid){ -+ printk("fake_child_exit - waitpid failed, " -+ "errno = %d\n", errno); -+ return; -+ } -+ debugger_proxy(status, debugger.pid); -+ } -+ pid = waitpid(debugger.pid, &status, WUNTRACED); -+ if(pid != debugger.pid){ -+ printk("fake_child_exit - waitpid failed, " -+ "errno = %d\n", errno); -+ return; -+ } -+ if(ptrace(PTRACE_DETACH, debugger.pid, 0, SIGCONT) < 0) -+ printk("fake_child_exit - PTRACE_DETACH failed, errno = %d\n", -+ errno); -+} -+ -+char gdb_init_string[] = -+"att 1 \n\ -+b panic \n\ -+b stop \n\ -+handle SIGWINCH nostop noprint pass \n\ -+"; -+ -+int start_debugger(char *prog, int startup, int stop, int *fd_out) -+{ -+ int slave, child; -+ -+ slave = open_gdb_chan(); -+ if((child = fork()) == 0){ -+ char *tempname = NULL; -+ int fd; -+ -+ if(setsid() 
< 0) perror("setsid"); -+ if((dup2(slave, 0) < 0) || (dup2(slave, 1) < 0) || -+ (dup2(slave, 2) < 0)){ -+ printk("start_debugger : dup2 failed, errno = %d\n", -+ errno); -+ exit(1); -+ } -+ if(ioctl(0, TIOCSCTTY, 0) < 0){ -+ printk("start_debugger : TIOCSCTTY failed, " -+ "errno = %d\n", errno); -+ exit(1); -+ } -+ if(tcsetpgrp (1, os_getpid()) < 0){ -+ printk("start_debugger : tcsetpgrp failed, " -+ "errno = %d\n", errno); -+#ifdef notdef -+ exit(1); -+#endif -+ } -+ if((fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0)) < 0){ -+ printk("start_debugger : make_tempfile failed, errno = %d\n", -+ errno); -+ exit(1); -+ } -+ write(fd, gdb_init_string, sizeof(gdb_init_string) - 1); -+ if(startup){ -+ if(stop){ -+ write(fd, "b start_kernel\n", -+ strlen("b start_kernel\n")); -+ } -+ write(fd, "c\n", strlen("c\n")); -+ } -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ -+ printk("start_debugger : PTRACE_TRACEME failed, " -+ "errno = %d\n", errno); -+ exit(1); -+ } -+ execlp("gdb", "gdb", "--command", tempname, prog, NULL); -+ printk("start_debugger : exec of gdb failed, errno = %d\n", -+ errno); -+ } -+ if(child < 0){ -+ printk("start_debugger : fork for gdb failed, errno = %d\n", -+ errno); -+ return(-1); -+ } -+ *fd_out = slave; -+ return(child); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/ptproxy.h um/arch/um/kernel/tt/ptproxy/ptproxy.h ---- orig/arch/um/kernel/tt/ptproxy/ptproxy.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/ptproxy.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,61 @@ -+/********************************************************************** -+ptproxy.h -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#ifndef __PTPROXY_H -+#define __PTPROXY_H -+ -+#include <sys/types.h> -+ -+typedef struct debugger debugger_state; -+typedef struct debugee debugee_state; -+ -+struct debugger -+{ -+ pid_t pid; -+ int wait_options; -+ int *wait_status_ptr; -+ unsigned int waiting : 1; -+ unsigned int real_wait : 1; -+ unsigned int expecting_child : 1; -+ int (*handle_trace) (debugger_state *, pid_t); -+ -+ debugee_state *debugee; -+}; -+ -+struct debugee -+{ -+ pid_t pid; -+ int wait_status; -+ unsigned int died : 1; -+ unsigned int event : 1; -+ unsigned int stopped : 1; -+ unsigned int trace_singlestep : 1; -+ unsigned int trace_syscall : 1; -+ unsigned int traced : 1; -+ unsigned int zombie : 1; -+ unsigned int in_context : 1; -+}; -+ -+extern int debugger_syscall(debugger_state *debugger, pid_t pid); -+extern int debugger_normal_return (debugger_state *debugger, pid_t unused); -+ -+extern long proxy_ptrace (struct debugger *, int, pid_t, long, long, pid_t, -+ int *strace_out); -+extern void debugger_cancelled_return(debugger_state *debugger, int result); -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/ptrace.c um/arch/um/kernel/tt/ptproxy/ptrace.c ---- orig/arch/um/kernel/tt/ptproxy/ptrace.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/ptrace.c Mon Nov 11 13:06:03 2002 -@@ -0,0 +1,239 @@ -+/********************************************************************** -+ptrace.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+Jeff Dike (jdike@karaya.com) : Modified for integration into uml -+**********************************************************************/ -+ -+#include <errno.h> -+#include <unistd.h> -+#include <signal.h> -+#include <sys/types.h> -+#include <sys/time.h> -+#include <sys/ptrace.h> -+#include <sys/wait.h> -+#include <asm/ptrace.h> -+ -+#include "ptproxy.h" -+#include "debug.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "ptrace_user.h" -+#include "tt.h" -+ -+long proxy_ptrace(struct debugger *debugger, int arg1, pid_t arg2, -+ long arg3, long arg4, pid_t child, int *ret) -+{ -+ sigset_t relay; -+ long result; -+ int status; -+ -+ *ret = 0; -+ if(debugger->debugee->died) return(-ESRCH); -+ -+ switch(arg1){ -+ case PTRACE_ATTACH: -+ if(debugger->debugee->traced) return(-EPERM); -+ -+ debugger->debugee->pid = arg2; -+ debugger->debugee->traced = 1; -+ -+ if(is_valid_pid(arg2) && (arg2 != child)){ -+ debugger->debugee->in_context = 0; -+ kill(arg2, SIGSTOP); -+ debugger->debugee->event = 1; -+ debugger->debugee->wait_status = W_STOPCODE(SIGSTOP); -+ } -+ else { -+ debugger->debugee->in_context = 1; -+ if(debugger->debugee->stopped) -+ child_proxy(child, W_STOPCODE(SIGSTOP)); -+ else kill(child, SIGSTOP); -+ } -+ -+ return(0); -+ -+ case PTRACE_DETACH: -+ if(!debugger->debugee->traced) return(-EPERM); -+ -+ debugger->debugee->traced = 0; -+ debugger->debugee->pid 
= 0; -+ if(!debugger->debugee->in_context) -+ kill(child, SIGCONT); -+ -+ return(0); -+ -+ case PTRACE_CONT: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ *ret = PTRACE_CONT; -+ return(ptrace(PTRACE_CONT, child, arg3, arg4)); -+ -+#ifdef UM_HAVE_GETFPREGS -+ case PTRACE_GETFPREGS: -+ { -+ long regs[FP_FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETFPREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i, -+ regs[i]); -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_GETFPXREGS -+ case PTRACE_GETFPXREGS: -+ { -+ long regs[FPX_FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETFPXREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace(PTRACE_POKEDATA, debugger->pid, arg4 + 4 * i, -+ regs[i]); -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_GETREGS -+ case PTRACE_GETREGS: -+ { -+ long regs[FRAME_SIZE]; -+ int i, result; -+ -+ result = ptrace(PTRACE_GETREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ ptrace (PTRACE_POKEDATA, debugger->pid, -+ arg4 + 4 * i, regs[i]); -+ return(result); -+ } -+ break; -+#endif -+ -+ case PTRACE_KILL: -+ result = ptrace(PTRACE_KILL, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ -+ case PTRACE_PEEKDATA: -+ case PTRACE_PEEKTEXT: -+ case PTRACE_PEEKUSER: -+ /* The value being read out could be -1, so we have to -+ * check errno to see if there's an error, and zero it -+ * beforehand so we're not faked out by an old error -+ */ -+ -+ errno = 0; -+ result = ptrace(arg1, child, arg3, 0); -+ if((result == -1) && (errno != 0)) return(-errno); -+ -+ result = ptrace(PTRACE_POKEDATA, debugger->pid, arg4, result); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ -+ case PTRACE_POKEDATA: -+ case 
PTRACE_POKETEXT: -+ case PTRACE_POKEUSER: -+ result = ptrace(arg1, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ if(arg1 == PTRACE_POKEUSER) ptrace_pokeuser(arg3, arg4); -+ return(result); -+ -+#ifdef UM_HAVE_SETFPREGS -+ case PTRACE_SETFPREGS: -+ { -+ long regs[FP_FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETFPREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_SETFPXREGS -+ case PTRACE_SETFPXREGS: -+ { -+ long regs[FPX_FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace (PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETFPXREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+#ifdef UM_HAVE_SETREGS -+ case PTRACE_SETREGS: -+ { -+ long regs[FRAME_SIZE]; -+ int i; -+ -+ for (i = 0; i < sizeof(regs)/sizeof(regs[0]); i++) -+ regs[i] = ptrace(PTRACE_PEEKDATA, debugger->pid, -+ arg4 + 4 * i, 0); -+ result = ptrace(PTRACE_SETREGS, child, 0, regs); -+ if(result == -1) return(-errno); -+ -+ return(result); -+ } -+#endif -+ -+ case PTRACE_SINGLESTEP: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ sigemptyset(&relay); -+ sigaddset(&relay, SIGSEGV); -+ sigaddset(&relay, SIGILL); -+ sigaddset(&relay, SIGBUS); -+ result = ptrace(PTRACE_SINGLESTEP, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ status = wait_for_stop(child, SIGTRAP, PTRACE_SINGLESTEP, -+ &relay); -+ child_proxy(child, status); -+ return(result); -+ -+ case PTRACE_SYSCALL: -+ if(!debugger->debugee->in_context) return(-EPERM); -+ result = ptrace(PTRACE_SYSCALL, child, arg3, arg4); -+ if(result == -1) return(-errno); -+ -+ *ret = PTRACE_SYSCALL; -+ return(result); -+ -+ case PTRACE_TRACEME: -+ default: -+ return(-EINVAL); -+ } -+} -+ -+/* -+ * Overrides 
for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/sysdep.c um/arch/um/kernel/tt/ptproxy/sysdep.c ---- orig/arch/um/kernel/tt/ptproxy/sysdep.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/sysdep.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,71 @@ -+/********************************************************************** -+sysdep.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+**********************************************************************/ -+ -+#include <stdio.h> -+#include <string.h> -+#include <stdlib.h> -+#include <signal.h> -+#include <sys/types.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+#include <linux/unistd.h> -+#include "ptrace_user.h" -+#include "user_util.h" -+#include "user.h" -+ -+int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, long *arg4, -+ long *arg5) -+{ -+ *arg1 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG1_OFFSET, 0); -+ *arg2 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG2_OFFSET, 0); -+ *arg3 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG3_OFFSET, 0); -+ *arg4 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG4_OFFSET, 0); -+ *arg5 = ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_ARG5_OFFSET, 0); -+ return(ptrace(PTRACE_PEEKUSER, pid, PT_SYSCALL_NR_OFFSET, 0)); -+} -+ -+void syscall_cancel(pid_t pid, int result) -+{ -+ if((ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getpid) < 0) || -+ (ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) || -+ (wait_for_stop(pid, SIGTRAP, PTRACE_SYSCALL, NULL) < 0) || -+ (ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, result) < 0) || -+ 
(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)) -+ printk("ptproxy: couldn't cancel syscall: errno = %d\n", -+ errno); -+} -+ -+void syscall_set_result(pid_t pid, long result) -+{ -+ ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_RET_OFFSET, result); -+} -+ -+void syscall_continue(pid_t pid) -+{ -+ ptrace(PTRACE_SYSCALL, pid, 0, 0); -+} -+ -+int syscall_pause(pid_t pid) -+{ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_pause) < 0){ -+ printk("syscall_change - ptrace failed, errno = %d\n", errno); -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/sysdep.h um/arch/um/kernel/tt/ptproxy/sysdep.h ---- orig/arch/um/kernel/tt/ptproxy/sysdep.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/sysdep.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,25 @@ -+/********************************************************************** -+sysdep.h -+ -+Copyright (C) 1999 Lars Brinkhoff. -+Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+See the file COPYING for licensing terms and conditions. -+**********************************************************************/ -+ -+extern int get_syscall(pid_t pid, long *arg1, long *arg2, long *arg3, -+ long *arg4, long *arg5); -+extern void syscall_cancel (pid_t pid, long result); -+extern void syscall_set_result (pid_t pid, long result); -+extern void syscall_continue (pid_t pid); -+extern int syscall_pause(pid_t pid); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/wait.c um/arch/um/kernel/tt/ptproxy/wait.c ---- orig/arch/um/kernel/tt/ptproxy/wait.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/wait.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,86 @@ -+/********************************************************************** -+wait.c -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. -+ -+**********************************************************************/ -+ -+#include <errno.h> -+#include <signal.h> -+#include <sys/wait.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+ -+#include "ptproxy.h" -+#include "sysdep.h" -+#include "wait.h" -+#include "user_util.h" -+#include "sysdep/ptrace.h" -+#include "sysdep/ptrace_user.h" -+#include "sysdep/sigcontext.h" -+ -+int proxy_wait_return(struct debugger *debugger, pid_t unused) -+{ -+ debugger->waiting = 0; -+ -+ if(debugger->debugee->died || (debugger->wait_options & __WCLONE)){ -+ debugger_cancelled_return(debugger, -ECHILD); -+ return(0); -+ } -+ -+ if(debugger->debugee->zombie && debugger->debugee->event) -+ debugger->debugee->died = 1; -+ -+ if(debugger->debugee->event){ -+ debugger->debugee->event = 0; -+ ptrace(PTRACE_POKEDATA, debugger->pid, -+ debugger->wait_status_ptr, -+ debugger->debugee->wait_status); -+ /* if (wait4) -+ ptrace (PTRACE_POKEDATA, pid, rusage_ptr, ...); */ -+ debugger_cancelled_return(debugger, debugger->debugee->pid); -+ return(0); -+ } -+ -+ /* pause will return -EINTR, which happens to be right for wait */ -+ debugger_normal_return(debugger, -1); -+ return(0); -+} -+ -+int parent_wait_return(struct debugger *debugger, pid_t unused) -+{ -+ return(debugger_normal_return(debugger, -1)); -+} -+ -+int real_wait_return(struct debugger *debugger) 
-+{ -+ unsigned long ip; -+ int err, pid; -+ -+ pid = debugger->pid; -+ ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); -+ ip = IP_RESTART_SYSCALL(ip); -+ err = ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip); -+ if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0) -+ tracer_panic("real_wait_return : Failed to restart system " -+ "call, errno = %d\n"); -+ if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) || -+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || -+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || -+ debugger_normal_return(debugger, -1)) -+ tracer_panic("real_wait_return : gdb failed to wait, " -+ "errno = %d\n"); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/ptproxy/wait.h um/arch/um/kernel/tt/ptproxy/wait.h ---- orig/arch/um/kernel/tt/ptproxy/wait.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/ptproxy/wait.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,15 @@ -+/********************************************************************** -+wait.h -+ -+Copyright (C) 1999 Lars Brinkhoff. See the file COPYING for licensing -+terms and conditions. 
-+**********************************************************************/ -+ -+#ifndef __PTPROXY_WAIT_H -+#define __PTPROXY_WAIT_H -+ -+extern int proxy_wait_return(struct debugger *debugger, pid_t unused); -+extern int real_wait_return(struct debugger *debugger); -+extern int parent_wait_return(struct debugger *debugger, pid_t unused); -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/sys-i386/Makefile um/arch/um/kernel/tt/sys-i386/Makefile ---- orig/arch/um/kernel/tt/sys-i386/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/sys-i386/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = sys-i386.o -+ -+obj-y = sigcontext.o -+ -+USER_OBJS = sigcontext.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/sys-i386/sigcontext.c um/arch/um/kernel/tt/sys-i386/sigcontext.c ---- orig/arch/um/kernel/tt/sys-i386/sigcontext.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/sys-i386/sigcontext.c Sun Dec 1 23:33:52 2002 -@@ -0,0 +1,60 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <asm/sigcontext.h> -+#include "kern_util.h" -+#include "sysdep/frame.h" -+ -+int copy_sc_from_user_tt(void *to_ptr, void *from_ptr, void *data) -+{ -+ struct arch_frame_data *arch = data; -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ struct _fpstate *to_fp, *from_fp; -+ unsigned long sigs; -+ int err; -+ -+ to_fp = to->fpstate; -+ from_fp = from->fpstate; -+ sigs = to->oldmask; -+ err = copy_from_user_proc(to, from, sizeof(*to)); -+ to->oldmask = sigs; -+ if(to_fp != NULL){ -+ err |= copy_from_user_proc(&to->fpstate, &to_fp, -+ sizeof(to->fpstate)); -+ err |= copy_from_user_proc(to_fp, from_fp, arch->fpstate_size); -+ } -+ return(err); -+} -+ -+int 
copy_sc_to_user_tt(void *to_ptr, void *fp, void *from_ptr, void *data) -+{ -+ struct arch_frame_data *arch = data; -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ struct _fpstate *to_fp, *from_fp; -+ int err; -+ -+ to_fp = (struct _fpstate *) -+ (fp ? (unsigned long) fp : ((unsigned long) to + sizeof(*to))); -+ from_fp = from->fpstate; -+ err = copy_to_user_proc(to, from, sizeof(*to)); -+ if(from_fp != NULL){ -+ err |= copy_to_user_proc(&to->fpstate, &to_fp, -+ sizeof(to->fpstate)); -+ err |= copy_to_user_proc(to_fp, from_fp, arch->fpstate_size); -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/syscall_kern.c um/arch/um/kernel/tt/syscall_kern.c ---- orig/arch/um/kernel/tt/syscall_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/syscall_kern.c Sun Dec 8 19:32:53 2002 -@@ -0,0 +1,142 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/types.h" -+#include "linux/utime.h" -+#include "linux/sys.h" -+#include "asm/unistd.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "sysdep/syscalls.h" -+#include "kern_util.h" -+ -+static inline int check_area(void *ptr, int size) -+{ -+ return(verify_area(VERIFY_WRITE, ptr, size)); -+} -+ -+static int check_readlink(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ UPT_SYSCALL_ARG2(®s->regs))); -+} -+ -+static int check_utime(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct utimbuf))); -+} -+ -+static int check_oldstat(struct pt_regs 
*regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct __old_kernel_stat))); -+} -+ -+static int check_stat(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct stat))); -+} -+ -+static int check_stat64(struct pt_regs *regs) -+{ -+ return(check_area((void *) UPT_SYSCALL_ARG1(®s->regs), -+ sizeof(struct stat64))); -+} -+ -+struct bogus { -+ int kernel_ds; -+ int (*check_params)(struct pt_regs *); -+}; -+ -+struct bogus this_is_bogus[256] = { -+ [ __NR_mknod ] = { 1, NULL }, -+ [ __NR_mkdir ] = { 1, NULL }, -+ [ __NR_rmdir ] = { 1, NULL }, -+ [ __NR_unlink ] = { 1, NULL }, -+ [ __NR_symlink ] = { 1, NULL }, -+ [ __NR_link ] = { 1, NULL }, -+ [ __NR_rename ] = { 1, NULL }, -+ [ __NR_umount ] = { 1, NULL }, -+ [ __NR_mount ] = { 1, NULL }, -+ [ __NR_pivot_root ] = { 1, NULL }, -+ [ __NR_chdir ] = { 1, NULL }, -+ [ __NR_chroot ] = { 1, NULL }, -+ [ __NR_open ] = { 1, NULL }, -+ [ __NR_quotactl ] = { 1, NULL }, -+ [ __NR_sysfs ] = { 1, NULL }, -+ [ __NR_readlink ] = { 1, check_readlink }, -+ [ __NR_acct ] = { 1, NULL }, -+ [ __NR_execve ] = { 1, NULL }, -+ [ __NR_uselib ] = { 1, NULL }, -+ [ __NR_statfs ] = { 1, NULL }, -+ [ __NR_truncate ] = { 1, NULL }, -+ [ __NR_access ] = { 1, NULL }, -+ [ __NR_chmod ] = { 1, NULL }, -+ [ __NR_chown ] = { 1, NULL }, -+ [ __NR_lchown ] = { 1, NULL }, -+ [ __NR_utime ] = { 1, check_utime }, -+ [ __NR_oldlstat ] = { 1, check_oldstat }, -+ [ __NR_oldstat ] = { 1, check_oldstat }, -+ [ __NR_stat ] = { 1, check_stat }, -+ [ __NR_lstat ] = { 1, check_stat }, -+ [ __NR_stat64 ] = { 1, check_stat64 }, -+ [ __NR_lstat64 ] = { 1, check_stat64 }, -+ [ __NR_chown32 ] = { 1, NULL }, -+}; -+ -+/* sys_utimes */ -+ -+static int check_bogosity(struct pt_regs *regs) -+{ -+ struct bogus *bogon = &this_is_bogus[UPT_SYSCALL_NR(®s->regs)]; -+ -+ if(!bogon->kernel_ds) return(0); -+ if(bogon->check_params && (*bogon->check_params)(regs)) -+ return(-EFAULT); -+ set_fs(KERNEL_DS); 
-+ return(0); -+} -+ -+extern syscall_handler_t *sys_call_table[]; -+ -+long execute_syscall_tt(void *r) -+{ -+ struct pt_regs *regs = r; -+ long res; -+ int syscall; -+ -+ current->thread.nsyscalls++; -+ nsyscalls++; -+ syscall = UPT_SYSCALL_NR(&regs->regs); -+ -+ if((syscall >= NR_syscalls) || (syscall < 0)) -+ res = -ENOSYS; -+ else if(honeypot && check_bogosity(regs)) -+ res = -EFAULT; -+ else res = EXECUTE_SYSCALL(syscall, regs); -+ -+ set_fs(USER_DS); -+ -+ if(current->thread.mode.tt.singlestep_syscall){ -+ current->thread.mode.tt.singlestep_syscall = 0; -+ current->ptrace &= ~PT_DTRACE; -+ force_sig(SIGTRAP, current); -+ } -+ -+ return(res); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/syscall_user.c um/arch/um/kernel/tt/syscall_user.c ---- orig/arch/um/kernel/tt/syscall_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/syscall_user.c Sun Dec 8 21:00:11 2002 -@@ -0,0 +1,89 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <signal.h> -+#include <errno.h> -+#include <sys/ptrace.h> -+#include <asm/unistd.h> -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "ptrace_user.h" -+#include "task.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "syscall_user.h" -+#include "tt.h" -+ -+/* XXX Bogus */ -+#define ERESTARTSYS 512 -+#define ERESTARTNOINTR 513 -+#define ERESTARTNOHAND 514 -+ -+void syscall_handler_tt(int sig, union uml_pt_regs *regs) -+{ -+ void *sc; -+ long result; -+ int index, syscall; -+ -+ syscall = UPT_SYSCALL_NR(regs); -+ sc =
UPT_SC(regs); -+ SC_START_SYSCALL(sc); -+ -+ index = record_syscall_start(syscall); -+ syscall_trace(); -+ result = execute_syscall(regs); -+ -+ /* regs->sc may have changed while the system call ran (there may -+ * have been an interrupt or segfault), so it needs to be refreshed. -+ */ -+ UPT_SC(regs) = sc; -+ -+ SC_SET_SYSCALL_RETURN(sc, result); -+ if((result == -ERESTARTNOHAND) || (result == -ERESTARTSYS) || -+ (result == -ERESTARTNOINTR)) -+ do_signal(result); -+ -+ syscall_trace(); -+ record_syscall_end(index, result); -+} -+ -+int do_syscall(void *task, int pid) -+{ -+ unsigned long proc_regs[FRAME_SIZE]; -+ union uml_pt_regs *regs; -+ int syscall; -+ -+ if(ptrace_getregs(pid, proc_regs) < 0) -+ tracer_panic("Couldn't read registers"); -+ syscall = PT_SYSCALL_NR(proc_regs); -+ -+ regs = TASK_REGS(task); -+ UPT_SYSCALL_NR(regs) = syscall; -+ -+ if(syscall < 1) return(0); -+ -+ if((syscall != __NR_sigreturn) && -+ ((unsigned long *) PT_IP(proc_regs) >= &_stext) && -+ ((unsigned long *) PT_IP(proc_regs) <= &_etext)) -+ tracer_panic("I'm tracing myself and I can't get out"); -+ -+ if(ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, -+ __NR_getpid) < 0) -+ tracer_panic("do_syscall : Nullifying syscall failed, " -+ "errno = %d", errno); -+ return(1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/time.c um/arch/um/kernel/tt/time.c ---- orig/arch/um/kernel/tt/time.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/time.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,28 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include <sys/time.h> -+#include <time_user.h> -+#include "process.h" -+#include "user.h" -+ -+void user_time_init_tt(void) -+{ -+ if(signal(SIGVTALRM, (__sighandler_t) alarm_handler) == SIG_ERR) -+ panic("Couldn't set SIGVTALRM handler"); -+ set_interval(ITIMER_VIRTUAL); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/tlb.c um/arch/um/kernel/tt/tlb.c ---- orig/arch/um/kernel/tt/tlb.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/tlb.c Thu Dec 19 13:03:11 2002 -@@ -0,0 +1,220 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/uaccess.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "os.h" -+ -+static void fix_range(struct mm_struct *mm, unsigned long start_addr, -+ unsigned long end_addr, int force) -+{ -+ pgd_t *npgd; -+ pmd_t *npmd; -+ pte_t *npte; -+ unsigned long addr; -+ int r, w, x, err; -+ -+ if((current->thread.mode.tt.extern_pid != -1) && -+ (current->thread.mode.tt.extern_pid != os_getpid())) -+ panic("fix_range fixing wrong address space, current = 0x%p", -+ current); -+ if(mm == NULL) return; -+ for(addr=start_addr;addr<end_addr;){ -+ if(addr == TASK_SIZE){ -+ /* Skip over kernel text, kernel data, and physical -+ * memory, which don't have ptes, plus kernel virtual -+ * memory, which is flushed separately, and remap -+ * the process stack. The only way to get here is -+ * if (end_addr == STACK_TOP) > TASK_SIZE, which is -+ * only true in the honeypot case. 
-+ */ -+ addr = STACK_TOP - ABOVE_KMEM; -+ continue; -+ } -+ npgd = pgd_offset(mm, addr); -+ npmd = pmd_offset(npgd, addr); -+ if(pmd_present(*npmd)){ -+ npte = pte_offset(npmd, addr); -+ r = pte_read(*npte); -+ w = pte_write(*npte); -+ x = pte_exec(*npte); -+ if(!pte_dirty(*npte)) w = 0; -+ if(!pte_young(*npte)){ -+ r = 0; -+ w = 0; -+ } -+ if(force || pte_newpage(*npte)){ -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*npte)) -+ map_memory(addr, -+ pte_val(*npte) & PAGE_MASK, -+ PAGE_SIZE, r, w, x); -+ } -+ else if(pte_newprot(*npte)){ -+ protect_memory(addr, PAGE_SIZE, r, w, x, 1); -+ } -+ *npte = pte_mkuptodate(*npte); -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(force || pmd_newpage(*npmd)){ -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ pmd_mkuptodate(*npmd); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+} -+ -+atomic_t vmchange_seq = ATOMIC_INIT(1); -+ -+static void flush_kernel_vm_range(unsigned long start, unsigned long end, -+ int update_seq) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ int updated = 0, err; -+ -+ mm = &init_mm; -+ for(addr = start; addr < end;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(!pte_present(*pte) || pte_newpage(*pte)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, -+ PAGE_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ -err); -+ if(pte_present(*pte)) -+ map_memory(addr, -+ pte_val(*pte) & PAGE_MASK, -+ PAGE_SIZE, 1, 1, 1); -+ } -+ else if(pte_newprot(*pte)){ -+ updated = 1; -+ protect_memory(addr, PAGE_SIZE, 1, 1, 1, 1); -+ } -+ addr += PAGE_SIZE; -+ } -+ else { -+ if(pmd_newpage(*pmd)){ -+ updated = 1; -+ err = os_unmap_memory((void *) addr, PMD_SIZE); -+ if(err < 0) -+ panic("munmap failed, errno = %d\n", -+ 
-err); -+ } -+ addr += PMD_SIZE; -+ } -+ } -+ if(updated && update_seq) atomic_inc(&vmchange_seq); -+} -+ -+static void protect_vm_page(unsigned long addr, int w, int must_succeed) -+{ -+ int err; -+ -+ err = protect_memory(addr, PAGE_SIZE, 1, w, 1, must_succeed); -+ if(err == 0) return; -+ else if((err == -EFAULT) || (err == -ENOMEM)){ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE, 1); -+ protect_vm_page(addr, w, 1); -+ } -+ else panic("protect_vm_page : protect failed, errno = %d\n", err); -+} -+ -+void mprotect_kernel_vm(int w) -+{ -+ struct mm_struct *mm; -+ pgd_t *pgd; -+ pmd_t *pmd; -+ pte_t *pte; -+ unsigned long addr; -+ -+ mm = &init_mm; -+ for(addr = start_vm; addr < end_vm;){ -+ pgd = pgd_offset(mm, addr); -+ pmd = pmd_offset(pgd, addr); -+ if(pmd_present(*pmd)){ -+ pte = pte_offset(pmd, addr); -+ if(pte_present(*pte)) protect_vm_page(addr, w, 0); -+ addr += PAGE_SIZE; -+ } -+ else addr += PMD_SIZE; -+ } -+} -+ -+void flush_tlb_kernel_vm_tt(void) -+{ -+ flush_kernel_vm_range(start_vm, end_vm, 1); -+} -+ -+void __flush_tlb_one_tt(unsigned long addr) -+{ -+ flush_kernel_vm_range(addr, addr + PAGE_SIZE, 1); -+} -+ -+void flush_tlb_range_tt(struct mm_struct *mm, unsigned long start, -+ unsigned long end) -+{ -+ if(mm != current->mm) return; -+ -+ /* Assumes that the range start ... 
end is entirely within -+ * either process memory or kernel vm -+ */ -+ if((start >= start_vm) && (start < end_vm)) -+ flush_kernel_vm_range(start, end, 1); -+ else fix_range(mm, start, end, 0); -+} -+ -+void flush_tlb_mm_tt(struct mm_struct *mm) -+{ -+ unsigned long seq; -+ -+ if(mm != current->mm) return; -+ -+ fix_range(mm, 0, STACK_TOP, 0); -+ -+ seq = atomic_read(&vmchange_seq); -+ if(current->thread.mode.tt.vm_seq == seq) return; -+ current->thread.mode.tt.vm_seq = seq; -+ flush_kernel_vm_range(start_vm, end_vm, 0); -+} -+ -+void force_flush_all_tt(void) -+{ -+ fix_range(current->mm, 0, STACK_TOP, 1); -+ flush_kernel_vm_range(start_vm, end_vm, 0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/tracer.c um/arch/um/kernel/tt/tracer.c ---- orig/arch/um/kernel/tt/tracer.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/tracer.c Wed Mar 26 10:01:33 2003 -@@ -0,0 +1,453 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <stdarg.h> -+#include <unistd.h> -+#include <signal.h> -+#include <errno.h> -+#include <sched.h> -+#include <string.h> -+#include <sys/mman.h> -+#include <sys/ptrace.h> -+#include <sys/time.h> -+#include <sys/wait.h> -+#include "user.h" -+#include "sysdep/ptrace.h" -+#include "sigcontext.h" -+#include "sysdep/sigcontext.h" -+#include "os.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "mem_user.h" -+#include "process.h" -+#include "kern_util.h" -+#include "frame.h" -+#include "chan_user.h" -+#include "ptrace_user.h" -+#include "mode.h" 
-+#include "tt.h" -+ -+static int tracer_winch[2]; -+ -+int is_tracer_winch(int pid, int fd, void *data) -+{ -+ if(pid != tracing_pid) -+ return(0); -+ -+ register_winch_irq(tracer_winch[0], fd, -1, data); -+ return(1); -+} -+ -+static void tracer_winch_handler(int sig) -+{ -+ char c = 1; -+ -+ if(write(tracer_winch[1], &c, sizeof(c)) != sizeof(c)) -+ printk("tracer_winch_handler - write failed, errno = %d\n", -+ errno); -+} -+ -+/* Called only by the tracing thread during initialization */ -+ -+static void setup_tracer_winch(void) -+{ -+ int err; -+ -+ err = os_pipe(tracer_winch, 1, 1); -+ if(err){ -+ printk("setup_tracer_winch : os_pipe failed, errno = %d\n", -+ -err); -+ return; -+ } -+ signal(SIGWINCH, tracer_winch_handler); -+} -+ -+void attach_process(int pid) -+{ -+ if((ptrace(PTRACE_ATTACH, pid, 0, 0) < 0) || -+ (ptrace(PTRACE_CONT, pid, 0, 0) < 0)) -+ tracer_panic("OP_FORK failed to attach pid"); -+ wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL); -+ if(ptrace(PTRACE_CONT, pid, 0, 0) < 0) -+ tracer_panic("OP_FORK failed to continue process"); -+} -+ -+void tracer_panic(char *format, ...) 
-+{ -+ va_list ap; -+ -+ va_start(ap, format); -+ vprintf(format, ap); -+ printf("\n"); -+ while(1) pause(); -+} -+ -+static void tracer_segv(int sig, struct sigcontext sc) -+{ -+ printf("Tracing thread segfault at address 0x%lx, ip 0x%lx\n", -+ SC_FAULT_ADDR(&sc), SC_IP(&sc)); -+ while(1) -+ pause(); -+} -+ -+/* Changed early in boot, and then only read */ -+int debug = 0; -+int debug_stop = 1; -+int debug_parent = 0; -+int honeypot = 0; -+ -+static int signal_tramp(void *arg) -+{ -+ int (*proc)(void *); -+ -+ if(honeypot && munmap((void *) (host_task_size - 0x10000000), -+ 0x10000000)) -+ panic("Unmapping stack failed"); -+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) -+ panic("ptrace PTRACE_TRACEME failed"); -+ os_stop_process(os_getpid()); -+ change_sig(SIGWINCH, 0); -+ signal(SIGUSR1, SIG_IGN); -+ change_sig(SIGCHLD, 0); -+ signal(SIGSEGV, (__sighandler_t) sig_handler); -+ set_cmdline("(idle thread)"); -+ set_init_pid(os_getpid()); -+ proc = arg; -+ return((*proc)(NULL)); -+} -+ -+static void sleeping_process_signal(int pid, int sig) -+{ -+ switch(sig){ -+ /* These two result from UML being ^Z-ed and bg-ed. PTRACE_CONT is -+ * right because the process must be in the kernel already. -+ */ -+ case SIGCONT: -+ case SIGTSTP: -+ if(ptrace(PTRACE_CONT, pid, 0, sig) < 0) -+ tracer_panic("sleeping_process_signal : Failed to " -+ "continue pid %d, errno = %d\n", pid, -+ sig); -+ break; -+ -+ /* This happens when the debugger (e.g. strace) is doing system call -+ * tracing on the kernel. During a context switch, the current task -+ * will be set to the incoming process and the outgoing process will -+ * hop into write and then read. Since it's not the current process -+ * any more, the trace of those will land here. So, we need to just -+ * PTRACE_SYSCALL it. 
-+ */ -+ case SIGTRAP: -+ if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) -+ tracer_panic("sleeping_process_signal : Failed to " -+ "PTRACE_SYSCALL pid %d, errno = %d\n", -+ pid, sig); -+ break; -+ case SIGSTOP: -+ break; -+ default: -+ tracer_panic("sleeping process %d got unexpected " -+ "signal : %d\n", pid, sig); -+ break; -+ } -+} -+ -+/* Accessed only by the tracing thread */ -+int debugger_pid = -1; -+int debugger_parent = -1; -+int debugger_fd = -1; -+int gdb_pid = -1; -+ -+struct { -+ int pid; -+ int signal; -+ unsigned long addr; -+ struct timeval time; -+} signal_record[1024][32]; -+ -+int signal_index[32]; -+int nsignals = 0; -+int debug_trace = 0; -+extern int io_nsignals, io_count, intr_count; -+ -+extern void signal_usr1(int sig); -+ -+int tracing_pid = -1; -+ -+int tracer(int (*init_proc)(void *), void *sp) -+{ -+ void *task = NULL; -+ unsigned long eip = 0; -+ int status, pid = 0, sig = 0, cont_type, tracing = 0, op = 0; -+ int last_index, proc_id = 0, n, err, old_tracing = 0, strace = 0; -+ -+ capture_signal_stack(); -+ signal(SIGPIPE, SIG_IGN); -+ setup_tracer_winch(); -+ tracing_pid = os_getpid(); -+ printf("tracing thread pid = %d\n", tracing_pid); -+ -+ pid = clone(signal_tramp, sp, CLONE_FILES | SIGCHLD, init_proc); -+ n = waitpid(pid, &status, WUNTRACED); -+ if(n < 0){ -+ printf("waitpid on idle thread failed, errno = %d\n", errno); -+ exit(1); -+ } -+ if((ptrace(PTRACE_CONT, pid, 0, 0) < 0)){ -+ printf("Failed to continue idle thread, errno = %d\n", errno); -+ exit(1); -+ } -+ -+ signal(SIGSEGV, (sighandler_t) tracer_segv); -+ signal(SIGUSR1, signal_usr1); -+ if(debug_trace){ -+ printf("Tracing thread pausing to be attached\n"); -+ stop(); -+ } -+ if(debug){ -+ if(gdb_pid != -1) -+ debugger_pid = attach_debugger(pid, gdb_pid, 1); -+ else debugger_pid = init_ptrace_proxy(pid, 1, debug_stop); -+ if(debug_parent){ -+ debugger_parent = os_process_parent(debugger_pid); -+ init_parent_proxy(debugger_parent); -+ err = attach(debugger_parent); -+ 
if(err){ -+ printf("Failed to attach debugger parent %d, " -+ "errno = %d\n", debugger_parent, err); -+ debugger_parent = -1; -+ } -+ else { -+ if(ptrace(PTRACE_SYSCALL, debugger_parent, -+ 0, 0) < 0){ -+ printf("Failed to continue debugger " -+ "parent, errno = %d\n", errno); -+ debugger_parent = -1; -+ } -+ } -+ } -+ } -+ set_cmdline("(tracing thread)"); -+ while(1){ -+ if((pid = waitpid(-1, &status, WUNTRACED)) <= 0){ -+ if(errno != ECHILD){ -+ printf("wait failed - errno = %d\n", errno); -+ } -+ continue; -+ } -+ if(pid == debugger_pid){ -+ int cont = 0; -+ -+ if(WIFEXITED(status) || WIFSIGNALED(status)) -+ debugger_pid = -1; -+ /* XXX Figure out how to deal with gdb and SMP */ -+ else cont = debugger_signal(status, cpu_tasks[0].pid); -+ if(cont == PTRACE_SYSCALL) strace = 1; -+ continue; -+ } -+ else if(pid == debugger_parent){ -+ debugger_parent_signal(status, pid); -+ continue; -+ } -+ nsignals++; -+ if(WIFEXITED(status)) ; -+#ifdef notdef -+ { -+ printf("Child %d exited with status %d\n", pid, -+ WEXITSTATUS(status)); -+ } -+#endif -+ else if(WIFSIGNALED(status)){ -+ sig = WTERMSIG(status); -+ if(sig != 9){ -+ printf("Child %d exited with signal %d\n", pid, -+ sig); -+ } -+ } -+ else if(WIFSTOPPED(status)){ -+ proc_id = pid_to_processor_id(pid); -+ sig = WSTOPSIG(status); -+ if(signal_index[proc_id] == 1024){ -+ signal_index[proc_id] = 0; -+ last_index = 1023; -+ } -+ else last_index = signal_index[proc_id] - 1; -+ if(((sig == SIGPROF) || (sig == SIGVTALRM) || -+ (sig == SIGALRM)) && -+ (signal_record[proc_id][last_index].signal == sig)&& -+ (signal_record[proc_id][last_index].pid == pid)) -+ signal_index[proc_id] = last_index; -+ signal_record[proc_id][signal_index[proc_id]].pid = pid; -+ gettimeofday(&signal_record[proc_id][signal_index[proc_id]].time, NULL); -+ eip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); -+ signal_record[proc_id][signal_index[proc_id]].addr = eip; -+ signal_record[proc_id][signal_index[proc_id]++].signal = sig; -+ -+ if(proc_id 
== -1){ -+ sleeping_process_signal(pid, sig); -+ continue; -+ } -+ -+ task = cpu_tasks[proc_id].task; -+ tracing = is_tracing(task); -+ old_tracing = tracing; -+ -+ switch(sig){ -+ case SIGUSR1: -+ sig = 0; -+ op = do_proc_op(task, proc_id); -+ switch(op){ -+ case OP_TRACE_ON: -+ arch_leave_kernel(task, pid); -+ tracing = 1; -+ break; -+ case OP_REBOOT: -+ case OP_HALT: -+ unmap_physmem(); -+ kmalloc_ok = 0; -+ ptrace(PTRACE_KILL, pid, 0, 0); -+ return(op == OP_REBOOT); -+ case OP_NONE: -+ printf("Detaching pid %d\n", pid); -+ detach(pid, SIGSTOP); -+ continue; -+ default: -+ break; -+ } -+ /* OP_EXEC switches host processes on us, -+ * we want to continue the new one. -+ */ -+ pid = cpu_tasks[proc_id].pid; -+ break; -+ case SIGTRAP: -+ if(!tracing && (debugger_pid != -1)){ -+ child_signal(pid, status); -+ continue; -+ } -+ tracing = 0; -+ if(do_syscall(task, pid)) sig = SIGUSR2; -+ else clear_singlestep(task); -+ break; -+ case SIGPROF: -+ if(tracing) sig = 0; -+ break; -+ case SIGCHLD: -+ case SIGHUP: -+ sig = 0; -+ break; -+ case SIGSEGV: -+ case SIGIO: -+ case SIGALRM: -+ case SIGVTALRM: -+ case SIGFPE: -+ case SIGBUS: -+ case SIGILL: -+ case SIGWINCH: -+ default: -+ tracing = 0; -+ break; -+ } -+ set_tracing(task, tracing); -+ -+ if(!tracing && old_tracing) -+ arch_enter_kernel(task, pid); -+ -+ if(!tracing && (debugger_pid != -1) && (sig != 0) && -+ (sig != SIGALRM) && (sig != SIGVTALRM) && -+ (sig != SIGSEGV) && (sig != SIGTRAP) && -+ (sig != SIGUSR2) && (sig != SIGIO) && -+ (sig != SIGFPE)){ -+ child_signal(pid, status); -+ continue; -+ } -+ -+ if(tracing){ -+ if(singlestepping_tt(task)) -+ cont_type = PTRACE_SINGLESTEP; -+ else cont_type = PTRACE_SYSCALL; -+ } -+ else cont_type = PTRACE_CONT; -+ -+ if((cont_type == PTRACE_CONT) && -+ (debugger_pid != -1) && strace) -+ cont_type = PTRACE_SYSCALL; -+ -+ if(ptrace(cont_type, pid, 0, sig) != 0){ -+ tracer_panic("ptrace failed to continue " -+ "process - errno = %d\n", -+ errno); -+ } -+ } -+ } -+ return(0); 
-+} -+ -+static int __init uml_debug_setup(char *line, int *add) -+{ -+ char *next; -+ -+ debug = 1; -+ *add = 0; -+ if(*line != '=') return(0); -+ line++; -+ -+ while(line != NULL){ -+ next = strchr(line, ','); -+ if(next) *next++ = '\0'; -+ -+ if(!strcmp(line, "go")) debug_stop = 0; -+ else if(!strcmp(line, "parent")) debug_parent = 1; -+ else printf("Unknown debug option : '%s'\n", line); -+ -+ line = next; -+ } -+ return(0); -+} -+ -+__uml_setup("debug", uml_debug_setup, -+"debug\n" -+" Starts up the kernel under the control of gdb. See the \n" -+" kernel debugging tutorial and the debugging session pages\n" -+" at http://user-mode-linux.sourceforge.net/ for more information.\n\n" -+); -+ -+static int __init uml_debugtrace_setup(char *line, int *add) -+{ -+ debug_trace = 1; -+ return 0; -+} -+__uml_setup("debugtrace", uml_debugtrace_setup, -+"debugtrace\n" -+" Causes the tracing thread to pause until it is attached by a\n" -+" debugger and continued. This is mostly for debugging crashes\n" -+" early during boot, and should be pretty much obsoleted by\n" -+" the debug switch.\n\n" -+); -+ -+static int __init uml_honeypot_setup(char *line, int *add) -+{ -+ jail_setup("", add); -+ honeypot = 1; -+ return 0; -+} -+__uml_setup("honeypot", uml_honeypot_setup, -+"honeypot\n" -+" This makes UML put process stacks in the same location as they are\n" -+" on the host, allowing expoits such as stack smashes to work against\n" -+" UML. This implies 'jail'.\n\n" -+); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/trap_user.c um/arch/um/kernel/tt/trap_user.c ---- orig/arch/um/kernel/tt/trap_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/trap_user.c Mon Dec 9 13:14:42 2002 -@@ -0,0 +1,59 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include <signal.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "signal_user.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "task.h" -+#include "tt.h" -+ -+void sig_handler_common_tt(int sig, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct tt_regs save_regs, *r; -+ struct signal_info *info; -+ int save_errno = errno, is_user; -+ -+ unprotect_kernel_mem(); -+ -+ r = &TASK_REGS(get_current())->tt; -+ save_regs = *r; -+ is_user = user_context(SC_SP(sc)); -+ r->sc = sc; -+ if(sig != SIGUSR2) -+ r->syscall = -1; -+ -+ change_sig(SIGUSR1, 1); -+ info = &sig_info[sig]; -+ if(!info->is_irq) unblock_signals(); -+ -+ (*info->handler)(sig, (union uml_pt_regs *) r); -+ -+ if(is_user){ -+ interrupt_end(); -+ block_signals(); -+ change_sig(SIGUSR1, 0); -+ set_user_mode(NULL); -+ } -+ *r = save_regs; -+ errno = save_errno; -+ if(is_user) protect_kernel_mem(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/uaccess_user.c um/arch/um/kernel/tt/uaccess_user.c ---- orig/arch/um/kernel/tt/uaccess_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/uaccess_user.c Tue Mar 25 17:10:54 2003 -@@ -0,0 +1,100 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <setjmp.h> -+#include <string.h> -+#include "user_util.h" -+#include "uml_uaccess.h" -+#include "task.h" -+#include "kern_util.h" -+ -+int __do_copy_from_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, -+ __do_copy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(0); -+ else return(n - (fault - (unsigned long) from)); -+} -+ -+static void __do_strncpy(void *dst, const void *src, int count) -+{ -+ strncpy(dst, src, count); -+} -+ -+int __do_strncpy_from_user(char *dst, const char *src, unsigned long count, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher, -+ __do_strncpy, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(strlen(dst)); -+ else return(-1); -+} -+ -+static void __do_clear(void *to, const void *from, int n) -+{ -+ memset(to, 0, n); -+} -+ -+int __do_clear_user(void *mem, unsigned long len, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(mem, 
NULL, len, fault_addr, fault_catcher, -+ __do_clear, &faulted); -+ TASK_REGS(get_current())->tt = save; -+ -+ if(!faulted) return(0); -+ else return(len - (fault - (unsigned long) mem)); -+} -+ -+int __do_strnlen_user(const char *str, unsigned long n, -+ void **fault_addr, void **fault_catcher) -+{ -+ struct tt_regs save = TASK_REGS(get_current())->tt; -+ int ret; -+ unsigned long *faddrp = (unsigned long *)fault_addr; -+ jmp_buf jbuf; -+ -+ *fault_catcher = &jbuf; -+ if(setjmp(jbuf) == 0){ -+ ret = strlen(str) + 1; -+ } -+ else { -+ ret = *faddrp - (unsigned long) str; -+ } -+ *fault_addr = NULL; -+ *fault_catcher = NULL; -+ -+ TASK_REGS(get_current())->tt = save; -+ return ret; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tt/unmap.c um/arch/um/kernel/tt/unmap.c ---- orig/arch/um/kernel/tt/unmap.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tt/unmap.c Wed Dec 11 10:42:21 2002 -@@ -0,0 +1,31 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <sys/mman.h> -+ -+int switcheroo(int fd, int prot, void *from, void *to, int size) -+{ -+ if(munmap(to, size) < 0){ -+ return(-1); -+ } -+ if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) != to){ -+ return(-1); -+ } -+ if(munmap(from, size) < 0){ -+ return(-1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/tty_log.c um/arch/um/kernel/tty_log.c ---- orig/arch/um/kernel/tty_log.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/tty_log.c Wed Apr 16 16:35:20 2003 -@@ -0,0 +1,213 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) and -+ * geoffrey hing <ghing@net.ohio-state.edu> -+ * Licensed under the GPL -+ */ -+ -+#include <errno.h> -+#include <string.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <fcntl.h> -+#include <sys/time.h> -+#include "init.h" -+#include "user.h" -+#include "kern_util.h" -+#include "os.h" -+ -+#define TTY_LOG_DIR "./" -+ -+/* Set early in boot and then unchanged */ -+static char *tty_log_dir = TTY_LOG_DIR; -+static int tty_log_fd = -1; -+ -+#define TTY_LOG_OPEN 1 -+#define TTY_LOG_CLOSE 2 -+#define TTY_LOG_WRITE 3 -+#define TTY_LOG_EXEC 4 -+ -+#define TTY_READ 1 -+#define TTY_WRITE 2 -+ -+struct tty_log_buf { -+ int what; -+ unsigned long tty; -+ int len; -+ int direction; -+ unsigned long sec; -+ unsigned long usec; -+}; -+ -+int open_tty_log(void *tty, void *current_tty) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; -+ int fd; -+ -+ gettimeofday(&tv, NULL); -+ if(tty_log_fd != -1){ -+ data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, -+ .tty = (unsigned long) tty, -+ .len = sizeof(current_tty), -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ write(tty_log_fd, &data, sizeof(data)); -+ write(tty_log_fd, &current_tty, data.len); -+ return(tty_log_fd); -+ } -+ -+ sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, -+ (unsigned int) tv.tv_usec); -+ -+ fd = os_open_file(buf, of_append(of_create(of_rdwr(OPENFLAGS()))), -+ 0644); -+ if(fd < 0){ -+ printk("open_tty_log : couldn't open '%s', errno = %d\n", -+
buf, -fd); -+ } -+ return(fd); -+} -+ -+void close_tty_log(int fd, void *tty) -+{ -+ struct tty_log_buf data; -+ struct timeval tv; -+ -+ if(tty_log_fd != -1){ -+ gettimeofday(&tv, NULL); -+ data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, -+ .tty = (unsigned long) tty, -+ .len = 0, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ write(tty_log_fd, &data, sizeof(data)); -+ return; -+ } -+ close(fd); -+} -+ -+static int log_chunk(int fd, char *buf, int len) -+{ -+ int total = 0, try, missed, n; -+ char chunk[64]; -+ -+ while(len > 0){ -+ try = (len > sizeof(chunk)) ? sizeof(chunk) : len; -+ missed = copy_from_user_proc(chunk, buf, try); -+ try -= missed; -+ n = write(fd, chunk, try); -+ if(n != try) -+ return(-errno); -+ if(missed != 0) -+ return(-EFAULT); -+ -+ len -= try; -+ total += try; -+ buf += try; -+ } -+ -+ return(total); -+} -+ -+int write_tty_log(int fd, char *buf, int len, void *tty, int is_read) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ int direction; -+ -+ if(fd == tty_log_fd){ -+ gettimeofday(&tv, NULL); -+ direction = is_read ? 
TTY_READ : TTY_WRITE; -+ data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = direction, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ write(tty_log_fd, &data, sizeof(data)); -+ } -+ -+ return(log_chunk(fd, buf, len)); -+} -+ -+void log_exec(char **argv, void *tty) -+{ -+ struct timeval tv; -+ struct tty_log_buf data; -+ char **ptr,*arg; -+ int len; -+ -+ if(tty_log_fd == -1) return; -+ -+ gettimeofday(&tv, NULL); -+ -+ len = 0; -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ len += strlen_user_proc(arg); -+ } -+ -+ data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, -+ .tty = (unsigned long) tty, -+ .len = len, -+ .direction = 0, -+ .sec = tv.tv_sec, -+ .usec = tv.tv_usec } ); -+ write(tty_log_fd, &data, sizeof(data)); -+ -+ for(ptr = argv; ; ptr++){ -+ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) -+ return; -+ if(arg == NULL) break; -+ log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); -+ } -+} -+ -+static int __init set_tty_log_dir(char *name, int *add) -+{ -+ tty_log_dir = name; -+ return 0; -+} -+ -+__uml_setup("tty_log_dir=", set_tty_log_dir, -+"tty_log_dir=<directory>\n" -+" This is used to specify the directory where the logs of all pty\n" -+" data from this UML machine will be written.\n\n" -+); -+ -+static int __init set_tty_log_fd(char *name, int *add) -+{ -+ char *end; -+ -+ tty_log_fd = strtoul(name, &end, 0); -+ if((*end != '\0') || (end == name)){ -+ printf("set_tty_log_fd - strtoul failed on '%s'\n", name); -+ tty_log_fd = -1; -+ } -+ return 0; -+} -+ -+__uml_setup("tty_log_fd=", set_tty_log_fd, -+"tty_log_fd=<fd>\n" -+" This is used to specify a preconfigured file descriptor to which all\n" -+" tty data will be written. Preconfigure the descriptor with something\n" -+" like '10>tty_log tty_log_fd=10'.\n\n" -+); -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/uaccess_user.c um/arch/um/kernel/uaccess_user.c ---- orig/arch/um/kernel/uaccess_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/uaccess_user.c Tue Mar 25 17:06:05 2003 -@@ -0,0 +1,64 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <setjmp.h> -+#include <string.h> -+ -+/* These are here rather than tt/uaccess.c because skas mode needs them in -+ * order to do SIGBUS recovery when a tmpfs mount runs out of room. -+ */ -+ -+unsigned long __do_user_copy(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher, -+ void (*op)(void *to, const void *from, -+ int n), int *faulted_out) -+{ -+ unsigned long *faddrp = (unsigned long *) fault_addr, ret; -+ -+ jmp_buf jbuf; -+ *fault_catcher = &jbuf; -+ if(setjmp(jbuf) == 0){ -+ (*op)(to, from, n); -+ ret = 0; -+ *faulted_out = 0; -+ } -+ else { -+ ret = *faddrp; -+ *faulted_out = 1; -+ } -+ *fault_addr = NULL; -+ *fault_catcher = NULL; -+ return ret; -+} -+ -+void __do_copy(void *to, const void *from, int n) -+{ -+ memcpy(to, from, n); -+} -+ -+ -+int __do_copy_to_user(void *to, const void *from, int n, -+ void **fault_addr, void **fault_catcher) -+{ -+ unsigned long fault; -+ int faulted; -+ -+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, -+ __do_copy, &faulted); -+ if(!faulted) return(0); -+ else return(n - (fault - (unsigned long) to)); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/um_arch.c um/arch/um/kernel/um_arch.c ---- orig/arch/um/kernel/um_arch.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/um_arch.c Thu Mar 6 19:06:09 2003 -@@ -0,0 +1,425 @@ -+/* -+ * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/kernel.h" -+#include "linux/sched.h" -+#include "linux/notifier.h" -+#include "linux/mm.h" -+#include "linux/types.h" -+#include "linux/tty.h" -+#include "linux/init.h" -+#include "linux/bootmem.h" -+#include "linux/spinlock.h" -+#include "linux/utsname.h" -+#include "linux/sysrq.h" -+#include "linux/seq_file.h" -+#include "linux/delay.h" -+#include "asm/page.h" -+#include "asm/pgtable.h" -+#include "asm/ptrace.h" -+#include "asm/elf.h" -+#include "asm/user.h" -+#include "ubd_user.h" -+#include "asm/current.h" -+#include "user_util.h" -+#include "kern_util.h" -+#include "kern.h" -+#include "mprot.h" -+#include "mem_user.h" -+#include "mem.h" -+#include "umid.h" -+#include "initrd.h" -+#include "init.h" -+#include "os.h" -+#include "choose-mode.h" -+#include "mode_kern.h" -+#include "mode.h" -+ -+#define DEFAULT_COMMAND_LINE "root=/dev/ubd0" -+ -+struct cpuinfo_um boot_cpu_data = { -+ .loops_per_jiffy = 0, -+ .pgd_quick = NULL, -+ .pmd_quick = NULL, -+ .pte_quick = NULL, -+ .pgtable_cache_sz = 0, -+ .ipi_pipe = { -1, -1 } -+}; -+ -+unsigned long thread_saved_pc(struct thread_struct *thread) -+{ -+ return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas, -+ thread))); -+} -+ -+static int show_cpuinfo(struct seq_file *m, void *v) -+{ -+ int index; -+ -+ index = (struct cpuinfo_um 
*)v - cpu_data; -+#ifdef CONFIG_SMP -+ if (!(cpu_online_map & (1 << index))) -+ return 0; -+#endif -+ -+ seq_printf(m, "processor\t: %d\n", index); -+ seq_printf(m, "vendor_id\t: User Mode Linux\n"); -+ seq_printf(m, "model name\t: UML\n"); -+ seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas")); -+ seq_printf(m, "host\t\t: %s\n", host_info); -+ seq_printf(m, "bogomips\t: %lu.%02lu\n\n", -+ loops_per_jiffy/(500000/HZ), -+ (loops_per_jiffy/(5000/HZ)) % 100); -+ -+ return(0); -+} -+ -+static void *c_start(struct seq_file *m, loff_t *pos) -+{ -+ return *pos < NR_CPUS ? cpu_data + *pos : NULL; -+} -+ -+static void *c_next(struct seq_file *m, void *v, loff_t *pos) -+{ -+ ++*pos; -+ return c_start(m, pos); -+} -+ -+static void c_stop(struct seq_file *m, void *v) -+{ -+} -+ -+struct seq_operations cpuinfo_op = { -+ .start = c_start, -+ .next = c_next, -+ .stop = c_stop, -+ .show = show_cpuinfo, -+}; -+ -+pte_t * __bad_pagetable(void) -+{ -+ panic("Someone should implement __bad_pagetable"); -+ return(NULL); -+} -+ -+/* Set in linux_main */ -+unsigned long host_task_size; -+unsigned long task_size; -+unsigned long uml_start; -+ -+/* Set in early boot */ -+unsigned long uml_physmem; -+unsigned long uml_reserved; -+unsigned long start_vm; -+unsigned long end_vm; -+int ncpus = 1; -+ -+#ifdef CONFIG_MODE_TT -+/* Pointer set in linux_main, the array itself is private to each thread, -+ * and changed at address space creation time so this poses no concurrency -+ * problems. 
-+ */ -+static char *argv1_begin = NULL; -+static char *argv1_end = NULL; -+#endif -+ -+/* Set in early boot */ -+static int have_root __initdata = 0; -+long physmem_size = 32 * 1024 * 1024; -+ -+void set_cmdline(char *cmd) -+{ -+#ifdef CONFIG_MODE_TT -+ char *umid, *ptr; -+ -+ if(CHOOSE_MODE(honeypot, 0)) return; -+ -+ umid = get_umid(1); -+ if(umid != NULL){ -+ snprintf(argv1_begin, -+ (argv1_end - argv1_begin) * sizeof(*ptr), -+ "(%s) ", umid); -+ ptr = &argv1_begin[strlen(argv1_begin)]; -+ } -+ else ptr = argv1_begin; -+ -+ snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd); -+ memset(argv1_begin + strlen(argv1_begin), '\0', -+ argv1_end - argv1_begin - strlen(argv1_begin)); -+#endif -+} -+ -+static char *usage_string = -+"User Mode Linux v%s\n" -+" available at http://user-mode-linux.sourceforge.net/\n\n"; -+ -+static int __init uml_version_setup(char *line, int *add) -+{ -+ printf("%s\n", system_utsname.release); -+ exit(0); -+} -+ -+__uml_setup("--version", uml_version_setup, -+"--version\n" -+" Prints the version number of the kernel.\n\n" -+); -+ -+static int __init uml_root_setup(char *line, int *add) -+{ -+ have_root = 1; -+ return 0; -+} -+ -+__uml_setup("root=", uml_root_setup, -+"root=<file containing the root fs>\n" -+" This is actually used by the generic kernel in exactly the same\n" -+" way as in any other kernel. 
If you configure a number of block\n" -+" devices and want to boot off something other than ubd0, you \n" -+" would use something like:\n" -+" root=/dev/ubd5\n\n" -+); -+ -+#ifdef CONFIG_SMP -+static int __init uml_ncpus_setup(char *line, int *add) -+{ -+ if (!sscanf(line, "%d", &ncpus)) { -+ printf("Couldn't parse [%s]\n", line); -+ return -1; -+ } -+ -+ return 0; -+} -+ -+__uml_setup("ncpus=", uml_ncpus_setup, -+"ncpus=<# of desired CPUs>\n" -+" This tells an SMP kernel how many virtual processors to start.\n\n" -+); -+#endif -+ -+int force_tt = 0; -+ -+#if defined(CONFIG_MODE_TT) && defined(CONFIG_MODE_SKAS) -+#define DEFAULT_TT 0 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ force_tt = 1; -+ return(0); -+} -+ -+#else -+#ifdef CONFIG_MODE_SKAS -+ -+#define DEFAULT_TT 0 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); -+ return(0); -+} -+ -+#else -+#ifdef CONFIG_MODE_TT -+ -+#define DEFAULT_TT 1 -+ -+static int __init mode_tt_setup(char *line, int *add) -+{ -+ printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); -+ return(0); -+} -+ -+#else -+ -+#error Either CONFIG_MODE_TT or CONFIG_MODE_SKAS must be enabled -+ -+#endif -+#endif -+#endif -+ -+__uml_setup("mode=tt", mode_tt_setup, -+"mode=tt\n" -+" When both CONFIG_MODE_TT and CONFIG_MODE_SKAS are enabled, this option\n" -+" forces UML to run in tt (tracing thread) mode. 
It is not the default\n" -+" because it's slower and less secure than skas mode.\n\n" -+); -+ -+int mode_tt = DEFAULT_TT; -+ -+static int __init Usage(char *line, int *add) -+{ -+ const char **p; -+ -+ printf(usage_string, system_utsname.release); -+ p = &__uml_help_start; -+ while (p < &__uml_help_end) { -+ printf("%s", *p); -+ p++; -+ } -+ exit(0); -+} -+ -+__uml_setup("--help", Usage, -+"--help\n" -+" Prints this message.\n\n" -+); -+ -+static int __init uml_checksetup(char *line, int *add) -+{ -+ struct uml_param *p; -+ -+ p = &__uml_setup_start; -+ while(p < &__uml_setup_end) { -+ int n; -+ -+ n = strlen(p->str); -+ if(!strncmp(line, p->str, n)){ -+ if (p->setup_func(line + n, add)) return 1; -+ } -+ p++; -+ } -+ return 0; -+} -+ -+static void __init uml_postsetup(void) -+{ -+ initcall_t *p; -+ -+ p = &__uml_postsetup_start; -+ while(p < &__uml_postsetup_end){ -+ (*p)(); -+ p++; -+ } -+ return; -+} -+ -+/* Set during early boot */ -+unsigned long brk_start; -+static struct vm_reserved kernel_vm_reserved; -+ -+#define MIN_VMALLOC (32 * 1024 * 1024) -+ -+int linux_main(int argc, char **argv) -+{ -+ unsigned long avail; -+ unsigned long virtmem_size, max_physmem; -+ unsigned int i, add, err; -+ -+ for (i = 1; i < argc; i++){ -+ if((i == 1) && (argv[i][0] == ' ')) continue; -+ add = 1; -+ uml_checksetup(argv[i], &add); -+ if(add) add_arg(saved_command_line, argv[i]); -+ } -+ if(have_root == 0) add_arg(saved_command_line, DEFAULT_COMMAND_LINE); -+ -+ mode_tt = force_tt ? 
1 : !can_do_skas(); -+ uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0, -+ &host_task_size, &task_size); -+ -+ brk_start = (unsigned long) sbrk(0); -+ CHOOSE_MODE_PROC(before_mem_tt, before_mem_skas, brk_start); -+ -+ uml_physmem = uml_start; -+ -+ /* Reserve up to 4M after the current brk */ -+ uml_reserved = ROUND_4M(brk_start) + (1 << 22); -+ -+ setup_machinename(system_utsname.machine); -+ -+#ifdef CONFIG_MODE_TT -+ argv1_begin = argv[1]; -+ argv1_end = &argv[1][strlen(argv[1])]; -+#endif -+ -+ set_usable_vm(uml_physmem, get_kmem_end()); -+ -+ highmem = 0; -+ max_physmem = get_kmem_end() - uml_physmem - MIN_VMALLOC; -+ if(physmem_size > max_physmem){ -+ highmem = physmem_size - max_physmem; -+ physmem_size -= highmem; -+#ifndef CONFIG_HIGHMEM -+ highmem = 0; -+ printf("CONFIG_HIGHMEM not enabled - physical memory shrunk " -+ "to %ld bytes\n", physmem_size); -+#endif -+ } -+ -+ high_physmem = uml_physmem + physmem_size; -+ high_memory = (void *) high_physmem; -+ -+ start_vm = VMALLOC_START; -+ -+ setup_physmem(uml_physmem, uml_reserved, physmem_size); -+ virtmem_size = physmem_size; -+ avail = get_kmem_end() - start_vm; -+ if(physmem_size > avail) virtmem_size = avail; -+ end_vm = start_vm + virtmem_size; -+ -+ if(virtmem_size < physmem_size) -+ printf("Kernel virtual memory size shrunk to %ld bytes\n", -+ virtmem_size); -+ -+ err = reserve_vm(high_physmem, end_vm, &kernel_vm_reserved); -+ if(err){ -+ printf("Failed to reserve VM area for kernel VM\n"); -+ exit(1); -+ } -+ -+ uml_postsetup(); -+ -+ init_task.thread.kernel_stack = (unsigned long) &init_task + -+ 2 * PAGE_SIZE; -+ -+ task_protections((unsigned long) &init_task); -+ os_flush_stdout(); -+ -+ return(CHOOSE_MODE(start_uml_tt(), start_uml_skas())); -+} -+ -+static int panic_exit(struct notifier_block *self, unsigned long unused1, -+ void *unused2) -+{ -+#ifdef CONFIG_SYSRQ -+ handle_sysrq('p', ¤t->thread.regs, NULL, NULL); -+#endif -+ machine_halt(); -+ return(0); -+} -+ -+static 
struct notifier_block panic_exit_notifier = { -+ .notifier_call = panic_exit, -+ .next = NULL, -+ .priority = 0 -+}; -+ -+void __init setup_arch(char **cmdline_p) -+{ -+ notifier_chain_register(&panic_notifier_list, &panic_exit_notifier); -+ paging_init(); -+ strcpy(command_line, saved_command_line); -+ *cmdline_p = command_line; -+ setup_hostinfo(); -+} -+ -+void __init check_bugs(void) -+{ -+ arch_check_bugs(); -+ check_ptrace(); -+ check_sigio(); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/umid.c um/arch/um/kernel/umid.c ---- orig/arch/um/kernel/umid.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/umid.c Mon Feb 24 23:11:23 2003 -@@ -0,0 +1,319 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <fcntl.h> -+#include <errno.h> -+#include <string.h> -+#include <stdlib.h> -+#include <dirent.h> -+#include <signal.h> -+#include <sys/stat.h> -+#include <sys/param.h> -+#include "user.h" -+#include "umid.h" -+#include "init.h" -+#include "os.h" -+#include "user_util.h" -+#include "choose-mode.h" -+ -+#define UMID_LEN 64 -+#define UML_DIR "~/.uml/" -+ -+/* Changed by set_umid and make_umid, which are run early in boot */ -+static char umid[UMID_LEN] = { 0 }; -+ -+/* Changed by set_uml_dir and make_uml_dir, which are run early in boot */ -+static char *uml_dir = UML_DIR; -+ -+/* Changed by set_umid */ -+static int umid_is_random = 1; -+static int umid_inited = 0; -+ -+static int make_umid(int (*printer)(const char *fmt, ...)); -+ -+static int __init set_umid(char *name, int 
is_random, -+ int (*printer)(const char *fmt, ...)) -+{ -+ if(umid_inited){ -+ (*printer)("Unique machine name can't be set twice\n"); -+ return(-1); -+ } -+ -+ if(strlen(name) > UMID_LEN - 1) -+ (*printer)("Unique machine name is being truncated to %s " -+ "characters\n", UMID_LEN); -+ strncpy(umid, name, UMID_LEN - 1); -+ umid[UMID_LEN - 1] = '\0'; -+ -+ umid_is_random = is_random; -+ umid_inited = 1; -+ return 0; -+} -+ -+static int __init set_umid_arg(char *name, int *add) -+{ -+ return(set_umid(name, 0, printf)); -+} -+ -+__uml_setup("umid=", set_umid_arg, -+"umid=<name>\n" -+" This is used to assign a unique identity to this UML machine and\n" -+" is used for naming the pid file and management console socket.\n\n" -+); -+ -+int __init umid_file_name(char *name, char *buf, int len) -+{ -+ int n; -+ -+ if(!umid_inited && make_umid(printk)) return(-1); -+ -+ n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1; -+ if(n > len){ -+ printk("umid_file_name : buffer too short\n"); -+ return(-1); -+ } -+ -+ sprintf(buf, "%s%s/%s", uml_dir, umid, name); -+ return(0); -+} -+ -+extern int tracing_pid; -+ -+static int __init create_pid_file(void) -+{ -+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; -+ char pid[sizeof("nnnnn\0")]; -+ int fd; -+ -+ if(umid_file_name("pid", file, sizeof(file))) return 0; -+ -+ fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), -+ 0644); -+ if(fd < 0){ -+ printf("Open of machine pid file \"%s\" failed - " -+ "errno = %d\n", file, -fd); -+ return 0; -+ } -+ -+ sprintf(pid, "%d\n", os_getpid()); -+ if(write(fd, pid, strlen(pid)) != strlen(pid)) -+ printf("Write of pid file failed - errno = %d\n", errno); -+ close(fd); -+ return 0; -+} -+ -+static int actually_do_remove(char *dir) -+{ -+ DIR *directory; -+ struct dirent *ent; -+ int len; -+ char file[256]; -+ -+ if((directory = opendir(dir)) == NULL){ -+ printk("actually_do_remove : couldn't open directory '%s', " -+ "errno = %d\n", dir, errno); -+ return(1); -+ } -+ 
while((ent = readdir(directory)) != NULL){ -+ if(!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, "..")) -+ continue; -+ len = strlen(dir) + sizeof("/") + strlen(ent->d_name) + 1; -+ if(len > sizeof(file)){ -+ printk("Not deleting '%s' from '%s' - name too long\n", -+ ent->d_name, dir); -+ continue; -+ } -+ sprintf(file, "%s/%s", dir, ent->d_name); -+ if(unlink(file) < 0){ -+ printk("actually_do_remove : couldn't remove '%s' " -+ "from '%s', errno = %d\n", ent->d_name, dir, -+ errno); -+ return(1); -+ } -+ } -+ if(rmdir(dir) < 0){ -+ printk("actually_do_remove : couldn't rmdir '%s', " -+ "errno = %d\n", dir, errno); -+ return(1); -+ } -+ return(0); -+} -+ -+void remove_umid_dir(void) -+{ -+ char dir[strlen(uml_dir) + UMID_LEN + 1]; -+ if(!umid_inited) return; -+ -+ sprintf(dir, "%s%s", uml_dir, umid); -+ actually_do_remove(dir); -+} -+ -+char *get_umid(int only_if_set) -+{ -+ if(only_if_set && umid_is_random) return(NULL); -+ return(umid); -+} -+ -+int not_dead_yet(char *dir) -+{ -+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; -+ char pid[sizeof("nnnnn\0")], *end; -+ int dead, fd, p; -+ -+ sprintf(file, "%s/pid", dir); -+ dead = 0; -+ if((fd = os_open_file(file, of_read(OPENFLAGS()), 0)) < 0){ -+ if(fd != -ENOENT){ -+ printk("not_dead_yet : couldn't open pid file '%s', " -+ "errno = %d\n", file, -fd); -+ return(1); -+ } -+ dead = 1; -+ } -+ if(fd > 0){ -+ if(read(fd, pid, sizeof(pid)) < 0){ -+ printk("not_dead_yet : couldn't read pid file '%s', " -+ "errno = %d\n", file, errno); -+ return(1); -+ } -+ p = strtoul(pid, &end, 0); -+ if(end == pid){ -+ printk("not_dead_yet : couldn't parse pid file '%s', " -+ "errno = %d\n", file, errno); -+ dead = 1; -+ } -+ if(((kill(p, 0) < 0) && (errno == ESRCH)) || -+ (p == CHOOSE_MODE(tracing_pid, os_getpid()))) -+ dead = 1; -+ } -+ if(!dead) return(1); -+ return(actually_do_remove(dir)); -+} -+ -+static int __init set_uml_dir(char *name, int *add) -+{ -+ if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){ -+ 
uml_dir = malloc(strlen(name) + 1); -+ if(uml_dir == NULL){ -+ printf("Failed to malloc uml_dir - error = %d\n", -+ errno); -+ uml_dir = name; -+ return(0); -+ } -+ sprintf(uml_dir, "%s/", name); -+ } -+ else uml_dir = name; -+ return 0; -+} -+ -+static int __init make_uml_dir(void) -+{ -+ char dir[MAXPATHLEN + 1] = { '\0' }; -+ int len; -+ -+ if(*uml_dir == '~'){ -+ char *home = getenv("HOME"); -+ -+ if(home == NULL){ -+ printf("make_uml_dir : no value in environment for " -+ "$HOME\n"); -+ exit(1); -+ } -+ strncpy(dir, home, sizeof(dir)); -+ uml_dir++; -+ } -+ len = strlen(dir); -+ strncat(dir, uml_dir, sizeof(dir) - len); -+ len = strlen(dir); -+ if((len > 0) && (len < sizeof(dir) - 1) && (dir[len - 1] != '/')){ -+ dir[len] = '/'; -+ dir[len + 1] = '\0'; -+ } -+ -+ if((uml_dir = malloc(strlen(dir) + 1)) == NULL){ -+ printf("make_uml_dir : malloc failed, errno = %d\n", errno); -+ exit(1); -+ } -+ strcpy(uml_dir, dir); -+ -+ if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){ -+ printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno); -+ return(-1); -+ } -+ return 0; -+} -+ -+static int __init make_umid(int (*printer)(const char *fmt, ...)) -+{ -+ int fd, err; -+ char tmp[strlen(uml_dir) + UMID_LEN + 1]; -+ -+ strncpy(tmp, uml_dir, sizeof(tmp) - 1); -+ tmp[sizeof(tmp) - 1] = '\0'; -+ -+ if(!umid_inited){ -+ strcat(tmp, "XXXXXX"); -+ fd = mkstemp(tmp); -+ if(fd < 0){ -+ (*printer)("make_umid - mkstemp failed, errno = %d\n", -+ errno); -+ return(1); -+ } -+ -+ close(fd); -+ /* There's a nice tiny little race between this unlink and -+ * the mkdir below. It'd be nice if there were a mkstemp -+ * for directories. 
-+ */ -+ unlink(tmp); -+ set_umid(&tmp[strlen(uml_dir)], 1, printer); -+ } -+ -+ sprintf(tmp, "%s%s", uml_dir, umid); -+ -+ if((err = mkdir(tmp, 0777)) < 0){ -+ if(errno == EEXIST){ -+ if(not_dead_yet(tmp)){ -+ (*printer)("umid '%s' is in use\n", umid); -+ return(-1); -+ } -+ err = mkdir(tmp, 0777); -+ } -+ } -+ if(err < 0){ -+ (*printer)("Failed to create %s - errno = %d\n", umid, errno); -+ return(-1); -+ } -+ -+ return(0); -+} -+ -+__uml_setup("uml_dir=", set_uml_dir, -+"uml_dir=<directory>\n" -+" The location to place the pid and umid files.\n\n" -+); -+ -+__uml_postsetup(make_uml_dir); -+ -+static int __init make_umid_setup(void) -+{ -+ return(make_umid(printf)); -+} -+ -+__uml_postsetup(make_umid_setup); -+__uml_postsetup(create_pid_file); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/kernel/user_syms.c um/arch/um/kernel/user_syms.c ---- orig/arch/um/kernel/user_syms.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/user_syms.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,116 @@ -+#include <stdio.h> -+#include <unistd.h> -+#include <fcntl.h> -+#include <dirent.h> -+#include <errno.h> -+#include <utime.h> -+#include <string.h> -+#include <sys/stat.h> -+#include <sys/vfs.h> -+#include <sys/ioctl.h> -+#include "user_util.h" -+#include "mem_user.h" -+ -+/* XXX All the __CONFIG_* stuff is broken because this file can't include -+ * config.h -+ */ -+ -+/* Had to steal this from linux/module.h because that file can't be included -+ * since this includes various user-level headers. 
-+ */ -+ -+struct module_symbol -+{ -+ unsigned long value; -+ const char *name; -+}; -+ -+/* Indirect stringification. */ -+ -+#define __MODULE_STRING_1(x) #x -+#define __MODULE_STRING(x) __MODULE_STRING_1(x) -+ -+#if !defined(__AUTOCONF_INCLUDED__) -+ -+#define __EXPORT_SYMBOL(sym,str) error config_must_be_included_before_module -+#define EXPORT_SYMBOL(var) error config_must_be_included_before_module -+#define EXPORT_SYMBOL_NOVERS(var) error config_must_be_included_before_module -+ -+#elif !defined(__CONFIG_MODULES__) -+ -+#define __EXPORT_SYMBOL(sym,str) -+#define EXPORT_SYMBOL(var) -+#define EXPORT_SYMBOL_NOVERS(var) -+ -+#else -+ -+#define __EXPORT_SYMBOL(sym, str) \ -+const char __kstrtab_##sym[] \ -+__attribute__((section(".kstrtab"))) = str; \ -+const struct module_symbol __ksymtab_##sym \ -+__attribute__((section("__ksymtab"))) = \ -+{ (unsigned long)&sym, __kstrtab_##sym } -+ -+#if defined(__MODVERSIONS__) || !defined(__CONFIG_MODVERSIONS__) -+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) -+#else -+#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var))) -+#endif -+ -+#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) -+ -+#endif -+ -+EXPORT_SYMBOL(__errno_location); -+ -+EXPORT_SYMBOL(access); -+EXPORT_SYMBOL(open); -+EXPORT_SYMBOL(open64); -+EXPORT_SYMBOL(close); -+EXPORT_SYMBOL(read); -+EXPORT_SYMBOL(write); -+EXPORT_SYMBOL(dup2); -+EXPORT_SYMBOL(__xstat); -+EXPORT_SYMBOL(__lxstat); -+EXPORT_SYMBOL(__lxstat64); -+EXPORT_SYMBOL(lseek); -+EXPORT_SYMBOL(lseek64); -+EXPORT_SYMBOL(chown); -+EXPORT_SYMBOL(truncate); -+EXPORT_SYMBOL(utime); -+EXPORT_SYMBOL(chmod); -+EXPORT_SYMBOL(rename); -+EXPORT_SYMBOL(__xmknod); -+ -+EXPORT_SYMBOL(symlink); -+EXPORT_SYMBOL(link); -+EXPORT_SYMBOL(unlink); -+EXPORT_SYMBOL(readlink); -+ -+EXPORT_SYMBOL(mkdir); -+EXPORT_SYMBOL(rmdir); -+EXPORT_SYMBOL(opendir); -+EXPORT_SYMBOL(readdir); -+EXPORT_SYMBOL(closedir); -+EXPORT_SYMBOL(seekdir); 
-+EXPORT_SYMBOL(telldir); -+ -+EXPORT_SYMBOL(ioctl); -+ -+extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes, -+ __off64_t __offset); -+extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n, -+ __off64_t __offset); -+EXPORT_SYMBOL(pread64); -+EXPORT_SYMBOL(pwrite64); -+ -+EXPORT_SYMBOL(statfs); -+EXPORT_SYMBOL(statfs64); -+ -+EXPORT_SYMBOL(memcpy); -+EXPORT_SYMBOL(getuid); -+ -+EXPORT_SYMBOL(memset); -+EXPORT_SYMBOL(strstr); -+ -+EXPORT_SYMBOL(find_iomem); -diff -Naur -X ../exclude-files orig/arch/um/kernel/user_util.c um/arch/um/kernel/user_util.c ---- orig/arch/um/kernel/user_util.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/kernel/user_util.c Wed Apr 23 20:41:54 2003 -@@ -0,0 +1,164 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stdlib.h> -+#include <fcntl.h> -+#include <unistd.h> -+#include <limits.h> -+#include <sys/mman.h> -+#include <sys/stat.h> -+#include <sys/ptrace.h> -+#include <sys/utsname.h> -+#include <sys/param.h> -+#include <sys/time.h> -+#include "asm/types.h" -+#include <ctype.h> -+#include <signal.h> -+#include <wait.h> -+#include <errno.h> -+#include <stdarg.h> -+#include <sched.h> -+#include <termios.h> -+#include <string.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "user.h" -+#include "mem_user.h" -+#include "init.h" -+#include "helper.h" -+#include "uml-config.h" -+ -+#define COMMAND_LINE_SIZE _POSIX_ARG_MAX -+ -+/* Changed in linux_main and setup_arch, which run before SMP is started */ -+char saved_command_line[COMMAND_LINE_SIZE] = { 0 }; -+char command_line[COMMAND_LINE_SIZE] = { 0 }; -+ -+void add_arg(char *cmd_line, char *arg) -+{ -+ if (strlen(cmd_line) + strlen(arg) + 1 > COMMAND_LINE_SIZE) { -+ printf("add_arg: Too much command line!\n"); -+ exit(1); -+ } -+ if(strlen(cmd_line) > 0) strcat(cmd_line, " "); -+ strcat(cmd_line, arg); -+} -+ -+void stop(void) -+{ -+ while(1) sleep(1000000); -+} -+ 
-+void stack_protections(unsigned long address) -+{ -+ int prot = PROT_READ | PROT_WRITE | PROT_EXEC; -+ -+ if(mprotect((void *) address, page_size(), prot) < 0) -+ panic("protecting stack failed, errno = %d", errno); -+} -+ -+void task_protections(unsigned long address) -+{ -+ unsigned long guard = address + page_size(); -+ unsigned long stack = guard + page_size(); -+ int prot = 0, pages; -+#ifdef notdef -+ if(mprotect((void *) guard, page_size(), prot) < 0) -+ panic("protecting guard page failed, errno = %d", errno); -+#endif -+ pages = (1 << UML_CONFIG_KERNEL_STACK_ORDER) - 2; -+ prot = PROT_READ | PROT_WRITE | PROT_EXEC; -+ if(mprotect((void *) stack, pages * page_size(), prot) < 0) -+ panic("protecting stack failed, errno = %d", errno); -+} -+ -+int wait_for_stop(int pid, int sig, int cont_type, void *relay) -+{ -+ sigset_t *relay_signals = relay; -+ int status, ret; -+ -+ while(1){ -+ if(((ret = waitpid(pid, &status, WUNTRACED)) < 0) || -+ !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ -+ if(ret < 0){ -+ if(errno == EINTR) continue; -+ printk("wait failed, errno = %d\n", -+ errno); -+ } -+ else if(WIFEXITED(status)) -+ printk("process exited with status %d\n", -+ WEXITSTATUS(status)); -+ else if(WIFSIGNALED(status)) -+ printk("process exited with signal %d\n", -+ WTERMSIG(status)); -+ else if((WSTOPSIG(status) == SIGVTALRM) || -+ (WSTOPSIG(status) == SIGALRM) || -+ (WSTOPSIG(status) == SIGIO) || -+ (WSTOPSIG(status) == SIGPROF) || -+ (WSTOPSIG(status) == SIGCHLD) || -+ (WSTOPSIG(status) == SIGWINCH) || -+ (WSTOPSIG(status) == SIGINT)){ -+ ptrace(cont_type, pid, 0, WSTOPSIG(status)); -+ continue; -+ } -+ else if((relay_signals != NULL) && -+ sigismember(relay_signals, WSTOPSIG(status))){ -+ ptrace(cont_type, pid, 0, WSTOPSIG(status)); -+ continue; -+ } -+ else printk("process stopped with signal %d\n", -+ WSTOPSIG(status)); -+ panic("wait_for_stop failed to wait for %d to stop " -+ "with %d\n", pid, sig); -+ } -+ return(status); -+ } -+} -+ -+int raw(int 
fd, int complain) -+{ -+ struct termios tt; -+ int err; -+ -+ tcgetattr(fd, &tt); -+ cfmakeraw(&tt); -+ err = tcsetattr(fd, TCSANOW, &tt); -+ if((err < 0) && complain){ -+ printk("tcsetattr failed, errno = %d\n", errno); -+ return(-errno); -+ } -+ return(0); -+} -+ -+void setup_machinename(char *machine_out) -+{ -+ struct utsname host; -+ -+ uname(&host); -+ strcpy(machine_out, host.machine); -+} -+ -+char host_info[(_UTSNAME_LENGTH + 1) * 4 + _UTSNAME_NODENAME_LENGTH + 1]; -+ -+void setup_hostinfo(void) -+{ -+ struct utsname host; -+ -+ uname(&host); -+ sprintf(host_info, "%s %s %s %s %s", host.sysname, host.nodename, -+ host.release, host.version, host.machine); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/link.ld.in um/arch/um/link.ld.in ---- orig/arch/um/link.ld.in Wed Dec 31 19:00:00 1969 -+++ um/arch/um/link.ld.in Fri Jan 17 23:11:30 2003 -@@ -0,0 +1,95 @@ -+OUTPUT_FORMAT("ELF_FORMAT") -+OUTPUT_ARCH(ELF_ARCH) -+ENTRY(_start) -+ -+SECTIONS -+{ -+ . = START() + SIZEOF_HEADERS; -+ -+ . = ALIGN(4096); -+ __binary_start = .; -+ifdef(`MODE_TT', ` -+ .thread_private : { -+ __start_thread_private = .; -+ errno = .; -+ . += 4; -+ arch/um/kernel/tt/unmap_fin.o (.data) -+ __end_thread_private = .; -+ } -+ . = ALIGN(4096); -+ .remap : { arch/um/kernel/tt/unmap_fin.o (.text) } -+') -+ . = ALIGN(4096); /* Init code and data */ -+ _stext = .; -+ __init_begin = .; -+ .text.init : { *(.text.init) } -+ . = ALIGN(4096); -+ .text : -+ { -+ *(.text) -+ /* .gnu.warning sections are handled specially by elf32.em. 
*/ -+ *(.gnu.warning) -+ *(.gnu.linkonce.t*) -+ } -+ .fini : { *(.fini) } =0x9090 -+ .rodata : { *(.rodata) *(.gnu.linkonce.r*) } -+ .rodata1 : { *(.rodata1) } -+ _etext = .; -+ PROVIDE (etext = .); -+ -+ . = ALIGN(4096); -+ PROVIDE (_sdata = .); -+ -+include(`arch/um/common.ld.in') -+ -+ .data : -+ { -+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */ -+ *(.data.init_task) -+ *(.data) -+ *(.gnu.linkonce.d*) -+ CONSTRUCTORS -+ } -+ .data1 : { *(.data1) } -+ .ctors : -+ { -+ *(.ctors) -+ } -+ .dtors : -+ { -+ *(.dtors) -+ } -+ -+ .got : { *(.got.plt) *(.got) } -+ .dynamic : { *(.dynamic) } -+ /* We want the small data sections together, so single-instruction offsets -+ can access them all, and initialized data all before uninitialized, so -+ we can shorten the on-disk segment size. */ -+ .sdata : { *(.sdata) } -+ _edata = .; -+ PROVIDE (edata = .); -+ . = ALIGN(0x1000); -+ .sbss : -+ { -+ __bss_start = .; -+ PROVIDE(_bss_start = .); -+ *(.sbss) -+ *(.scommon) -+ } -+ .bss : -+ { -+ *(.dynbss) -+ *(.bss) -+ *(COMMON) -+ } -+ _end = . ; -+ PROVIDE (end = .); -+ /* Stabs debugging sections. 
*/ -+ .stab 0 : { *(.stab) } -+ .stabstr 0 : { *(.stabstr) } -+ .stab.excl 0 : { *(.stab.excl) } -+ .stab.exclstr 0 : { *(.stab.exclstr) } -+ .stab.index 0 : { *(.stab.index) } -+ .stab.indexstr 0 : { *(.stab.indexstr) } -+ .comment 0 : { *(.comment) } -+} -diff -Naur -X ../exclude-files orig/arch/um/main.c um/arch/um/main.c ---- orig/arch/um/main.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/main.c Fri Jan 17 13:22:40 2003 -@@ -0,0 +1,195 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <stdlib.h> -+#include <string.h> -+#include <signal.h> -+#include <sys/resource.h> -+#include <sys/mman.h> -+#include <sys/user.h> -+#include <asm/page.h> -+#include "user_util.h" -+#include "kern_util.h" -+#include "mem_user.h" -+#include "signal_user.h" -+#include "user.h" -+#include "init.h" -+#include "mode.h" -+#include "choose-mode.h" -+#include "uml-config.h" -+ -+/* Set in set_stklim, which is called from main and __wrap_malloc. -+ * __wrap_malloc only calls it if main hasn't started. 
-+ */ -+unsigned long stacksizelim; -+ -+/* Set in main */ -+char *linux_prog; -+ -+#define PGD_BOUND (4 * 1024 * 1024) -+#define STACKSIZE (8 * 1024 * 1024) -+#define THREAD_NAME_LEN (256) -+ -+static void set_stklim(void) -+{ -+ struct rlimit lim; -+ -+ if(getrlimit(RLIMIT_STACK, &lim) < 0){ -+ perror("getrlimit"); -+ exit(1); -+ } -+ if((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)){ -+ lim.rlim_cur = STACKSIZE; -+ if(setrlimit(RLIMIT_STACK, &lim) < 0){ -+ perror("setrlimit"); -+ exit(1); -+ } -+ } -+ stacksizelim = (lim.rlim_cur + PGD_BOUND - 1) & ~(PGD_BOUND - 1); -+} -+ -+static __init void do_uml_initcalls(void) -+{ -+ initcall_t *call; -+ -+ call = &__uml_initcall_start; -+ while (call < &__uml_initcall_end){; -+ (*call)(); -+ call++; -+ } -+} -+ -+static void last_ditch_exit(int sig) -+{ -+ CHOOSE_MODE(kmalloc_ok = 0, (void) 0); -+ signal(SIGINT, SIG_DFL); -+ signal(SIGTERM, SIG_DFL); -+ signal(SIGHUP, SIG_DFL); -+ uml_cleanup(); -+ exit(1); -+} -+ -+extern int uml_exitcode; -+ -+int main(int argc, char **argv, char **envp) -+{ -+ char **new_argv; -+ sigset_t mask; -+ int ret, i; -+ -+ /* Enable all signals except SIGIO - in some environments, we can -+ * enter with some signals blocked -+ */ -+ -+ sigemptyset(&mask); -+ sigaddset(&mask, SIGIO); -+ if(sigprocmask(SIG_SETMASK, &mask, NULL) < 0){ -+ perror("sigprocmask"); -+ exit(1); -+ } -+ -+#ifdef UML_CONFIG_MODE_TT -+ /* Allocate memory for thread command lines */ -+ if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ -+ -+ char padding[THREAD_NAME_LEN] = { -+ [ 0 ... 
THREAD_NAME_LEN - 2] = ' ', '\0' -+ }; -+ -+ new_argv = malloc((argc + 2) * sizeof(char*)); -+ if(!new_argv) { -+ perror("Allocating extended argv"); -+ exit(1); -+ } -+ -+ new_argv[0] = argv[0]; -+ new_argv[1] = padding; -+ -+ for(i = 2; i <= argc; i++) -+ new_argv[i] = argv[i - 1]; -+ new_argv[argc + 1] = NULL; -+ -+ execvp(new_argv[0], new_argv); -+ perror("execing with extended args"); -+ exit(1); -+ } -+#endif -+ -+ linux_prog = argv[0]; -+ -+ set_stklim(); -+ -+ if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){ -+ perror("Mallocing argv"); -+ exit(1); -+ } -+ for(i=0;i<argc;i++){ -+ if((new_argv[i] = strdup(argv[i])) == NULL){ -+ perror("Mallocing an arg"); -+ exit(1); -+ } -+ } -+ new_argv[argc] = NULL; -+ -+ set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); -+ -+ do_uml_initcalls(); -+ ret = linux_main(argc, argv); -+ -+ /* Reboot */ -+ if(ret){ -+ printf("\n"); -+ execvp(new_argv[0], new_argv); -+ perror("Failed to exec kernel"); -+ ret = 1; -+ } -+ printf("\n"); -+ return(uml_exitcode); -+} -+ -+#define CAN_KMALLOC() \ -+ (kmalloc_ok && CHOOSE_MODE((getpid() != tracing_pid), 1)) -+ -+extern void *__real_malloc(int); -+ -+void *__wrap_malloc(int size) -+{ -+ if(CAN_KMALLOC()) -+ return(um_kmalloc(size)); -+ else -+ return(__real_malloc(size)); -+} -+ -+void *__wrap_calloc(int n, int size) -+{ -+ void *ptr = __wrap_malloc(n * size); -+ -+ if(ptr == NULL) return(NULL); -+ memset(ptr, 0, n * size); -+ return(ptr); -+} -+ -+extern void __real_free(void *); -+ -+void __wrap_free(void *ptr) -+{ -+ if(CAN_KMALLOC()) kfree(ptr); -+ else __real_free(ptr); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/Makefile um/arch/um/os-Linux/Makefile ---- orig/arch/um/os-Linux/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,17 @@ -+# -+# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = file.o process.o tty.o -+ -+include $(TOPDIR)/Rules.make -+ -+$(obj-y) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+clean : -+ -+archmrproper: -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/Makefile um/arch/um/os-Linux/drivers/Makefile ---- orig/arch/um/os-Linux/drivers/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/drivers/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,31 @@ -+# -+# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET := drivers.o -+ -+list-multi := tuntap.o ethertap.o -+ -+ethertap-objs := ethertap_kern.o ethertap_user.o -+tuntap-objs := tuntap_kern.o tuntap_user.o -+ -+obj-y = -+obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o -+obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o -+ -+USER_SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y)),$($(f)-objs)) -+ -+USER_OBJS = $(filter %_user.o,$(obj-y) $(USER_SINGLE_OBJS)) -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+ethertap.o : $(ethertap-objs) -+ -+tuntap.o : $(tuntap-objs) -+ -+$(list-multi) : # This doesn't work, but should : '%.o : $(%-objs)' -+ $(LD) $(LD_RFLAG) -r -o $@ $($(patsubst %.o,%,$@)-objs) -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/etap.h um/arch/um/os-Linux/drivers/etap.h ---- orig/arch/um/os-Linux/drivers/etap.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/drivers/etap.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,27 @@ -+/* 
-+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "net_user.h" -+ -+struct ethertap_data { -+ char *dev_name; -+ char *gate_addr; -+ int data_fd; -+ int control_fd; -+ void *dev; -+}; -+ -+extern struct net_user_info ethertap_user_info; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/ethertap_kern.c um/arch/um/os-Linux/drivers/ethertap_kern.c ---- orig/arch/um/os-Linux/drivers/ethertap_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/drivers/ethertap_kern.c Sun Dec 15 21:17:37 2002 -@@ -0,0 +1,122 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. 
-+ */ -+ -+#include "linux/init.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/init.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "etap.h" -+ -+struct ethertap_init { -+ char *dev_name; -+ char *gate_addr; -+}; -+ -+static void etap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct ethertap_data *epri; -+ struct ethertap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ epri = (struct ethertap_data *) pri->user; -+ *epri = ((struct ethertap_data) -+ { .dev_name = init->dev_name, -+ .gate_addr = init->gate_addr, -+ .data_fd = -1, -+ .control_fd = -1, -+ .dev = dev }); -+ -+ printk("ethertap backend - %s", epri->dev_name); -+ if(epri->gate_addr != NULL) -+ printk(", IP = %s", epri->gate_addr); -+ printk("\n"); -+} -+ -+static int etap_read(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ int len; -+ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_ETHERTAP); -+ if(*skb == NULL) return(-ENOMEM); -+ len = net_recvfrom(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + 2 * ETH_HEADER_ETHERTAP); -+ if(len <= 0) return(len); -+ skb_pull(*skb, 2); -+ len -= 2; -+ return(len); -+} -+ -+static int etap_write(int fd, struct sk_buff **skb, struct uml_net_private *lp) -+{ -+ if(skb_headroom(*skb) < 2){ -+ struct sk_buff *skb2; -+ -+ skb2 = skb_realloc_headroom(*skb, 2); -+ dev_kfree_skb(*skb); -+ if (skb2 == NULL) return(-ENOMEM); -+ *skb = skb2; -+ } -+ skb_push(*skb, 2); -+ return(net_send(fd, (*skb)->data, (*skb)->len)); -+} -+ -+struct net_kern_info ethertap_kern_info = { -+ .init = etap_init, -+ .protocol = eth_protocol, -+ .read = etap_read, -+ .write = etap_write, -+}; -+ -+int ethertap_setup(char *str, char **mac_out, void *data) -+{ -+ struct ethertap_init *init = data; -+ -+ *init = ((struct ethertap_init) -+ { .dev_name = NULL, -+ .gate_addr = NULL }); -+ if(tap_setup_common(str, "ethertap", &init->dev_name, mac_out, -+ &init->gate_addr)) -+ return(0); 
-+ if(init->dev_name == NULL){ -+ printk("ethertap_setup : Missing tap device name\n"); -+ return(0); -+ } -+ -+ return(1); -+} -+ -+static struct transport ethertap_transport = { -+ .list = LIST_HEAD_INIT(ethertap_transport.list), -+ .name = "ethertap", -+ .setup = ethertap_setup, -+ .user = ðertap_user_info, -+ .kern = ðertap_kern_info, -+ .private_size = sizeof(struct ethertap_data), -+}; -+ -+static int register_ethertap(void) -+{ -+ register_transport(ðertap_transport); -+ return(1); -+} -+ -+__initcall(register_ethertap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/ethertap_user.c um/arch/um/os-Linux/drivers/ethertap_user.c ---- orig/arch/um/os-Linux/drivers/ethertap_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/drivers/ethertap_user.c Sun Dec 15 21:17:52 2002 -@@ -0,0 +1,238 @@ -+/* -+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and -+ * James Leu (jleu@mindspring.net). -+ * Copyright (C) 2001 by various other people who didn't put their name here. -+ * Licensed under the GPL. 
-+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <stddef.h> -+#include <fcntl.h> -+#include <stdlib.h> -+#include <sys/errno.h> -+#include <sys/socket.h> -+#include <sys/wait.h> -+#include <sys/un.h> -+#include <net/if.h> -+#include "user.h" -+#include "kern_util.h" -+#include "net_user.h" -+#include "etap.h" -+#include "helper.h" -+#include "os.h" -+ -+#define MAX_PACKET ETH_MAX_PACKET -+ -+void etap_user_init(void *data, void *dev) -+{ -+ struct ethertap_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+struct addr_change { -+ enum { ADD_ADDR, DEL_ADDR } what; -+ unsigned char addr[4]; -+ unsigned char netmask[4]; -+}; -+ -+static void etap_change(int op, unsigned char *addr, unsigned char *netmask, -+ int fd) -+{ -+ struct addr_change change; -+ void *output; -+ -+ change.what = op; -+ memcpy(change.addr, addr, sizeof(change.addr)); -+ memcpy(change.netmask, netmask, sizeof(change.netmask)); -+ if(write(fd, &change, sizeof(change)) != sizeof(change)) -+ printk("etap_change - request failed, errno = %d\n", -+ errno); -+ output = um_kmalloc(page_size()); -+ if(output == NULL) -+ printk("etap_change : Failed to allocate output buffer\n"); -+ read_output(fd, output, page_size()); -+ if(output != NULL){ -+ printk("%s", output); -+ kfree(output); -+ } -+} -+ -+static void etap_open_addr(unsigned char *addr, unsigned char *netmask, -+ void *arg) -+{ -+ etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); -+} -+ -+static void etap_close_addr(unsigned char *addr, unsigned char *netmask, -+ void *arg) -+{ -+ etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); -+} -+ -+struct etap_pre_exec_data { -+ int control_remote; -+ int control_me; -+ int data_me; -+}; -+ -+static void etap_pre_exec(void *arg) -+{ -+ struct etap_pre_exec_data *data = arg; -+ -+ dup2(data->control_remote, 1); -+ close(data->data_me); -+ close(data->control_me); -+} -+ -+static int etap_tramp(char *dev, char *gate, int control_me, -+ int control_remote, int data_me, int data_remote) -+{ 
-+ struct etap_pre_exec_data pe_data; -+ int pid, status, err; -+ char version_buf[sizeof("nnnnn\0")]; -+ char data_fd_buf[sizeof("nnnnnn\0")]; -+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; -+ char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, -+ data_fd_buf, gate_buf, NULL }; -+ char *nosetup_args[] = { "uml_net", version_buf, "ethertap", -+ dev, data_fd_buf, NULL }; -+ char **args, c; -+ -+ sprintf(data_fd_buf, "%d", data_remote); -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ if(gate != NULL){ -+ strcpy(gate_buf, gate); -+ args = setup_args; -+ } -+ else args = nosetup_args; -+ -+ err = 0; -+ pe_data.control_remote = control_remote; -+ pe_data.control_me = control_me; -+ pe_data.data_me = data_me; -+ pid = run_helper(etap_pre_exec, &pe_data, args, NULL); -+ -+ if(pid < 0) err = errno; -+ close(data_remote); -+ close(control_remote); -+ if(read(control_me, &c, sizeof(c)) != sizeof(c)){ -+ printk("etap_tramp : read of status failed, errno = %d\n", -+ errno); -+ return(EINVAL); -+ } -+ if(c != 1){ -+ printk("etap_tramp : uml_net failed\n"); -+ err = EINVAL; -+ if(waitpid(pid, &status, 0) < 0) err = errno; -+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)){ -+ printk("uml_net didn't exit with status 1\n"); -+ } -+ } -+ return(err); -+} -+ -+static int etap_open(void *data) -+{ -+ struct ethertap_data *pri = data; -+ char *output; -+ int data_fds[2], control_fds[2], err, output_len; -+ -+ err = tap_open_common(pri->dev, pri->gate_addr); -+ if(err) return(err); -+ -+ err = os_pipe(data_fds, 0, 0); -+ if(err){ -+ printk("data os_pipe failed - errno = %d\n", -err); -+ return(err); -+ } -+ -+ err = os_pipe(control_fds, 1, 0); -+ if(err){ -+ printk("control os_pipe failed - errno = %d\n", -err); -+ return(err); -+ } -+ -+ err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], -+ control_fds[1], data_fds[0], data_fds[1]); -+ output_len = page_size(); -+ output = um_kmalloc(output_len); -+ read_output(control_fds[0], output, output_len); 
-+ -+ if(output == NULL) -+ printk("etap_open : failed to allocate output buffer\n"); -+ else { -+ printk("%s", output); -+ kfree(output); -+ } -+ -+ if(err != 0){ -+ printk("etap_tramp failed - errno = %d\n", err); -+ return(-err); -+ } -+ -+ pri->data_fd = data_fds[0]; -+ pri->control_fd = control_fds[0]; -+ iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); -+ return(data_fds[0]); -+} -+ -+static void etap_close(int fd, void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); -+ close(fd); -+ os_shutdown_socket(pri->data_fd, 1, 1); -+ close(pri->data_fd); -+ pri->data_fd = -1; -+ close(pri->control_fd); -+ pri->control_fd = -1; -+} -+ -+static int etap_set_mtu(int mtu, void *data) -+{ -+ return(mtu); -+} -+ -+static void etap_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ tap_check_ips(pri->gate_addr, addr); -+ if(pri->control_fd == -1) return; -+ etap_open_addr(addr, netmask, &pri->control_fd); -+} -+ -+static void etap_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct ethertap_data *pri = data; -+ -+ if(pri->control_fd == -1) return; -+ etap_close_addr(addr, netmask, &pri->control_fd); -+} -+ -+struct net_user_info ethertap_user_info = { -+ .init = etap_user_init, -+ .open = etap_open, -+ .close = etap_close, -+ .remove = NULL, -+ .set_mtu = etap_set_mtu, -+ .add_address = etap_add_addr, -+ .delete_address = etap_del_addr, -+ .max_packet = MAX_PACKET - ETH_HEADER_ETHERTAP -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/tuntap.h um/arch/um/os-Linux/drivers/tuntap.h ---- orig/arch/um/os-Linux/drivers/tuntap.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/drivers/tuntap.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,32 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_TUNTAP_H -+#define __UM_TUNTAP_H -+ -+#include "net_user.h" -+ -+struct tuntap_data { -+ char *dev_name; -+ int fixed_config; -+ char *gate_addr; -+ int fd; -+ void *dev; -+}; -+ -+extern struct net_user_info tuntap_user_info; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/tuntap_kern.c um/arch/um/os-Linux/drivers/tuntap_kern.c ---- orig/arch/um/os-Linux/drivers/tuntap_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/drivers/tuntap_kern.c Sun Dec 15 21:18:16 2002 -@@ -0,0 +1,105 @@ -+/* -+ * Copyright (C) 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/stddef.h" -+#include "linux/netdevice.h" -+#include "linux/etherdevice.h" -+#include "linux/skbuff.h" -+#include "linux/init.h" -+#include "asm/errno.h" -+#include "net_kern.h" -+#include "net_user.h" -+#include "tuntap.h" -+ -+struct tuntap_init { -+ char *dev_name; -+ char *gate_addr; -+}; -+ -+static void tuntap_init(struct net_device *dev, void *data) -+{ -+ struct uml_net_private *pri; -+ struct tuntap_data *tpri; -+ struct tuntap_init *init = data; -+ -+ init_etherdev(dev, 0); -+ pri = dev->priv; -+ tpri = (struct tuntap_data *) pri->user; -+ *tpri = ((struct tuntap_data) -+ { .dev_name = init->dev_name, -+ .fixed_config = (init->dev_name != NULL), -+ .gate_addr = init->gate_addr, -+ .fd = -1, -+ .dev = dev }); -+ printk("TUN/TAP backend - "); -+ if(tpri->gate_addr != NULL) -+ printk("IP = %s", tpri->gate_addr); -+ printk("\n"); -+} -+ -+static int tuntap_read(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ *skb = ether_adjust_skb(*skb, ETH_HEADER_OTHER); -+ if(*skb == NULL) return(-ENOMEM); -+ return(net_read(fd, (*skb)->mac.raw, -+ (*skb)->dev->mtu + ETH_HEADER_OTHER)); -+} -+ -+static int tuntap_write(int fd, struct sk_buff **skb, -+ struct uml_net_private *lp) -+{ -+ return(net_write(fd, (*skb)->data, (*skb)->len)); -+} -+ -+struct net_kern_info tuntap_kern_info = { -+ .init = tuntap_init, -+ .protocol = eth_protocol, -+ .read = tuntap_read, -+ .write = tuntap_write, -+}; -+ -+int tuntap_setup(char *str, 
char **mac_out, void *data) -+{ -+ struct tuntap_init *init = data; -+ -+ *init = ((struct tuntap_init) -+ { .dev_name = NULL, -+ .gate_addr = NULL }); -+ if(tap_setup_common(str, "tuntap", &init->dev_name, mac_out, -+ &init->gate_addr)) -+ return(0); -+ -+ return(1); -+} -+ -+static struct transport tuntap_transport = { -+ .list = LIST_HEAD_INIT(tuntap_transport.list), -+ .name = "tuntap", -+ .setup = tuntap_setup, -+ .user = &tuntap_user_info, -+ .kern = &tuntap_kern_info, -+ .private_size = sizeof(struct tuntap_data), -+ .setup_size = sizeof(struct tuntap_init), -+}; -+ -+static int register_tuntap(void) -+{ -+ register_transport(&tuntap_transport); -+ return(1); -+} -+ -+__initcall(register_tuntap); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/drivers/tuntap_user.c um/arch/um/os-Linux/drivers/tuntap_user.c ---- orig/arch/um/os-Linux/drivers/tuntap_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/drivers/tuntap_user.c Sun Dec 15 21:18:25 2002 -@@ -0,0 +1,223 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <stddef.h> -+#include <stdlib.h> -+#include <unistd.h> -+#include <errno.h> -+#include <fcntl.h> -+#include <sys/wait.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/uio.h> -+#include <sys/ioctl.h> -+#include <net/if.h> -+#include <linux/if_tun.h> -+#include "net_user.h" -+#include "tuntap.h" -+#include "kern_util.h" -+#include "user.h" -+#include "helper.h" -+#include "os.h" -+ -+#define MAX_PACKET ETH_MAX_PACKET -+ -+void tuntap_user_init(void *data, void 
*dev) -+{ -+ struct tuntap_data *pri = data; -+ -+ pri->dev = dev; -+} -+ -+static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ tap_check_ips(pri->gate_addr, addr); -+ if((pri->fd == -1) || pri->fixed_config) return; -+ open_addr(addr, netmask, pri->dev_name); -+} -+ -+static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, -+ void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ if((pri->fd == -1) || pri->fixed_config) return; -+ close_addr(addr, netmask, pri->dev_name); -+} -+ -+struct tuntap_pre_exec_data { -+ int stdout; -+ int close_me; -+}; -+ -+static void tuntap_pre_exec(void *arg) -+{ -+ struct tuntap_pre_exec_data *data = arg; -+ -+ dup2(data->stdout, 1); -+ close(data->close_me); -+} -+ -+static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, -+ char *buffer, int buffer_len, int *used_out) -+{ -+ struct tuntap_pre_exec_data data; -+ char version_buf[sizeof("nnnnn\0")]; -+ char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, -+ NULL }; -+ char buf[CMSG_SPACE(sizeof(*fd_out))]; -+ struct msghdr msg; -+ struct cmsghdr *cmsg; -+ struct iovec iov; -+ int pid, n; -+ -+ sprintf(version_buf, "%d", UML_NET_VERSION); -+ -+ data.stdout = remote; -+ data.close_me = me; -+ -+ pid = run_helper(tuntap_pre_exec, &data, argv, NULL); -+ -+ if(pid < 0) return(-pid); -+ -+ close(remote); -+ -+ msg.msg_name = NULL; -+ msg.msg_namelen = 0; -+ if(buffer != NULL){ -+ iov = ((struct iovec) { buffer, buffer_len }); -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ } -+ else { -+ msg.msg_iov = NULL; -+ msg.msg_iovlen = 0; -+ } -+ msg.msg_control = buf; -+ msg.msg_controllen = sizeof(buf); -+ msg.msg_flags = 0; -+ n = recvmsg(me, &msg, 0); -+ *used_out = n; -+ if(n < 0){ -+ printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", -+ errno); -+ return(errno); -+ } -+ waitpid(pid, NULL, 0); -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ if(cmsg == NULL){ -+ 
printk("tuntap_open_tramp : didn't receive a message\n"); -+ return(EINVAL); -+ } -+ if((cmsg->cmsg_level != SOL_SOCKET) || -+ (cmsg->cmsg_type != SCM_RIGHTS)){ -+ printk("tuntap_open_tramp : didn't receive a descriptor\n"); -+ return(EINVAL); -+ } -+ *fd_out = ((int *) CMSG_DATA(cmsg))[0]; -+ return(0); -+} -+ -+static int tuntap_open(void *data) -+{ -+ struct ifreq ifr; -+ struct tuntap_data *pri = data; -+ char *output, *buffer; -+ int err, fds[2], len, used; -+ -+ err = tap_open_common(pri->dev, pri->gate_addr); -+ if(err) return(err); -+ -+ if(pri->fixed_config){ -+ if((pri->fd = open("/dev/net/tun", O_RDWR)) < 0){ -+ printk("Failed to open /dev/net/tun, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ memset(&ifr, 0, sizeof(ifr)); -+ ifr.ifr_flags = IFF_TAP; -+ strncpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name) - 1); -+ if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ -+ printk("TUNSETIFF failed, errno = %d", errno); -+ close(pri->fd); -+ return(-errno); -+ } -+ } -+ else { -+ err = os_pipe(fds, 0, 0); -+ if(err){ -+ printk("tuntap_open : os_pipe failed - errno = %d\n", -+ -err); -+ return(err); -+ } -+ -+ buffer = get_output_buffer(&len); -+ if(buffer != NULL) len--; -+ used = 0; -+ -+ err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], -+ fds[1], buffer, len, &used); -+ -+ output = buffer; -+ if(err == 0){ -+ pri->dev_name = uml_strdup(buffer); -+ output += IFNAMSIZ; -+ printk(output); -+ free_output_buffer(buffer); -+ } -+ else { -+ printk(output); -+ free_output_buffer(buffer); -+ printk("tuntap_open_tramp failed - errno = %d\n", err); -+ return(-err); -+ } -+ close(fds[0]); -+ iter_addresses(pri->dev, open_addr, pri->dev_name); -+ } -+ -+ return(pri->fd); -+} -+ -+static void tuntap_close(int fd, void *data) -+{ -+ struct tuntap_data *pri = data; -+ -+ if(!pri->fixed_config) -+ iter_addresses(pri->dev, close_addr, pri->dev_name); -+ close(fd); -+ pri->fd = -1; -+} -+ -+static int tuntap_set_mtu(int mtu, void *data) -+{ -+ return(mtu); 
-+} -+ -+struct net_user_info tuntap_user_info = { -+ .init = tuntap_user_init, -+ .open = tuntap_open, -+ .close = tuntap_close, -+ .remove = NULL, -+ .set_mtu = tuntap_set_mtu, -+ .add_address = tuntap_add_addr, -+ .delete_address = tuntap_del_addr, -+ .max_packet = MAX_PACKET -+}; -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/file.c um/arch/um/os-Linux/file.c ---- orig/arch/um/os-Linux/file.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/file.c Tue Feb 4 19:32:10 2003 -@@ -0,0 +1,384 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <unistd.h> -+#include <errno.h> -+#include <fcntl.h> -+#include <signal.h> -+#include <sys/socket.h> -+#include <sys/un.h> -+#include <sys/ioctl.h> -+#include <sys/mount.h> -+#include <sys/uio.h> -+#include "os.h" -+#include "user.h" -+#include "kern_util.h" -+ -+int os_file_type(char *file) -+{ -+ struct stat64 buf; -+ -+ if(stat64(file, &buf) == -1) -+ return(-errno); -+ -+ if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); -+ else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); -+ else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); -+ else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); -+ else if(S_ISFIFO(buf.st_mode)) return(OS_TYPE_FIFO); -+ else if(S_ISSOCK(buf.st_mode)) return(OS_TYPE_SOCK); -+ else return(OS_TYPE_FILE); -+} -+ -+int os_file_mode(char *file, struct openflags *mode_out) -+{ -+ *mode_out = OPENFLAGS(); -+ -+ if(!access(file, W_OK)) *mode_out = of_write(*mode_out); -+ else if(errno != EACCES) -+ return(-errno); -+ -+ if(!access(file, 
R_OK)) *mode_out = of_read(*mode_out); -+ else if(errno != EACCES) -+ return(-errno); -+ -+ return(0); -+} -+ -+int os_open_file(char *file, struct openflags flags, int mode) -+{ -+ int fd, f = 0; -+ -+ if(flags.r && flags.w) f = O_RDWR; -+ else if(flags.r) f = O_RDONLY; -+ else if(flags.w) f = O_WRONLY; -+ else f = 0; -+ -+ if(flags.s) f |= O_SYNC; -+ if(flags.c) f |= O_CREAT; -+ if(flags.t) f |= O_TRUNC; -+ if(flags.e) f |= O_EXCL; -+ -+ fd = open64(file, f, mode); -+ if(fd < 0) return(-errno); -+ -+ if(flags.cl){ -+ if(fcntl(fd, F_SETFD, 1)){ -+ close(fd); -+ return(-errno); -+ } -+ } -+ -+ return(fd); -+} -+ -+int os_connect_socket(char *name) -+{ -+ struct sockaddr_un sock; -+ int fd, err; -+ -+ sock.sun_family = AF_UNIX; -+ snprintf(sock.sun_path, sizeof(sock.sun_path), "%s", name); -+ -+ fd = socket(AF_UNIX, SOCK_STREAM, 0); -+ if(fd < 0) -+ return(fd); -+ -+ err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); -+ if(err) -+ return(err); -+ -+ return(fd); -+} -+ -+void os_close_file(int fd) -+{ -+ close(fd); -+} -+ -+int os_seek_file(int fd, __u64 offset) -+{ -+ __u64 actual; -+ -+ actual = lseek64(fd, offset, SEEK_SET); -+ if(actual != offset) return(-errno); -+ return(0); -+} -+ -+int os_read_file(int fd, void *buf, int len) -+{ -+ int n; -+ -+ /* Force buf into memory if it's not already. */ -+ -+ /* XXX This fails if buf is kernel memory */ -+#ifdef notdef -+ if(copy_to_user_proc(buf, &c, sizeof(c))) -+ return(-EFAULT); -+#endif -+ -+ n = read(fd, buf, len); -+ if(n < 0) -+ return(-errno); -+ return(n); -+} -+ -+int os_write_file(int fd, void *buf, int count) -+{ -+ int n; -+ -+ /* Force buf into memory if it's not already. 
*/ -+ -+ /* XXX This fails if buf is kernel memory */ -+#ifdef notdef -+ if(copy_to_user_proc(buf, buf, buf[0])) -+ return(-EFAULT); -+#endif -+ -+ n = write(fd, buf, count); -+ if(n < 0) -+ return(-errno); -+ return(n); -+} -+ -+int os_file_size(char *file, long long *size_out) -+{ -+ struct stat64 buf; -+ -+ if(stat64(file, &buf) == -1){ -+ printk("Couldn't stat \"%s\" : errno = %d\n", file, errno); -+ return(-errno); -+ } -+ if(S_ISBLK(buf.st_mode)){ -+ int fd, blocks; -+ -+ if((fd = open64(file, O_RDONLY)) < 0){ -+ printk("Couldn't open \"%s\", errno = %d\n", file, -+ errno); -+ return(-errno); -+ } -+ if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ -+ printk("Couldn't get the block size of \"%s\", " -+ "errno = %d\n", file, errno); -+ close(fd); -+ return(-errno); -+ } -+ *size_out = ((long long) blocks) * 512; -+ close(fd); -+ return(0); -+ } -+ *size_out = buf.st_size; -+ return(0); -+} -+ -+int os_pipe(int *fds, int stream, int close_on_exec) -+{ -+ int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; -+ -+ err = socketpair(AF_UNIX, type, 0, fds); -+ if(err) -+ return(-errno); -+ -+ if(!close_on_exec) -+ return(0); -+ -+ if((fcntl(fds[0], F_SETFD, 1) < 0) || (fcntl(fds[1], F_SETFD, 1) < 0)) -+ printk("os_pipe : Setting FD_CLOEXEC failed, errno = %d", -+ errno); -+ -+ return(0); -+} -+ -+int os_set_fd_async(int fd, int owner) -+{ -+ /* XXX This should do F_GETFL first */ -+ if(fcntl(fd, F_SETFL, O_ASYNC | O_NONBLOCK) < 0){ -+ printk("os_set_fd_async : failed to set O_ASYNC and " -+ "O_NONBLOCK on fd # %d, errno = %d\n", fd, errno); -+ return(-errno); -+ } -+#ifdef notdef -+ if(fcntl(fd, F_SETFD, 1) < 0){ -+ printk("os_set_fd_async : Setting FD_CLOEXEC failed, " -+ "errno = %d\n", errno); -+ } -+#endif -+ -+ if((fcntl(fd, F_SETSIG, SIGIO) < 0) || -+ (fcntl(fd, F_SETOWN, owner) < 0)){ -+ printk("os_set_fd_async : Failed to fcntl F_SETOWN " -+ "(or F_SETSIG) fd %d to pid %d, errno = %d\n", fd, -+ owner, errno); -+ return(-errno); -+ } -+ -+ return(0); -+} -+ -+int 
os_set_fd_block(int fd, int blocking) -+{ -+ int flags; -+ -+ flags = fcntl(fd, F_GETFL); -+ -+ if(blocking) flags &= ~O_NONBLOCK; -+ else flags |= O_NONBLOCK; -+ -+ if(fcntl(fd, F_SETFL, flags) < 0){ -+ printk("Failed to change blocking on fd # %d, errno = %d\n", -+ fd, errno); -+ return(-errno); -+ } -+ return(0); -+} -+ -+int os_accept_connection(int fd) -+{ -+ int new; -+ -+ new = accept(fd, NULL, 0); -+ if(new < 0) -+ return(-errno); -+ return(new); -+} -+ -+#ifndef SHUT_RD -+#define SHUT_RD 0 -+#endif -+ -+#ifndef SHUT_WR -+#define SHUT_WR 1 -+#endif -+ -+#ifndef SHUT_RDWR -+#define SHUT_RDWR 2 -+#endif -+ -+int os_shutdown_socket(int fd, int r, int w) -+{ -+ int what, err; -+ -+ if(r && w) what = SHUT_RDWR; -+ else if(r) what = SHUT_RD; -+ else if(w) what = SHUT_WR; -+ else { -+ printk("os_shutdown_socket : neither r or w was set\n"); -+ return(-EINVAL); -+ } -+ err = shutdown(fd, what); -+ if(err) -+ return(-errno); -+ return(0); -+} -+ -+int os_rcv_fd(int fd, int *helper_pid_out) -+{ -+ int new, n; -+ char buf[CMSG_SPACE(sizeof(new))]; -+ struct msghdr msg; -+ struct cmsghdr *cmsg; -+ struct iovec iov; -+ -+ msg.msg_name = NULL; -+ msg.msg_namelen = 0; -+ iov = ((struct iovec) { .iov_base = helper_pid_out, -+ .iov_len = sizeof(*helper_pid_out) }); -+ msg.msg_iov = &iov; -+ msg.msg_iovlen = 1; -+ msg.msg_control = buf; -+ msg.msg_controllen = sizeof(buf); -+ msg.msg_flags = 0; -+ -+ n = recvmsg(fd, &msg, 0); -+ if(n < 0) -+ return(-errno); -+ -+ else if(n != sizeof(iov.iov_len)) -+ *helper_pid_out = -1; -+ -+ cmsg = CMSG_FIRSTHDR(&msg); -+ if(cmsg == NULL){ -+ printk("rcv_fd didn't receive anything, error = %d\n", errno); -+ return(-1); -+ } -+ if((cmsg->cmsg_level != SOL_SOCKET) || -+ (cmsg->cmsg_type != SCM_RIGHTS)){ -+ printk("rcv_fd didn't receive a descriptor\n"); -+ return(-1); -+ } -+ -+ new = ((int *) CMSG_DATA(cmsg))[0]; -+ return(new); -+} -+ -+int create_unix_socket(char *file, int len) -+{ -+ struct sockaddr_un addr; -+ int sock, err; -+ -+ sock 
= socket(PF_UNIX, SOCK_DGRAM, 0); -+ if (sock < 0){ -+ printk("create_unix_socket - socket failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ addr.sun_family = AF_UNIX; -+ -+ /* XXX Be more careful about overflow */ -+ snprintf(addr.sun_path, len, "%s", file); -+ -+ err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); -+ if (err < 0){ -+ printk("create_listening_socket - bind failed, errno = %d\n", -+ errno); -+ return(-errno); -+ } -+ -+ return(sock); -+} -+ -+void os_flush_stdout(void) -+{ -+ fflush(stdout); -+} -+ -+int os_lock_file(int fd, int excl) -+{ -+ int type = excl ? F_WRLCK : F_RDLCK; -+ struct flock lock = ((struct flock) { .l_type = type, -+ .l_whence = SEEK_SET, -+ .l_start = 0, -+ .l_len = 0 } ); -+ int err, save; -+ -+ err = fcntl(fd, F_SETLK, &lock); -+ if(!err) -+ goto out; -+ -+ save = -errno; -+ err = fcntl(fd, F_GETLK, &lock); -+ if(err){ -+ err = -errno; -+ goto out; -+ } -+ -+ printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); -+ err = save; -+ out: -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/include/file.h um/arch/um/os-Linux/include/file.h ---- orig/arch/um/os-Linux/include/file.h Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/include/file.h Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __OS_FILE_H__ -+#define __OS_FILE_H__ -+ -+#define DEV_NULL "/dev/null" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/process.c um/arch/um/os-Linux/process.c ---- orig/arch/um/os-Linux/process.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/process.c Wed Jan 8 14:19:00 2003 -@@ -0,0 +1,142 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <stdio.h> -+#include <errno.h> -+#include <signal.h> -+#include <sys/mman.h> -+#include <sys/wait.h> -+#include "os.h" -+#include "user.h" -+ -+unsigned long os_process_pc(int pid) -+{ -+ char proc_stat[sizeof("/proc/#####/stat\0")], buf[256]; -+ unsigned long pc; -+ int fd; -+ -+ sprintf(proc_stat, "/proc/%d/stat", pid); -+ fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("os_process_pc - couldn't open '%s', errno = %d\n", -+ proc_stat, errno); -+ return(-1); -+ } -+ if(read(fd, buf, sizeof(buf)) < 0){ -+ printk("os_process_pc - couldn't read '%s', errno = %d\n", -+ proc_stat, errno); -+ close(fd); -+ return(-1); -+ } -+ close(fd); -+ pc = -1; -+ if(sscanf(buf, "%*d %*s %*c %*d %*d %*d %*d %*d %*d %*d %*d " -+ "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " -+ "%*d %*d %*d %*d %ld", &pc) != 1){ -+ printk("os_process_pc - couldn't find pc in '%s'\n", buf); -+ } -+ return(pc); -+} -+ -+int os_process_parent(int pid) -+{ -+ char stat[sizeof("/proc/nnnnn/stat\0")]; -+ char data[256]; -+ int parent, n, fd; -+ -+ if(pid == -1) return(-1); -+ -+ snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); -+ fd = os_open_file(stat, of_read(OPENFLAGS()), 0); -+ if(fd < 0){ -+ printk("Couldn't open '%s', errno = %d\n", stat, -fd); -+ return(-1); -+ } -+ -+ n = read(fd, data, 
sizeof(data)); -+ close(fd); -+ -+ if(n < 0){ -+ printk("Couldn't read '%s', errno = %d\n", stat); -+ return(-1); -+ } -+ -+ parent = -1; -+ /* XXX This will break if there is a space in the command */ -+ n = sscanf(data, "%*d %*s %*c %d", &parent); -+ if(n != 1) printk("Failed to scan '%s'\n", data); -+ -+ return(parent); -+} -+ -+void os_stop_process(int pid) -+{ -+ kill(pid, SIGSTOP); -+} -+ -+void os_kill_process(int pid, int reap_child) -+{ -+ kill(pid, SIGKILL); -+ if(reap_child) -+ waitpid(pid, NULL, 0); -+ -+} -+ -+void os_usr1_process(int pid) -+{ -+ kill(pid, SIGUSR1); -+} -+ -+int os_getpid(void) -+{ -+ return(getpid()); -+} -+ -+int os_map_memory(void *virt, int fd, unsigned long off, unsigned long len, -+ int r, int w, int x) -+{ -+ void *loc; -+ int prot; -+ -+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0); -+ -+ loc = mmap((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, -+ fd, off); -+ if(loc == MAP_FAILED) -+ return(-errno); -+ return(0); -+} -+ -+int os_protect_memory(void *addr, unsigned long len, int r, int w, int x) -+{ -+ int prot = ((r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | -+ (x ? PROT_EXEC : 0)); -+ -+ if(mprotect(addr, len, prot) < 0) -+ return(-errno); -+ return(0); -+} -+ -+int os_unmap_memory(void *addr, int len) -+{ -+ int err; -+ -+ err = munmap(addr, len); -+ if(err < 0) return(-errno); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/os-Linux/tty.c um/arch/um/os-Linux/tty.c ---- orig/arch/um/os-Linux/tty.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/os-Linux/tty.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdlib.h> -+#include <errno.h> -+#include "os.h" -+#include "user.h" -+#include "kern_util.h" -+ -+struct grantpt_info { -+ int fd; -+ int res; -+ int err; -+}; -+ -+static void grantpt_cb(void *arg) -+{ -+ struct grantpt_info *info = arg; -+ -+ info->res = grantpt(info->fd); -+ info->err = errno; -+} -+ -+int get_pty(void) -+{ -+ struct grantpt_info info; -+ int fd; -+ -+ if((fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0)) < 0){ -+ printk("get_pty : Couldn't open /dev/ptmx - errno = %d\n", -+ errno); -+ return(-1); -+ } -+ -+ info.fd = fd; -+ initial_thread_cb(grantpt_cb, &info); -+ -+ if(info.res < 0){ -+ printk("get_pty : Couldn't grant pty - errno = %d\n", -+ info.err); -+ return(-1); -+ } -+ if(unlockpt(fd) < 0){ -+ printk("get_pty : Couldn't unlock pty - errno = %d\n", errno); -+ return(-1); -+ } -+ return(fd); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/Makefile um/arch/um/sys-i386/Makefile ---- orig/arch/um/sys-i386/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/Makefile Sat Nov 23 23:34:24 2002 -@@ -0,0 +1,46 @@ -+# -+# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+# Licensed under the GPL -+# -+ -+O_TARGET = built-in.o -+ -+obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o ptrace.o \ -+ ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o -+export-objs = ksyms.o -+ -+USER_OBJS = bugs.o ptrace_user.o sigcontext.o fault.o -+ -+SYMLINKS = semaphore.c extable.c -+ -+semaphore.c-dir = kernel -+extable.c-dir = mm -+ -+include $(TOPDIR)/Rules.make -+ -+$(USER_OBJS) : %.o: %.c -+ $(CC) $(CFLAGS_$@) $(USER_CFLAGS) -c -o $@ $< -+ -+define make_link -+ -rm -f $1 -+ ln -sf $(TOPDIR)/arch/i386/$($1-dir)/$1 $1 -+endef -+ -+$(SYMLINKS): -+ $(call make_link,$@) -+ -+clean: -+ $(MAKE) -C util clean -+ rm -f $(SYMLINKS) -+ -+fastdep: -+ -+dep: -+ -+archmrproper: -+ -+archclean: -+ -+archdep: -+ -+modules: -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/bugs.c um/arch/um/sys-i386/bugs.c ---- orig/arch/um/sys-i386/bugs.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/bugs.c Sun Dec 8 20:38:45 2002 -@@ -0,0 +1,157 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <unistd.h> -+#include <fcntl.h> -+#include <errno.h> -+#include <string.h> -+#include <sys/signal.h> -+#include "kern_util.h" -+#include "user.h" -+#include "sysdep/ptrace.h" -+#include "task.h" -+ -+#define MAXTOKEN 64 -+ -+/* Set during early boot */ -+int cpu_has_cmov = 1; -+int cpu_has_xmm = 0; -+ -+static char token(int fd, char *buf, int len, char stop) -+{ -+ int n; -+ char *ptr, *end, c; -+ -+ ptr = buf; -+ end = &buf[len]; -+ do { -+ n = read(fd, ptr, sizeof(*ptr)); -+ c = 
*ptr++; -+ if(n == 0) return(0); -+ else if(n != sizeof(*ptr)){ -+ printk("Reading /proc/cpuinfo failed, " -+ "errno = %d\n", errno); -+ return(-errno); -+ } -+ } while((c != '\n') && (c != stop) && (ptr < end)); -+ -+ if(ptr == end){ -+ printk("Failed to find '%c' in /proc/cpuinfo\n", stop); -+ return(-1); -+ } -+ *(ptr - 1) = '\0'; -+ return(c); -+} -+ -+static int check_cpu_feature(char *feature, int *have_it) -+{ -+ char buf[MAXTOKEN], c; -+ int fd, len = sizeof(buf)/sizeof(buf[0]), n; -+ -+ printk("Checking for host processor %s support...", feature); -+ fd = open("/proc/cpuinfo", O_RDONLY); -+ if(fd < 0){ -+ printk("Couldn't open /proc/cpuinfo, errno = %d\n", errno); -+ return(0); -+ } -+ -+ *have_it = 0; -+ buf[len - 1] = '\0'; -+ while(1){ -+ c = token(fd, buf, len - 1, ':'); -+ if(c <= 0) goto out; -+ else if(c != ':'){ -+ printk("Failed to find ':' in /proc/cpuinfo\n"); -+ goto out; -+ } -+ -+ if(!strncmp(buf, "flags", strlen("flags"))) break; -+ -+ do { -+ n = read(fd, &c, sizeof(c)); -+ if(n != sizeof(c)){ -+ printk("Failed to find newline in " -+ "/proc/cpuinfo, n = %d, errno = %d\n", -+ n, errno); -+ goto out; -+ } -+ } while(c != '\n'); -+ } -+ -+ c = token(fd, buf, len - 1, ' '); -+ if(c < 0) goto out; -+ else if(c != ' '){ -+ printk("Failed to find ':' in /proc/cpuinfo\n"); -+ goto out; -+ } -+ -+ while(1){ -+ c = token(fd, buf, len - 1, ' '); -+ if(c < 0) goto out; -+ else if(c == '\n') break; -+ -+ if(!strcmp(buf, feature)){ -+ *have_it = 1; -+ goto out; -+ } -+ } -+ out: -+ if(*have_it == 0) printk("No\n"); -+ else if(*have_it == 1) printk("Yes\n"); -+ close(fd); -+ return(1); -+} -+ -+void arch_check_bugs(void) -+{ -+ int have_it; -+ -+ if(access("/proc/cpuinfo", R_OK)){ -+ printk("/proc/cpuinfo not available - skipping CPU capability " -+ "checks\n"); -+ return; -+ } -+ if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it; -+ if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it; -+} -+ -+int arch_handle_signal(int sig, union 
uml_pt_regs *regs) -+{ -+ unsigned long ip; -+ -+ /* This is testing for a cmov (0x0f 0x4x) instruction causing a -+ * SIGILL in init. -+ */ -+ if((sig != SIGILL) || (TASK_PID(get_current()) != 1)) return(0); -+ -+ ip = UPT_IP(regs); -+ if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40)) -+ return(0); -+ -+ if(cpu_has_cmov == 0) -+ panic("SIGILL caused by cmov, which this processor doesn't " -+ "implement, boot a filesystem compiled for older " -+ "processors"); -+ else if(cpu_has_cmov == 1) -+ panic("SIGILL caused by cmov, which this processor claims to " -+ "implement"); -+ else if(cpu_has_cmov == -1) -+ panic("SIGILL caused by cmov, couldn't tell if this processor " -+ "implements it, boot a filesystem compiled for older " -+ "processors"); -+ else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov); -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/checksum.S um/arch/um/sys-i386/checksum.S ---- orig/arch/um/sys-i386/checksum.S Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/checksum.S Thu Oct 31 20:17:50 2002 -@@ -0,0 +1,460 @@ -+/* -+ * INET An implementation of the TCP/IP protocol suite for the LINUX -+ * operating system. INET is implemented using the BSD Socket -+ * interface as the means of communication with the user level. 
-+ * -+ * IP/TCP/UDP checksumming routines -+ * -+ * Authors: Jorge Cwik, <jorge@laser.satlink.net> -+ * Arnt Gulbrandsen, <agulbra@nvg.unit.no> -+ * Tom May, <ftom@netcom.com> -+ * Pentium Pro/II routines: -+ * Alexander Kjeldaas <astor@guardian.no> -+ * Finn Arne Gangstad <finnag@guardian.no> -+ * Lots of code moved from tcp.c and ip.c; see those files -+ * for more names. -+ * -+ * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception -+ * handling. -+ * Andi Kleen, add zeroing on error -+ * converted to pure assembler -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. -+ */ -+ -+#include <linux/config.h> -+#include <asm/errno.h> -+ -+/* -+ * computes a partial checksum, e.g. for TCP/UDP fragments -+ */ -+ -+/* -+unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) -+ */ -+ -+.text -+.align 4 -+.globl arch_csum_partial -+ -+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM -+ -+ /* -+ * Experiments with Ethernet and SLIP connections show that buff -+ * is aligned on either a 2-byte or 4-byte boundary. We get at -+ * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. -+ * Fortunately, it is easy to convert 2-byte alignment to 4-byte -+ * alignment for the unrolled loop. -+ */ -+arch_csum_partial: -+ pushl %esi -+ pushl %ebx -+ movl 20(%esp),%eax # Function arg: unsigned int sum -+ movl 16(%esp),%ecx # Function arg: int len -+ movl 12(%esp),%esi # Function arg: unsigned char *buff -+ testl $2, %esi # Check alignment. -+ jz 2f # Jump if alignment is ok. -+ subl $2, %ecx # Alignment uses up two bytes. -+ jae 1f # Jump if we had at least two bytes. -+ addl $2, %ecx # ecx was < 2. Deal with it. 
-+ jmp 4f -+1: movw (%esi), %bx -+ addl $2, %esi -+ addw %bx, %ax -+ adcl $0, %eax -+2: -+ movl %ecx, %edx -+ shrl $5, %ecx -+ jz 2f -+ testl %esi, %esi -+1: movl (%esi), %ebx -+ adcl %ebx, %eax -+ movl 4(%esi), %ebx -+ adcl %ebx, %eax -+ movl 8(%esi), %ebx -+ adcl %ebx, %eax -+ movl 12(%esi), %ebx -+ adcl %ebx, %eax -+ movl 16(%esi), %ebx -+ adcl %ebx, %eax -+ movl 20(%esi), %ebx -+ adcl %ebx, %eax -+ movl 24(%esi), %ebx -+ adcl %ebx, %eax -+ movl 28(%esi), %ebx -+ adcl %ebx, %eax -+ lea 32(%esi), %esi -+ dec %ecx -+ jne 1b -+ adcl $0, %eax -+2: movl %edx, %ecx -+ andl $0x1c, %edx -+ je 4f -+ shrl $2, %edx # This clears CF -+3: adcl (%esi), %eax -+ lea 4(%esi), %esi -+ dec %edx -+ jne 3b -+ adcl $0, %eax -+4: andl $3, %ecx -+ jz 7f -+ cmpl $2, %ecx -+ jb 5f -+ movw (%esi),%cx -+ leal 2(%esi),%esi -+ je 6f -+ shll $16,%ecx -+5: movb (%esi),%cl -+6: addl %ecx,%eax -+ adcl $0, %eax -+7: -+ popl %ebx -+ popl %esi -+ ret -+ -+#else -+ -+/* Version for PentiumII/PPro */ -+ -+arch_csum_partial: -+ pushl %esi -+ pushl %ebx -+ movl 20(%esp),%eax # Function arg: unsigned int sum -+ movl 16(%esp),%ecx # Function arg: int len -+ movl 12(%esp),%esi # Function arg: const unsigned char *buf -+ -+ testl $2, %esi -+ jnz 30f -+10: -+ movl %ecx, %edx -+ movl %ecx, %ebx -+ andl $0x7c, %ebx -+ shrl $7, %ecx -+ addl %ebx,%esi -+ shrl $2, %ebx -+ negl %ebx -+ lea 45f(%ebx,%ebx,2), %ebx -+ testl %esi, %esi -+ jmp *%ebx -+ -+ # Handle 2-byte-aligned regions -+20: addw (%esi), %ax -+ lea 2(%esi), %esi -+ adcl $0, %eax -+ jmp 10b -+ -+30: subl $2, %ecx -+ ja 20b -+ je 32f -+ movzbl (%esi),%ebx # csumming 1 byte, 2-aligned -+ addl %ebx, %eax -+ adcl $0, %eax -+ jmp 80f -+32: -+ addw (%esi), %ax # csumming 2 bytes, 2-aligned -+ adcl $0, %eax -+ jmp 80f -+ -+40: -+ addl -128(%esi), %eax -+ adcl -124(%esi), %eax -+ adcl -120(%esi), %eax -+ adcl -116(%esi), %eax -+ adcl -112(%esi), %eax -+ adcl -108(%esi), %eax -+ adcl -104(%esi), %eax -+ adcl -100(%esi), %eax -+ adcl -96(%esi), %eax -+ adcl 
-92(%esi), %eax -+ adcl -88(%esi), %eax -+ adcl -84(%esi), %eax -+ adcl -80(%esi), %eax -+ adcl -76(%esi), %eax -+ adcl -72(%esi), %eax -+ adcl -68(%esi), %eax -+ adcl -64(%esi), %eax -+ adcl -60(%esi), %eax -+ adcl -56(%esi), %eax -+ adcl -52(%esi), %eax -+ adcl -48(%esi), %eax -+ adcl -44(%esi), %eax -+ adcl -40(%esi), %eax -+ adcl -36(%esi), %eax -+ adcl -32(%esi), %eax -+ adcl -28(%esi), %eax -+ adcl -24(%esi), %eax -+ adcl -20(%esi), %eax -+ adcl -16(%esi), %eax -+ adcl -12(%esi), %eax -+ adcl -8(%esi), %eax -+ adcl -4(%esi), %eax -+45: -+ lea 128(%esi), %esi -+ adcl $0, %eax -+ dec %ecx -+ jge 40b -+ movl %edx, %ecx -+50: andl $3, %ecx -+ jz 80f -+ -+ # Handle the last 1-3 bytes without jumping -+ notl %ecx # 1->2, 2->1, 3->0, higher bits are masked -+ movl $0xffffff,%ebx # by the shll and shrl instructions -+ shll $3,%ecx -+ shrl %cl,%ebx -+ andl -128(%esi),%ebx # esi is 4-aligned so should be ok -+ addl %ebx,%eax -+ adcl $0,%eax -+80: -+ popl %ebx -+ popl %esi -+ ret -+ -+#endif -+ -+/* -+unsigned int csum_partial_copy_generic (const char *src, char *dst, -+ int len, int sum, int *src_err_ptr, int *dst_err_ptr) -+ */ -+ -+/* -+ * Copy from ds while checksumming, otherwise like csum_partial -+ * -+ * The macros SRC and DST specify the type of access for the instruction. -+ * thus we can call a custom exception handler for all access types. -+ * -+ * FIXME: could someone double-check whether I haven't mixed up some SRC and -+ * DST definitions? It's damn hard to trigger all cases. I hope I got -+ * them all but there's no guarantee. -+ */ -+ -+#define SRC(y...) \ -+ 9999: y; \ -+ .section __ex_table, "a"; \ -+ .long 9999b, 6001f ; \ -+ .previous -+ -+#define DST(y...) 
\ -+ 9999: y; \ -+ .section __ex_table, "a"; \ -+ .long 9999b, 6002f ; \ -+ .previous -+ -+.align 4 -+.globl csum_partial_copy_generic_i386 -+ -+#ifndef CONFIG_X86_USE_PPRO_CHECKSUM -+ -+#define ARGBASE 16 -+#define FP 12 -+ -+csum_partial_copy_generic_i386: -+ subl $4,%esp -+ pushl %edi -+ pushl %esi -+ pushl %ebx -+ movl ARGBASE+16(%esp),%eax # sum -+ movl ARGBASE+12(%esp),%ecx # len -+ movl ARGBASE+4(%esp),%esi # src -+ movl ARGBASE+8(%esp),%edi # dst -+ -+ testl $2, %edi # Check alignment. -+ jz 2f # Jump if alignment is ok. -+ subl $2, %ecx # Alignment uses up two bytes. -+ jae 1f # Jump if we had at least two bytes. -+ addl $2, %ecx # ecx was < 2. Deal with it. -+ jmp 4f -+SRC(1: movw (%esi), %bx ) -+ addl $2, %esi -+DST( movw %bx, (%edi) ) -+ addl $2, %edi -+ addw %bx, %ax -+ adcl $0, %eax -+2: -+ movl %ecx, FP(%esp) -+ shrl $5, %ecx -+ jz 2f -+ testl %esi, %esi -+SRC(1: movl (%esi), %ebx ) -+SRC( movl 4(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, (%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 4(%edi) ) -+ -+SRC( movl 8(%esi), %ebx ) -+SRC( movl 12(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 8(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 12(%edi) ) -+ -+SRC( movl 16(%esi), %ebx ) -+SRC( movl 20(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 16(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 20(%edi) ) -+ -+SRC( movl 24(%esi), %ebx ) -+SRC( movl 28(%esi), %edx ) -+ adcl %ebx, %eax -+DST( movl %ebx, 24(%edi) ) -+ adcl %edx, %eax -+DST( movl %edx, 28(%edi) ) -+ -+ lea 32(%esi), %esi -+ lea 32(%edi), %edi -+ dec %ecx -+ jne 1b -+ adcl $0, %eax -+2: movl FP(%esp), %edx -+ movl %edx, %ecx -+ andl $0x1c, %edx -+ je 4f -+ shrl $2, %edx # This clears CF -+SRC(3: movl (%esi), %ebx ) -+ adcl %ebx, %eax -+DST( movl %ebx, (%edi) ) -+ lea 4(%esi), %esi -+ lea 4(%edi), %edi -+ dec %edx -+ jne 3b -+ adcl $0, %eax -+4: andl $3, %ecx -+ jz 7f -+ cmpl $2, %ecx -+ jb 5f -+SRC( movw (%esi), %cx ) -+ leal 2(%esi), %esi -+DST( movw %cx, (%edi) ) -+ leal 2(%edi), %edi 
-+ je 6f -+ shll $16,%ecx -+SRC(5: movb (%esi), %cl ) -+DST( movb %cl, (%edi) ) -+6: addl %ecx, %eax -+ adcl $0, %eax -+7: -+5000: -+ -+# Exception handler: -+.section .fixup, "ax" -+ -+6001: -+ movl ARGBASE+20(%esp), %ebx # src_err_ptr -+ movl $-EFAULT, (%ebx) -+ -+ # zero the complete destination - computing the rest -+ # is too much work -+ movl ARGBASE+8(%esp), %edi # dst -+ movl ARGBASE+12(%esp), %ecx # len -+ xorl %eax,%eax -+ rep ; stosb -+ -+ jmp 5000b -+ -+6002: -+ movl ARGBASE+24(%esp), %ebx # dst_err_ptr -+ movl $-EFAULT,(%ebx) -+ jmp 5000b -+ -+.previous -+ -+ popl %ebx -+ popl %esi -+ popl %edi -+ popl %ecx # equivalent to addl $4,%esp -+ ret -+ -+#else -+ -+/* Version for PentiumII/PPro */ -+ -+#define ROUND1(x) \ -+ SRC(movl x(%esi), %ebx ) ; \ -+ addl %ebx, %eax ; \ -+ DST(movl %ebx, x(%edi) ) ; -+ -+#define ROUND(x) \ -+ SRC(movl x(%esi), %ebx ) ; \ -+ adcl %ebx, %eax ; \ -+ DST(movl %ebx, x(%edi) ) ; -+ -+#define ARGBASE 12 -+ -+csum_partial_copy_generic_i386: -+ pushl %ebx -+ pushl %edi -+ pushl %esi -+ movl ARGBASE+4(%esp),%esi #src -+ movl ARGBASE+8(%esp),%edi #dst -+ movl ARGBASE+12(%esp),%ecx #len -+ movl ARGBASE+16(%esp),%eax #sum -+# movl %ecx, %edx -+ movl %ecx, %ebx -+ movl %esi, %edx -+ shrl $6, %ecx -+ andl $0x3c, %ebx -+ negl %ebx -+ subl %ebx, %esi -+ subl %ebx, %edi -+ lea -1(%esi),%edx -+ andl $-32,%edx -+ lea 3f(%ebx,%ebx), %ebx -+ testl %esi, %esi -+ jmp *%ebx -+1: addl $64,%esi -+ addl $64,%edi -+ SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) -+ ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) -+ ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) -+ ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) -+ ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) -+3: adcl $0,%eax -+ addl $64, %edx -+ dec %ecx -+ jge 1b -+4: movl ARGBASE+12(%esp),%edx #len -+ andl $3, %edx -+ jz 7f -+ cmpl $2, %edx -+ jb 5f -+SRC( movw (%esi), %dx ) -+ leal 2(%esi), %esi -+DST( movw %dx, (%edi) ) -+ leal 2(%edi), %edi -+ je 6f -+ shll $16,%edx -+5: -+SRC( movb (%esi), %dl ) 
-+DST( movb %dl, (%edi) ) -+6: addl %edx, %eax -+ adcl $0, %eax -+7: -+.section .fixup, "ax" -+6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr -+ movl $-EFAULT, (%ebx) -+ # zero the complete destination (computing the rest is too much work) -+ movl ARGBASE+8(%esp),%edi # dst -+ movl ARGBASE+12(%esp),%ecx # len -+ xorl %eax,%eax -+ rep; stosb -+ jmp 7b -+6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr -+ movl $-EFAULT, (%ebx) -+ jmp 7b -+.previous -+ -+ popl %esi -+ popl %edi -+ popl %ebx -+ ret -+ -+#undef ROUND -+#undef ROUND1 -+ -+#endif -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/fault.c um/arch/um/sys-i386/fault.c ---- orig/arch/um/sys-i386/fault.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/fault.c Sun Oct 27 16:49:35 2002 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <signal.h> -+#include "sysdep/ptrace.h" -+#include "sysdep/sigcontext.h" -+ -+extern unsigned long search_exception_table(unsigned long addr); -+ -+int arch_fixup(unsigned long address, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ unsigned long fixup; -+ -+ fixup = search_exception_table(address); -+ if(fixup != 0){ -+ sc->eip = fixup; -+ return(1); -+ } -+ return(0); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ksyms.c um/arch/um/sys-i386/ksyms.c ---- orig/arch/um/sys-i386/ksyms.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/ksyms.c Tue Oct 29 21:01:45 2002 -@@ -0,0 +1,17 @@ -+#include "linux/module.h" -+#include "linux/in6.h" -+#include "linux/rwsem.h" -+#include "asm/byteorder.h" -+#include "asm/semaphore.h" -+#include "asm/uaccess.h" -+#include "asm/checksum.h" -+#include "asm/errno.h" -+ -+EXPORT_SYMBOL(__down_failed); -+EXPORT_SYMBOL(__down_failed_interruptible); -+EXPORT_SYMBOL(__down_failed_trylock); -+EXPORT_SYMBOL(__up_wakeup); -+ -+/* Networking helper routines. */ -+EXPORT_SYMBOL(csum_partial_copy_from); -+EXPORT_SYMBOL(csum_partial_copy_to); -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ldt.c um/arch/um/sys-i386/ldt.c ---- orig/arch/um/sys-i386/ldt.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/ldt.c Wed Nov 13 12:43:04 2002 -@@ -0,0 +1,92 @@ -+/* -+ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/config.h" -+#include "linux/slab.h" -+#include "asm/uaccess.h" -+#include "asm/ptrace.h" -+#include "choose-mode.h" -+#include "kern.h" -+ -+#ifdef CONFIG_MODE_TT -+extern int modify_ldt(int func, void *ptr, unsigned long bytecount); -+ -+int sys_modify_ldt_tt(int func, void *ptr, unsigned long bytecount) -+{ -+ if(verify_area(VERIFY_READ, ptr, bytecount)) return(-EFAULT); -+ return(modify_ldt(func, ptr, bytecount)); -+} -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+extern int userspace_pid; -+ -+int sys_modify_ldt_skas(int func, void *ptr, unsigned long bytecount) -+{ -+ struct ptrace_ldt ldt; -+ void *buf; -+ int res, n; -+ -+ buf = kmalloc(bytecount, GFP_KERNEL); -+ if(buf == NULL) -+ return(-ENOMEM); -+ -+ res = 0; -+ -+ switch(func){ -+ case 1: -+ case 0x11: -+ res = copy_from_user(buf, ptr, 
bytecount); -+ break; -+ } -+ -+ if(res != 0){ -+ res = -EFAULT; -+ goto out; -+ } -+ -+ ldt = ((struct ptrace_ldt) { .func = func, -+ .ptr = buf, -+ .bytecount = bytecount }); -+ res = ptrace(PTRACE_LDT, userspace_pid, 0, (unsigned long) &ldt); -+ if(res < 0) -+ goto out; -+ -+ switch(func){ -+ case 0: -+ case 2: -+ n = res; -+ res = copy_to_user(ptr, buf, n); -+ if(res != 0) -+ res = -EFAULT; -+ else -+ res = n; -+ break; -+ } -+ -+ out: -+ kfree(buf); -+ return(res); -+} -+#endif -+ -+int sys_modify_ldt(int func, void *ptr, unsigned long bytecount) -+{ -+ return(CHOOSE_MODE_PROC(sys_modify_ldt_tt, sys_modify_ldt_skas, func, -+ ptr, bytecount)); -+} -+ -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ptrace.c um/arch/um/sys-i386/ptrace.c ---- orig/arch/um/sys-i386/ptrace.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/ptrace.c Sun Oct 27 16:49:35 2002 -@@ -0,0 +1,365 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/sched.h" -+#include "asm/elf.h" -+#include "asm/ptrace.h" -+#include "asm/uaccess.h" -+#include "ptrace_user.h" -+#include "sysdep/sigcontext.h" -+#include "sysdep/sc.h" -+ -+void arch_switch(void) -+{ -+ update_debugregs(current->thread.arch.debugregs_seq); -+} -+ -+int is_syscall(unsigned long addr) -+{ -+ unsigned short instr; -+ int n; -+ -+ n = copy_from_user(&instr, (void *) addr, sizeof(instr)); -+ if(n){ -+ printk("is_syscall : failed to read instruction from 0x%lu\n", -+ addr); -+ return(0); -+ } -+ return(instr == 0x80cd); -+} -+ -+/* determines which flags the user has 
access to. */ -+/* 1 = access 0 = no access */ -+#define FLAG_MASK 0x00044dd5 -+ -+int putreg(struct task_struct *child, int regno, unsigned long value) -+{ -+ regno >>= 2; -+ switch (regno) { -+ case FS: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ PT_REGS_FS(&child->thread.regs) = value; -+ return 0; -+ case GS: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ PT_REGS_GS(&child->thread.regs) = value; -+ return 0; -+ case DS: -+ case ES: -+ if (value && (value & 3) != 3) -+ return -EIO; -+ value &= 0xffff; -+ break; -+ case SS: -+ case CS: -+ if ((value & 3) != 3) -+ return -EIO; -+ value &= 0xffff; -+ break; -+ case EFL: -+ value &= FLAG_MASK; -+ value |= PT_REGS_EFLAGS(&child->thread.regs); -+ break; -+ } -+ PT_REGS_SET(&child->thread.regs, regno, value); -+ return 0; -+} -+ -+unsigned long getreg(struct task_struct *child, int regno) -+{ -+ unsigned long retval = ~0UL; -+ -+ regno >>= 2; -+ switch (regno) { -+ case FS: -+ case GS: -+ case DS: -+ case ES: -+ case SS: -+ case CS: -+ retval = 0xffff; -+ /* fall through */ -+ default: -+ retval &= PT_REG(&child->thread.regs, regno); -+ } -+ return retval; -+} -+ -+struct i387_fxsave_struct { -+ unsigned short cwd; -+ unsigned short swd; -+ unsigned short twd; -+ unsigned short fop; -+ long fip; -+ long fcs; -+ long foo; -+ long fos; -+ long mxcsr; -+ long reserved; -+ long st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */ -+ long xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */ -+ long padding[56]; -+}; -+ -+/* -+ * FPU tag word conversions. -+ */ -+ -+static inline unsigned short twd_i387_to_fxsr( unsigned short twd ) -+{ -+ unsigned int tmp; /* to avoid 16 bit prefixes in the code */ -+ -+ /* Transform each pair of bits into 01 (valid) or 00 (empty) */ -+ tmp = ~twd; -+ tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ -+ /* and move the valid bits to the lower byte. 
*/ -+ tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ -+ tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ -+ tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ -+ return tmp; -+} -+ -+static inline unsigned long twd_fxsr_to_i387( struct i387_fxsave_struct *fxsave ) -+{ -+ struct _fpxreg *st = NULL; -+ unsigned long twd = (unsigned long) fxsave->twd; -+ unsigned long tag; -+ unsigned long ret = 0xffff0000; -+ int i; -+ -+#define FPREG_ADDR(f, n) ((char *)&(f)->st_space + (n) * 16); -+ -+ for ( i = 0 ; i < 8 ; i++ ) { -+ if ( twd & 0x1 ) { -+ st = (struct _fpxreg *) FPREG_ADDR( fxsave, i ); -+ -+ switch ( st->exponent & 0x7fff ) { -+ case 0x7fff: -+ tag = 2; /* Special */ -+ break; -+ case 0x0000: -+ if ( !st->significand[0] && -+ !st->significand[1] && -+ !st->significand[2] && -+ !st->significand[3] ) { -+ tag = 1; /* Zero */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ default: -+ if ( st->significand[3] & 0x8000 ) { -+ tag = 0; /* Valid */ -+ } else { -+ tag = 2; /* Special */ -+ } -+ break; -+ } -+ } else { -+ tag = 3; /* Empty */ -+ } -+ ret |= (tag << (2 * i)); -+ twd = twd >> 1; -+ } -+ return ret; -+} -+ -+/* -+ * FXSR floating point environment conversions. 
-+ */ -+ -+#ifdef CONFIG_MODE_TT -+static inline int convert_fxsr_to_user_tt(struct _fpstate *buf, -+ struct pt_regs *regs) -+{ -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned long env[7]; -+ struct _fpreg *to; -+ struct _fpxreg *from; -+ int i; -+ -+ env[0] = (unsigned long)fxsave->cwd | 0xffff0000; -+ env[1] = (unsigned long)fxsave->swd | 0xffff0000; -+ env[2] = twd_fxsr_to_i387(fxsave); -+ env[3] = fxsave->fip; -+ env[4] = fxsave->fcs | ((unsigned long)fxsave->fop << 16); -+ env[5] = fxsave->foo; -+ env[6] = fxsave->fos; -+ -+ if ( __copy_to_user( buf, env, 7 * sizeof(unsigned long) ) ) -+ return 1; -+ -+ to = &buf->_st[0]; -+ from = (struct _fpxreg *) &fxsave->st_space[0]; -+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) { -+ if ( __copy_to_user( to, from, sizeof(*to) ) ) -+ return 1; -+ } -+ return 0; -+} -+#endif -+ -+static inline int convert_fxsr_to_user(struct _fpstate *buf, -+ struct pt_regs *regs) -+{ -+ return(CHOOSE_MODE(convert_fxsr_to_user_tt(buf, regs), 0)); -+} -+ -+#ifdef CONFIG_MODE_TT -+static inline int convert_fxsr_from_user_tt(struct pt_regs *regs, -+ struct _fpstate *buf) -+{ -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned long env[7]; -+ struct _fpxreg *to; -+ struct _fpreg *from; -+ int i; -+ -+ if ( __copy_from_user( env, buf, 7 * sizeof(long) ) ) -+ return 1; -+ -+ fxsave->cwd = (unsigned short)(env[0] & 0xffff); -+ fxsave->swd = (unsigned short)(env[1] & 0xffff); -+ fxsave->twd = twd_i387_to_fxsr((unsigned short)(env[2] & 0xffff)); -+ fxsave->fip = env[3]; -+ fxsave->fop = (unsigned short)((env[4] & 0xffff0000) >> 16); -+ fxsave->fcs = (env[4] & 0xffff); -+ fxsave->foo = env[5]; -+ fxsave->fos = env[6]; -+ -+ to = (struct _fpxreg *) &fxsave->st_space[0]; -+ from = &buf->_st[0]; -+ for ( i = 0 ; i < 8 ; i++, to++, from++ ) { -+ if ( __copy_from_user( to, from, sizeof(*from) ) ) -+ return 1; -+ } -+ return 0; -+} -+#endif -+ -+static inline int convert_fxsr_from_user(struct 
pt_regs *regs, -+ struct _fpstate *buf) -+{ -+ return(CHOOSE_MODE(convert_fxsr_from_user_tt(regs, buf), 0)); -+} -+ -+int get_fpregs(unsigned long buf, struct task_struct *child) -+{ -+ int err; -+ -+ err = convert_fxsr_to_user((struct _fpstate *) buf, -+ &child->thread.regs); -+ if(err) return(-EFAULT); -+ else return(0); -+} -+ -+int set_fpregs(unsigned long buf, struct task_struct *child) -+{ -+ int err; -+ -+ err = convert_fxsr_from_user(&child->thread.regs, -+ (struct _fpstate *) buf); -+ if(err) return(-EFAULT); -+ else return(0); -+} -+ -+#ifdef CONFIG_MODE_TT -+int get_fpxregs_tt(unsigned long buf, struct task_struct *tsk) -+{ -+ struct pt_regs *regs = &tsk->thread.regs; -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ int err; -+ -+ err = __copy_to_user((void *) buf, fxsave, -+ sizeof(struct user_fxsr_struct)); -+ if(err) return -EFAULT; -+ else return 0; -+} -+#endif -+ -+int get_fpxregs(unsigned long buf, struct task_struct *tsk) -+{ -+ return(CHOOSE_MODE(get_fpxregs_tt(buf, tsk), 0)); -+} -+ -+#ifdef CONFIG_MODE_TT -+int set_fpxregs_tt(unsigned long buf, struct task_struct *tsk) -+{ -+ struct pt_regs *regs = &tsk->thread.regs; -+ struct i387_fxsave_struct *fxsave = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ int err; -+ -+ err = __copy_from_user(fxsave, (void *) buf, -+ sizeof(struct user_fxsr_struct) ); -+ if(err) return -EFAULT; -+ else return 0; -+} -+#endif -+ -+int set_fpxregs(unsigned long buf, struct task_struct *tsk) -+{ -+ return(CHOOSE_MODE(set_fpxregs_tt(buf, tsk), 0)); -+} -+ -+#ifdef notdef -+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) -+{ -+ fpu->cwd = (((SC_FP_CW(PT_REGS_SC(regs)) & 0xffff) << 16) | -+ (SC_FP_SW(PT_REGS_SC(regs)) & 0xffff)); -+ fpu->swd = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff; -+ fpu->twd = SC_FP_IPOFF(PT_REGS_SC(regs)); -+ fpu->fip = SC_FP_CSSEL(PT_REGS_SC(regs)) & 0xffff; -+ fpu->fcs = SC_FP_DATAOFF(PT_REGS_SC(regs)); -+ fpu->foo = SC_FP_DATASEL(PT_REGS_SC(regs)); -+ fpu->fos = 0; -+ 
memcpy(fpu->st_space, (void *) SC_FP_ST(PT_REGS_SC(regs)), -+ sizeof(fpu->st_space)); -+ return(1); -+} -+#endif -+ -+#ifdef CONFIG_MODE_TT -+static inline void copy_fpu_fxsave_tt(struct pt_regs *regs, -+ struct user_i387_struct *buf) -+{ -+ struct i387_fxsave_struct *fpu = SC_FXSR_ENV(PT_REGS_SC(regs)); -+ unsigned short *to; -+ unsigned short *from; -+ int i; -+ -+ memcpy( buf, fpu, 7 * sizeof(long) ); -+ -+ to = (unsigned short *) &buf->st_space[0]; -+ from = (unsigned short *) &fpu->st_space[0]; -+ for ( i = 0 ; i < 8 ; i++, to += 5, from += 8 ) { -+ memcpy( to, from, 5 * sizeof(unsigned short) ); -+ } -+} -+#endif -+ -+static inline void copy_fpu_fxsave(struct pt_regs *regs, -+ struct user_i387_struct *buf) -+{ -+ (void) CHOOSE_MODE(copy_fpu_fxsave_tt(regs, buf), 0); -+} -+ -+int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu ) -+{ -+ copy_fpu_fxsave(regs, (struct user_i387_struct *) fpu); -+ return(1); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/ptrace_user.c um/arch/um/sys-i386/ptrace_user.c ---- orig/arch/um/sys-i386/ptrace_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/ptrace_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,117 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stdio.h> -+#include <errno.h> -+#include <unistd.h> -+#include <linux/stddef.h> -+#include <sys/ptrace.h> -+#include <asm/ptrace.h> -+#include <asm/user.h> -+#include "kern_util.h" -+#include "sysdep/thread.h" -+#include "user.h" -+#include "os.h" -+ -+int ptrace_getregs(long pid, unsigned long *regs_out) -+{ -+ return(ptrace(PTRACE_GETREGS, pid, 0, regs_out)); -+} -+ -+int ptrace_setregs(long pid, unsigned long *regs) -+{ -+ return(ptrace(PTRACE_SETREGS, pid, 0, regs)); -+} -+ -+int ptrace_getfpregs(long pid, unsigned long *regs) -+{ -+ return(ptrace(PTRACE_GETFPREGS, pid, 0, regs)); -+} -+ -+static void write_debugregs(int pid, unsigned long *regs) -+{ -+ struct user *dummy; -+ int nregs, i; -+ -+ dummy = NULL; -+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); -+ for(i = 0; i < nregs; i++){ -+ if((i == 4) || (i == 5)) continue; -+ if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], -+ regs[i]) < 0) -+ printk("write_debugregs - ptrace failed, " -+ "errno = %d\n", errno); -+ } -+} -+ -+static void read_debugregs(int pid, unsigned long *regs) -+{ -+ struct user *dummy; -+ int nregs, i; -+ -+ dummy = NULL; -+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); -+ for(i = 0; i < nregs; i++){ -+ regs[i] = ptrace(PTRACE_PEEKUSR, pid, -+ &dummy->u_debugreg[i], 0); -+ } -+} -+ -+/* Accessed only by the tracing thread */ -+static unsigned long kernel_debugregs[8] = { [ 0 ... 
7 ] = 0 }; -+static int debugregs_seq = 0; -+ -+void arch_enter_kernel(void *task, int pid) -+{ -+ read_debugregs(pid, TASK_DEBUGREGS(task)); -+ write_debugregs(pid, kernel_debugregs); -+} -+ -+void arch_leave_kernel(void *task, int pid) -+{ -+ read_debugregs(pid, kernel_debugregs); -+ write_debugregs(pid, TASK_DEBUGREGS(task)); -+} -+ -+void ptrace_pokeuser(unsigned long addr, unsigned long data) -+{ -+ if((addr < offsetof(struct user, u_debugreg[0])) || -+ (addr > offsetof(struct user, u_debugreg[7]))) -+ return; -+ addr -= offsetof(struct user, u_debugreg[0]); -+ addr = addr >> 2; -+ if(kernel_debugregs[addr] == data) return; -+ -+ kernel_debugregs[addr] = data; -+ debugregs_seq++; -+} -+ -+static void update_debugregs_cb(void *arg) -+{ -+ int pid = *((int *) arg); -+ -+ write_debugregs(pid, kernel_debugregs); -+} -+ -+void update_debugregs(int seq) -+{ -+ int me; -+ -+ if(seq == debugregs_seq) return; -+ -+ me = os_getpid(); -+ initial_thread_cb(update_debugregs_cb, &me); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/sigcontext.c um/arch/um/sys-i386/sigcontext.c ---- orig/arch/um/sys-i386/sigcontext.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/sigcontext.c Mon Dec 2 23:20:13 2002 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include <stddef.h> -+#include <string.h> -+#include <asm/ptrace.h> -+#include <asm/sigcontext.h> -+#include "sysdep/ptrace.h" -+#include "kern_util.h" -+#include "frame_user.h" -+ -+int sc_size(void *data) -+{ -+ struct arch_frame_data *arch = data; -+ -+ return(sizeof(struct sigcontext) + arch->fpstate_size); -+} -+ -+void sc_to_sc(void *to_ptr, void *from_ptr) -+{ -+ struct sigcontext *to = to_ptr, *from = from_ptr; -+ int size = sizeof(*to) + signal_frame_sc.common.arch.fpstate_size; -+ -+ memcpy(to, from, size); -+ if(from->fpstate != NULL) to->fpstate = (struct _fpstate *) (to + 1); -+} -+ -+unsigned long *sc_sigmask(void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ -+ return(&sc->oldmask); -+} -+ -+int sc_get_fpregs(unsigned long buf, void *sc_ptr) -+{ -+ struct sigcontext *sc = sc_ptr; -+ struct _fpstate *from = sc->fpstate, *to = (struct _fpstate *) buf; -+ int err = 0; -+ -+ if(from == NULL){ -+ err |= clear_user_proc(&to->cw, sizeof(to->cw)); -+ err |= clear_user_proc(&to->sw, sizeof(to->sw)); -+ err |= clear_user_proc(&to->tag, sizeof(to->tag)); -+ err |= clear_user_proc(&to->ipoff, sizeof(to->ipoff)); -+ err |= clear_user_proc(&to->cssel, sizeof(to->cssel)); -+ err |= clear_user_proc(&to->dataoff, sizeof(to->dataoff)); -+ err |= clear_user_proc(&to->datasel, sizeof(to->datasel)); -+ err |= clear_user_proc(&to->_st, sizeof(to->_st)); -+ } -+ else { -+ err |= copy_to_user_proc(&to->cw, &from->cw, sizeof(to->cw)); -+ err |= copy_to_user_proc(&to->sw, 
&from->sw, sizeof(to->sw)); -+ err |= copy_to_user_proc(&to->tag, &from->tag, -+ sizeof(to->tag)); -+ err |= copy_to_user_proc(&to->ipoff, &from->ipoff, -+ sizeof(to->ipoff)); -+ err |= copy_to_user_proc(&to->cssel,& from->cssel, -+ sizeof(to->cssel)); -+ err |= copy_to_user_proc(&to->dataoff, &from->dataoff, -+ sizeof(to->dataoff)); -+ err |= copy_to_user_proc(&to->datasel, &from->datasel, -+ sizeof(to->datasel)); -+ err |= copy_to_user_proc(to->_st, from->_st, sizeof(to->_st)); -+ } -+ return(err); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/syscalls.c um/arch/um/sys-i386/syscalls.c ---- orig/arch/um/sys-i386/syscalls.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/syscalls.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,68 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "asm/mman.h" -+#include "asm/uaccess.h" -+#include "asm/unistd.h" -+ -+/* -+ * Perform the select(nd, in, out, ex, tv) and mmap() system -+ * calls. Linux/i386 didn't use to be able to handle more than -+ * 4 system call parameters, so these system calls used a memory -+ * block for parameter passing.. 
-+ */ -+ -+struct mmap_arg_struct { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+extern int old_mmap(unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long fd, unsigned long offset); -+ -+int old_mmap_i386(struct mmap_arg_struct *arg) -+{ -+ struct mmap_arg_struct a; -+ int err = -EFAULT; -+ -+ if (copy_from_user(&a, arg, sizeof(a))) -+ goto out; -+ -+ err = old_mmap(a.addr, a.len, a.prot, a.flags, a.fd, a.offset); -+ out: -+ return err; -+} -+ -+struct sel_arg_struct { -+ unsigned long n; -+ fd_set *inp, *outp, *exp; -+ struct timeval *tvp; -+}; -+ -+int old_select(struct sel_arg_struct *arg) -+{ -+ struct sel_arg_struct a; -+ -+ if (copy_from_user(&a, arg, sizeof(a))) -+ return -EFAULT; -+ /* sys_select() does the appropriate kernel locking */ -+ return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/sysrq.c um/arch/um/sys-i386/sysrq.c ---- orig/arch/um/sys-i386/sysrq.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/sysrq.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "linux/sched.h" -+#include "asm/ptrace.h" -+#include "sysrq.h" -+ -+void show_regs(struct pt_regs *regs) -+{ -+ printk("\n"); -+ printk("EIP: %04lx:[<%08lx>] CPU: %d %s", -+ 0xffff & PT_REGS_CS(regs), PT_REGS_IP(regs), -+ smp_processor_id(), print_tainted()); -+ if (PT_REGS_CS(regs) & 3) -+ printk(" ESP: %04lx:%08lx", 0xffff & PT_REGS_SS(regs), -+ PT_REGS_SP(regs)); -+ printk(" EFLAGS: %08lx\n %s\n", PT_REGS_EFLAGS(regs), -+ print_tainted()); -+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", -+ PT_REGS_EAX(regs), PT_REGS_EBX(regs), -+ PT_REGS_ECX(regs), -+ PT_REGS_EDX(regs)); -+ printk("ESI: %08lx EDI: %08lx EBP: %08lx", -+ PT_REGS_ESI(regs), PT_REGS_EDI(regs), -+ PT_REGS_EBP(regs)); -+ printk(" DS: %04lx ES: %04lx\n", -+ 0xffff & PT_REGS_DS(regs), -+ 0xffff & PT_REGS_ES(regs)); -+ -+ show_trace((unsigned long *) &regs); -+} -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/Makefile um/arch/um/sys-i386/util/Makefile ---- orig/arch/um/sys-i386/util/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/util/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,28 @@ -+EXE = mk_sc mk_thread -+ -+include $(TOPDIR)/Rules.make -+ -+all : $(EXE) -+ -+mk_sc : mk_sc.o -+ $(CC) -o mk_sc mk_sc.o -+ -+mk_sc.o : mk_sc.c -+ $(CC) -c $< -+ -+mk_thread : mk_thread_user.o mk_thread_kern.o -+ $(CC) -o mk_thread mk_thread_user.o mk_thread_kern.o -+ -+mk_thread_user.o : mk_thread_user.c -+ $(CC) -c $< -+ -+mk_thread_kern.o : mk_thread_kern.c -+ $(CC) $(CFLAGS) -c $< -+ -+clean : -+ $(RM) $(EXE) *.o -+ -+archmrproper : clean -+ -+fastdep : -+
-diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/mk_sc.c um/arch/um/sys-i386/util/mk_sc.c ---- orig/arch/um/sys-i386/util/mk_sc.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/util/mk_sc.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,51 @@ -+#include <stdio.h> -+#include <signal.h> -+#include <linux/stddef.h> -+ -+#define SC_OFFSET(name, field) \ -+ printf("#define " name "(sc) *((unsigned long *) &(((char *) (sc))[%d]))\n",\ -+ offsetof(struct sigcontext, field)) -+ -+#define SC_FP_OFFSET(name, field) \ -+ printf("#define " name \ -+ "(sc) *((unsigned long *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ -+ offsetof(struct _fpstate, field)) -+ -+#define SC_FP_OFFSET_PTR(name, field, type) \ -+ printf("#define " name \ -+ "(sc) ((" type " *) &(((char *) (SC_FPSTATE(sc)))[%d]))\n",\ -+ offsetof(struct _fpstate, field)) -+ -+int main(int argc, char **argv) -+{ -+ SC_OFFSET("SC_IP", eip); -+ SC_OFFSET("SC_SP", esp); -+ SC_OFFSET("SC_FS", fs); -+ SC_OFFSET("SC_GS", gs); -+ SC_OFFSET("SC_DS", ds); -+ SC_OFFSET("SC_ES", es); -+ SC_OFFSET("SC_SS", ss); -+ SC_OFFSET("SC_CS", cs); -+ SC_OFFSET("SC_EFLAGS", eflags); -+ SC_OFFSET("SC_EAX", eax); -+ SC_OFFSET("SC_EBX", ebx); -+ SC_OFFSET("SC_ECX", ecx); -+ SC_OFFSET("SC_EDX", edx); -+ SC_OFFSET("SC_EDI", edi); -+ SC_OFFSET("SC_ESI", esi); -+ SC_OFFSET("SC_EBP", ebp); -+ SC_OFFSET("SC_TRAPNO", trapno); -+ SC_OFFSET("SC_ERR", err); -+ SC_OFFSET("SC_CR2", cr2); -+ SC_OFFSET("SC_FPSTATE", fpstate); -+ SC_FP_OFFSET("SC_FP_CW", cw); -+ SC_FP_OFFSET("SC_FP_SW", sw); -+ SC_FP_OFFSET("SC_FP_TAG", tag); -+ SC_FP_OFFSET("SC_FP_IPOFF", ipoff); -+ SC_FP_OFFSET("SC_FP_CSSEL", cssel); -+ SC_FP_OFFSET("SC_FP_DATAOFF", dataoff); -+ SC_FP_OFFSET("SC_FP_DATASEL", datasel); -+ SC_FP_OFFSET_PTR("SC_FP_ST", _st, "struct _fpstate"); -+ SC_FP_OFFSET_PTR("SC_FXSR_ENV", _fxsr_env, "void"); -+ return(0); -+} -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/mk_thread_kern.c um/arch/um/sys-i386/util/mk_thread_kern.c ---- 
orig/arch/um/sys-i386/util/mk_thread_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/util/mk_thread_kern.c Mon Dec 9 23:24:38 2002 -@@ -0,0 +1,22 @@ -+#include "linux/config.h" -+#include "linux/stddef.h" -+#include "linux/sched.h" -+ -+extern void print_head(void); -+extern void print_constant_ptr(char *name, int value); -+extern void print_constant(char *name, char *type, int value); -+extern void print_tail(void); -+ -+#define THREAD_OFFSET(field) offsetof(struct task_struct, thread.field) -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_constant_ptr("TASK_DEBUGREGS", THREAD_OFFSET(arch.debugregs)); -+#ifdef CONFIG_MODE_TT -+ print_constant("TASK_EXTERN_PID", "int", THREAD_OFFSET(mode.tt.extern_pid)); -+#endif -+ print_tail(); -+ return(0); -+} -+ -diff -Naur -X ../exclude-files orig/arch/um/sys-i386/util/mk_thread_user.c um/arch/um/sys-i386/util/mk_thread_user.c ---- orig/arch/um/sys-i386/util/mk_thread_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-i386/util/mk_thread_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+#include <stdio.h> -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_thread\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __UM_THREAD_H\n"); -+ printf("#define __UM_THREAD_H\n"); -+ printf("\n"); -+} -+ -+void print_constant_ptr(char *name, int value) -+{ -+ printf("#define %s(task) ((unsigned long *) " -+ "&(((char *) (task))[%d]))\n", name, value); -+} -+ -+void print_constant(char *name, char *type, int value) -+{ -+ printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, -+ value); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -diff -Naur -X ../exclude-files orig/arch/um/sys-ia64/Makefile um/arch/um/sys-ia64/Makefile ---- orig/arch/um/sys-ia64/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ia64/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,26 @@ -+OBJ = sys.o -+ -+OBJS = -+ -+all: $(OBJ) -+ -+$(OBJ): 
$(OBJS) -+ rm -f $@ -+ $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ -+clean: -+ rm -f $(OBJS) -+ -+fastdep: -+ -+archmrproper: -+ -+archclean: -+ rm -f link.ld -+ @$(MAKEBOOT) clean -+ -+archdep: -+ @$(MAKEBOOT) dep -+ -+modules: -+ -+include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/Makefile um/arch/um/sys-ppc/Makefile ---- orig/arch/um/sys-ppc/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ppc/Makefile Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,80 @@ -+OBJ = sys.o -+ -+.S.o: -+ $(CC) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ -+OBJS = ptrace.o sigcontext.o semaphore.o checksum.o miscthings.o misc.o \ -+ ptrace_user.o sysrq.o -+ -+EXTRA_AFLAGS := -DCONFIG_ALL_PPC -I. -I$(TOPDIR)/arch/ppc/kernel -+ -+all: $(OBJ) -+ -+$(OBJ): $(OBJS) -+ rm -f $@ -+ $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ -+ -+ptrace_user.o: ptrace_user.c -+ $(CC) -D__KERNEL__ $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+sigcontext.o: sigcontext.c -+ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $< -+ -+semaphore.c: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+checksum.S: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/lib/$@ $@ -+ -+mk_defs.c: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+ppc_defs.head: -+ rm -f $@ -+ ln -s $(TOPDIR)/arch/ppc/kernel/$@ $@ -+ -+ppc_defs.h: mk_defs.c ppc_defs.head \ -+ $(TOPDIR)/include/asm-ppc/mmu.h \ -+ $(TOPDIR)/include/asm-ppc/processor.h \ -+ $(TOPDIR)/include/asm-ppc/pgtable.h \ -+ $(TOPDIR)/include/asm-ppc/ptrace.h -+# $(CC) $(CFLAGS) -S mk_defs.c -+ cp ppc_defs.head ppc_defs.h -+# for bk, this way we can write to the file even if it's not checked out -+ echo '#define THREAD 608' >> ppc_defs.h -+ echo '#define PT_REGS 8' >> ppc_defs.h -+ echo '#define CLONE_VM 256' >> ppc_defs.h -+# chmod u+w ppc_defs.h -+# grep '^#define' mk_defs.s >> ppc_defs.h -+# rm mk_defs.s -+ -+# the asm link is horrible, and breaks the other targets. 
This is also -+# not going to work with parallel makes. -+ -+checksum.o: checksum.S -+ rm -f asm -+ ln -s $(TOPDIR)/include/asm-ppc asm -+ $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ rm -f asm -+ -+misc.o: misc.S ppc_defs.h -+ rm -f asm -+ ln -s $(TOPDIR)/include/asm-ppc asm -+ $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o -+ rm -f asm -+ -+clean: -+ rm -f $(OBJS) -+ rm -f ppc_defs.h -+ rm -f checksum.S semaphore.c mk_defs.c -+ -+fastdep: -+ -+dep: -+ -+modules: -+ -+include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/misc.S um/arch/um/sys-ppc/misc.S ---- orig/arch/um/sys-ppc/misc.S Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ppc/misc.S Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,116 @@ -+/* -+ * This file contains miscellaneous low-level functions. -+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) -+ * -+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu) -+ * and Paul Mackerras. -+ * -+ * A couple of functions stolen from arch/ppc/kernel/misc.S for UML -+ * by Chris Emerson. -+ * -+ * This program is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU General Public License -+ * as published by the Free Software Foundation; either version -+ * 2 of the License, or (at your option) any later version. 
-+ * -+ */ -+ -+#include <linux/config.h> -+#include <asm/processor.h> -+#include "ppc_asm.h" -+ -+#if defined(CONFIG_4xx) || defined(CONFIG_8xx) -+#define CACHE_LINE_SIZE 16 -+#define LG_CACHE_LINE_SIZE 4 -+#define MAX_COPY_PREFETCH 1 -+#elif !defined(CONFIG_PPC64BRIDGE) -+#define CACHE_LINE_SIZE 32 -+#define LG_CACHE_LINE_SIZE 5 -+#define MAX_COPY_PREFETCH 4 -+#else -+#define CACHE_LINE_SIZE 128 -+#define LG_CACHE_LINE_SIZE 7 -+#define MAX_COPY_PREFETCH 1 -+#endif /* CONFIG_4xx || CONFIG_8xx */ -+ -+ .text -+ -+/* -+ * Clear a page using the dcbz instruction, which doesn't cause any -+ * memory traffic (except to write out any cache lines which get -+ * displaced). This only works on cacheable memory. -+ */ -+_GLOBAL(clear_page) -+ li r0,4096/CACHE_LINE_SIZE -+ mtctr r0 -+#ifdef CONFIG_8xx -+ li r4, 0 -+1: stw r4, 0(r3) -+ stw r4, 4(r3) -+ stw r4, 8(r3) -+ stw r4, 12(r3) -+#else -+1: dcbz 0,r3 -+#endif -+ addi r3,r3,CACHE_LINE_SIZE -+ bdnz 1b -+ blr -+ -+/* -+ * Copy a whole page. We use the dcbz instruction on the destination -+ * to reduce memory traffic (it eliminates the unnecessary reads of -+ * the destination into cache). This requires that the destination -+ * is cacheable. 
-+ */ -+#define COPY_16_BYTES \ -+ lwz r6,4(r4); \ -+ lwz r7,8(r4); \ -+ lwz r8,12(r4); \ -+ lwzu r9,16(r4); \ -+ stw r6,4(r3); \ -+ stw r7,8(r3); \ -+ stw r8,12(r3); \ -+ stwu r9,16(r3) -+ -+_GLOBAL(copy_page) -+ addi r3,r3,-4 -+ addi r4,r4,-4 -+ li r5,4 -+ -+#ifndef CONFIG_8xx -+#if MAX_COPY_PREFETCH > 1 -+ li r0,MAX_COPY_PREFETCH -+ li r11,4 -+ mtctr r0 -+11: dcbt r11,r4 -+ addi r11,r11,CACHE_LINE_SIZE -+ bdnz 11b -+#else /* MAX_COPY_PREFETCH == 1 */ -+ dcbt r5,r4 -+ li r11,CACHE_LINE_SIZE+4 -+#endif /* MAX_COPY_PREFETCH */ -+#endif /* CONFIG_8xx */ -+ -+ li r0,4096/CACHE_LINE_SIZE -+ mtctr r0 -+1: -+#ifndef CONFIG_8xx -+ dcbt r11,r4 -+ dcbz r5,r3 -+#endif -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 32 -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 64 -+ COPY_16_BYTES -+ COPY_16_BYTES -+#if CACHE_LINE_SIZE >= 128 -+ COPY_16_BYTES -+ COPY_16_BYTES -+ COPY_16_BYTES -+ COPY_16_BYTES -+#endif -+#endif -+#endif -+ bdnz 1b -+ blr -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/miscthings.c um/arch/um/sys-ppc/miscthings.c ---- orig/arch/um/sys-ppc/miscthings.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ppc/miscthings.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,53 @@ -+#include "linux/threads.h" -+#include "linux/stddef.h" // for NULL -+#include "linux/elf.h" // for AT_NULL -+ -+/* The following function nicked from arch/ppc/kernel/process.c and -+ * adapted slightly */ -+/* -+ * XXX ld.so expects the auxiliary table to start on -+ * a 16-byte boundary, so we have to find it and -+ * move it up. 
:-( -+ */ -+void shove_aux_table(unsigned long sp) -+{ -+ int argc; -+ char *p; -+ unsigned long e; -+ unsigned long aux_start, offset; -+ -+ argc = *(int *)sp; -+ sp += sizeof(int) + (argc + 1) * sizeof(char *); -+ /* skip over the environment pointers */ -+ do { -+ p = *(char **)sp; -+ sp += sizeof(char *); -+ } while (p != NULL); -+ aux_start = sp; -+ /* skip to the end of the auxiliary table */ -+ do { -+ e = *(unsigned long *)sp; -+ sp += 2 * sizeof(unsigned long); -+ } while (e != AT_NULL); -+ offset = ((aux_start + 15) & ~15) - aux_start; -+ if (offset != 0) { -+ do { -+ sp -= sizeof(unsigned long); -+ e = *(unsigned long *)sp; -+ *(unsigned long *)(sp + offset) = e; -+ } while (sp > aux_start); -+ } -+} -+/* END stuff taken from arch/ppc/kernel/process.c */ -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/ptrace.c um/arch/um/sys-ppc/ptrace.c ---- orig/arch/um/sys-ppc/ptrace.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ppc/ptrace.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,28 @@ -+#include "linux/sched.h" -+#include "asm/ptrace.h" -+ -+int putreg(struct task_struct *child, unsigned long regno, -+ unsigned long value) -+{ -+ child->thread.process_regs.regs[regno >> 2] = value; -+ return 0; -+} -+ -+unsigned long getreg(struct task_struct *child, unsigned long regno) -+{ -+ unsigned long retval = ~0UL; -+ -+ retval &= child->thread.process_regs.regs[regno >> 2]; -+ return retval; -+} -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/ptrace_user.c um/arch/um/sys-ppc/ptrace_user.c ---- orig/arch/um/sys-ppc/ptrace_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ppc/ptrace_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,40 @@ -+#include <sys/ptrace.h> -+#include <errno.h> -+#include <asm/ptrace.h> -+#include "sysdep/ptrace.h" -+ -+int ptrace_getregs(long pid, unsigned long *regs_out) -+{ -+ int i; -+ for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { -+ errno = 0; -+ regs_out->regs[i] = ptrace(PTRACE_PEEKUSER, pid, i*4, 0); -+ if (errno) { -+ return -errno; -+ } -+ } -+ return 0; -+} -+ -+int ptrace_setregs(long pid, unsigned long *regs_in) -+{ -+ int i; -+ for (i=0; i < sizeof(struct sys_pt_regs)/sizeof(PPC_REG); ++i) { -+ if (i != 34 /* FIXME: PT_ORIG_R3 */ && i <= PT_MQ) { -+ if (ptrace(PTRACE_POKEUSER, pid, i*4, regs_in->regs[i]) < 0) { -+ return -errno; -+ } -+ } -+ } -+ return 0; -+} -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/sigcontext.c um/arch/um/sys-ppc/sigcontext.c ---- orig/arch/um/sys-ppc/sigcontext.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ppc/sigcontext.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,15 @@ -+#include "asm/ptrace.h" -+#include "asm/sigcontext.h" -+#include "sysdep/ptrace.h" -+#include "user_util.h" -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/sys-ppc/sysrq.c um/arch/um/sys-ppc/sysrq.c ---- orig/arch/um/sys-ppc/sysrq.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/sys-ppc/sysrq.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,43 @@ -+/* -+ * Copyright (C) 2001 Chris Emerson (cemerson@chiark.greenend.org.uk) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/kernel.h" -+#include "linux/smp.h" -+#include "asm/ptrace.h" -+#include "sysrq.h" -+ -+void show_regs(struct pt_regs_subarch *regs) -+{ -+ printk("\n"); -+ printk("show_regs(): insert regs here.\n"); -+#if 0 -+ printk("\n"); -+ printk("EIP: %04x:[<%08lx>] CPU: %d",0xffff & regs->xcs, regs->eip, -+ smp_processor_id()); -+ if (regs->xcs & 3) -+ printk(" ESP: %04x:%08lx",0xffff & regs->xss, regs->esp); -+ printk(" EFLAGS: %08lx\n", regs->eflags); -+ printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", -+ regs->eax, regs->ebx, regs->ecx, regs->edx); -+ printk("ESI: %08lx EDI: %08lx EBP: %08lx", -+ regs->esi, regs->edi, regs->ebp); -+ printk(" DS: %04x ES: %04x\n", -+ 0xffff & regs->xds, 0xffff & regs->xes); -+#endif -+ -+ show_trace(&regs->gpr[1]); -+} -+ -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file.
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/arch/um/util/Makefile um/arch/um/util/Makefile ---- orig/arch/um/util/Makefile Wed Dec 31 19:00:00 1969 -+++ um/arch/um/util/Makefile Wed Oct 23 21:09:14 2002 -@@ -0,0 +1,26 @@ -+ALL = mk_task mk_constants -+ -+all : $(ALL) -+ -+mk_task : mk_task_user.o mk_task_kern.o -+ $(CC) -o mk_task mk_task_user.o mk_task_kern.o -+ -+mk_task_user.o : mk_task_user.c -+ $(CC) -c $< -+ -+mk_task_kern.o : mk_task_kern.c -+ $(CC) $(CFLAGS) -c $< -+ -+mk_constants : mk_constants_user.o mk_constants_kern.o -+ $(CC) -o mk_constants mk_constants_user.o mk_constants_kern.o -+ -+mk_constants_user.o : mk_constants_user.c -+ $(CC) -c $< -+ -+mk_constants_kern.o : mk_constants_kern.c -+ $(CC) $(CFLAGS) -c $< -+ -+clean : -+ $(RM) $(ALL) *.o *~ -+ -+archmrproper : clean -diff -Naur -X ../exclude-files orig/arch/um/util/mk_constants_kern.c um/arch/um/util/mk_constants_kern.c ---- orig/arch/um/util/mk_constants_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/util/mk_constants_kern.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,24 @@ -+#include "linux/kernel.h" -+#include "linux/stringify.h" -+#include "asm/page.h" -+ -+extern void print_head(void); -+extern void print_constant_str(char *name, char *value); -+extern void print_constant_int(char *name, int value); -+extern void print_tail(void); -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); -+ print_constant_str("UM_KERN_EMERG", KERN_EMERG); -+ print_constant_str("UM_KERN_ALERT", KERN_ALERT); -+ print_constant_str("UM_KERN_CRIT", KERN_CRIT); -+ print_constant_str("UM_KERN_ERR", KERN_ERR); -+ print_constant_str("UM_KERN_WARNING", KERN_WARNING); -+ print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); -+ print_constant_str("UM_KERN_INFO", KERN_INFO); -+ print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); -+ 
print_tail(); -+ return(0); -+} -diff -Naur -X ../exclude-files orig/arch/um/util/mk_constants_user.c um/arch/um/util/mk_constants_user.c ---- orig/arch/um/util/mk_constants_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/util/mk_constants_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,28 @@ -+#include <stdio.h> -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_constants\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __UM_CONSTANTS_H\n"); -+ printf("#define __UM_CONSTANTS_H\n"); -+ printf("\n"); -+} -+ -+void print_constant_str(char *name, char *value) -+{ -+ printf("#define %s \"%s\"\n", name, value); -+} -+ -+void print_constant_int(char *name, int value) -+{ -+ printf("#define %s %d\n", name, value); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -diff -Naur -X ../exclude-files orig/arch/um/util/mk_task_kern.c um/arch/um/util/mk_task_kern.c ---- orig/arch/um/util/mk_task_kern.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/util/mk_task_kern.c Sun Dec 8 21:03:34 2002 -@@ -0,0 +1,17 @@ -+#include "linux/sched.h" -+#include "linux/stddef.h" -+ -+extern void print(char *name, char *type, int offset); -+extern void print_ptr(char *name, char *type, int offset); -+extern void print_head(void); -+extern void print_tail(void); -+ -+int main(int argc, char **argv) -+{ -+ print_head(); -+ print_ptr("TASK_REGS", "union uml_pt_regs", -+ offsetof(struct task_struct, thread.regs)); -+ print("TASK_PID", "int", offsetof(struct task_struct, pid)); -+ print_tail(); -+ return(0); -+} -diff -Naur -X ../exclude-files orig/arch/um/util/mk_task_user.c um/arch/um/util/mk_task_user.c ---- orig/arch/um/util/mk_task_user.c Wed Dec 31 19:00:00 1969 -+++ um/arch/um/util/mk_task_user.c Wed Oct 23 21:08:04 2002 -@@ -0,0 +1,30 @@ -+#include <stdio.h> -+ -+void print(char *name, char *type, int offset) -+{ -+ printf("#define %s(task) *((%s *) &(((char *) (task))[%d]))\n", name, type, -+ offset); -+} -+ -+void print_ptr(char 
*name, char *type, int offset) -+{ -+ printf("#define %s(task) ((%s *) &(((char *) (task))[%d]))\n", name, type, -+ offset); -+} -+ -+void print_head(void) -+{ -+ printf("/*\n"); -+ printf(" * Generated by mk_task\n"); -+ printf(" */\n"); -+ printf("\n"); -+ printf("#ifndef __TASK_H\n"); -+ printf("#define __TASK_H\n"); -+ printf("\n"); -+} -+ -+void print_tail(void) -+{ -+ printf("\n"); -+ printf("#endif\n"); -+} -diff -Naur -X ../exclude-files orig/drivers/char/Makefile um/drivers/char/Makefile ---- orig/drivers/char/Makefile Thu Feb 27 13:04:15 2003 -+++ um/drivers/char/Makefile Thu Feb 27 13:05:21 2003 -@@ -95,6 +95,12 @@ - endif - endif - -+ifeq ($(ARCH),um) -+ KEYMAP = -+ KEYBD = -+ CONSOLE = -+endif -+ - ifeq ($(ARCH),sh) - KEYMAP = - KEYBD = -diff -Naur -X ../exclude-files orig/drivers/char/tty_io.c um/drivers/char/tty_io.c ---- orig/drivers/char/tty_io.c Thu Feb 27 13:04:15 2003 -+++ um/drivers/char/tty_io.c Thu Feb 27 13:05:21 2003 -@@ -637,6 +637,9 @@ - wake_up_interruptible(&tty->write_wait); - } - -+extern int write_tty_log(int fd, const unsigned char *buf, int len, void *tty, -+ int direction); -+ - static ssize_t tty_read(struct file * file, char * buf, size_t count, - loff_t *ppos) - { -@@ -677,8 +680,13 @@ - else - i = -EIO; - unlock_kernel(); -- if (i > 0) -+ if (i > 0){ - inode->i_atime = CURRENT_TIME; -+#ifdef CONFIG_TTY_LOG -+ if(tty->log_fd >= 0) -+ write_tty_log(tty->log_fd, buf, i, tty, 1); -+#endif -+ } - return i; - } - -@@ -732,6 +740,10 @@ - if (written) { - file->f_dentry->d_inode->i_mtime = CURRENT_TIME; - ret = written; -+#ifdef CONFIG_TTY_LOG -+ if(tty->log_fd >= 0) -+ write_tty_log(tty->log_fd, buf - ret, ret, tty, 0); -+#endif - } - up(&tty->atomic_write); - return ret; -@@ -945,6 +957,9 @@ - goto release_mem_out; - } - } -+#ifdef CONFIG_TTY_LOG -+ tty->log_fd = -1; -+#endif - goto success; - - /* -@@ -1039,6 +1054,8 @@ - free_tty_struct(tty); - } - -+extern int close_tty_log(int fd, void *tty); -+ - /* - * Even releasing the tty 
structures is a tricky business.. We have - * to be very careful that the structures are all released at the -@@ -1267,6 +1284,10 @@ - run_task_queue(&tq_timer); - flush_scheduled_tasks(); - -+#ifdef CONFIG_TTY_LOG -+ if(tty->log_fd >= 0) close_tty_log(tty->log_fd, tty); -+#endif -+ - /* - * The release_mem function takes care of the details of clearing - * the slots and preserving the termios structure. -@@ -1274,6 +1295,8 @@ - release_mem(tty, idx); - } - -+extern int open_tty_log(void *tty, void *current_tty); -+ - /* - * tty_open and tty_release keep up the tty count that contains the - * number of opens done on a tty. We cannot use the inode-count, as -@@ -1425,6 +1448,11 @@ - nr_warns++; - } - } -+ -+#ifdef CONFIG_TTY_LOG -+ if(tty->log_fd < 0) -+ tty->log_fd = open_tty_log(tty, current->tty); -+#endif - return 0; - } - -diff -Naur -X ../exclude-files orig/drivers/net/setup.c um/drivers/net/setup.c ---- orig/drivers/net/setup.c Sun Sep 15 12:13:19 2002 -+++ um/drivers/net/setup.c Wed Oct 23 21:08:05 2002 -@@ -28,7 +28,6 @@ - extern int lmc_setup(void); - - extern int madgemc_probe(void); --extern int uml_net_probe(void); - - /* Pad device name to IFNAMSIZ=16. F.e. __PAD6 is string of 9 zeros. 
*/ - #define __PAD6 "\0\0\0\0\0\0\0\0\0" -@@ -102,9 +101,6 @@ - */ - #ifdef CONFIG_MADGEMC - {madgemc_probe, 0}, --#endif --#ifdef CONFIG_UML_NET -- {uml_net_probe, 0}, - #endif - - {NULL, 0}, -diff -Naur -X ../exclude-files orig/include/asm-i386/hardirq.h um/include/asm-i386/hardirq.h ---- orig/include/asm-i386/hardirq.h Sun Sep 15 12:13:19 2002 -+++ um/include/asm-i386/hardirq.h Wed Apr 16 13:59:04 2003 -@@ -4,6 +4,7 @@ - #include <linux/config.h> - #include <linux/threads.h> - #include <linux/irq.h> -+#include <asm/processor.h> /* for cpu_relax */ - - /* assembly code in softirq.h is sensitive to the offsets of these fields */ - typedef struct { -diff -Naur -X ../exclude-files orig/include/asm-um/a.out.h um/include/asm-um/a.out.h ---- orig/include/asm-um/a.out.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/a.out.h Sun Oct 27 11:54:50 2002 -@@ -0,0 +1,20 @@ -+#ifndef __UM_A_OUT_H -+#define __UM_A_OUT_H -+ -+#include "linux/config.h" -+#include "asm/arch/a.out.h" -+#include "choose-mode.h" -+ -+#undef STACK_TOP -+ -+extern unsigned long stacksizelim; -+ -+extern unsigned long host_task_size; -+ -+#define STACK_ROOM (stacksizelim) -+ -+extern int honeypot; -+#define STACK_TOP \ -+ CHOOSE_MODE((honeypot ? host_task_size : task_size), task_size) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/arch-signal-i386.h um/include/asm-um/arch-signal-i386.h ---- orig/include/asm-um/arch-signal-i386.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/arch-signal-i386.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,24 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_ARCH_SIGNAL_I386_H -+#define __UM_ARCH_SIGNAL_I386_H -+ -+struct arch_signal_context { -+ unsigned long extrasigs[_NSIG_WORDS]; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. 
This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/archparam-i386.h um/include/asm-um/archparam-i386.h ---- orig/include/asm-um/archparam-i386.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/archparam-i386.h Sun Dec 8 20:09:11 2002 -@@ -0,0 +1,80 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_ARCHPARAM_I386_H -+#define __UM_ARCHPARAM_I386_H -+ -+/********* Bits for asm-um/elf.h ************/ -+ -+#include "user.h" -+ -+#define ELF_PLATFORM "i586" -+ -+#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) -+ -+typedef struct user_i387_struct elf_fpregset_t; -+typedef unsigned long elf_greg_t; -+ -+#define ELF_NGREG (sizeof (struct user_regs_struct) / sizeof(elf_greg_t)) -+typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -+ -+#define ELF_DATA ELFDATA2LSB -+#define ELF_ARCH EM_386 -+ -+#define ELF_PLAT_INIT(regs) do { \ -+ PT_REGS_EBX(regs) = 0; \ -+ PT_REGS_ECX(regs) = 0; \ -+ PT_REGS_EDX(regs) = 0; \ -+ PT_REGS_ESI(regs) = 0; \ -+ PT_REGS_EDI(regs) = 0; \ -+ PT_REGS_EBP(regs) = 0; \ -+ PT_REGS_EAX(regs) = 0; \ -+} while(0) -+ -+/* Shamelessly stolen from include/asm-i386/elf.h */ -+ -+#define ELF_CORE_COPY_REGS(pr_reg, regs) do { \ -+ pr_reg[0] = PT_REGS_EBX(regs); \ -+ pr_reg[1] = PT_REGS_ECX(regs); \ -+ pr_reg[2] = PT_REGS_EDX(regs); \ -+ pr_reg[3] = PT_REGS_ESI(regs); \ -+ pr_reg[4] = PT_REGS_EDI(regs); \ -+ pr_reg[5] = PT_REGS_EBP(regs); \ -+ pr_reg[6] = PT_REGS_EAX(regs); \ -+ pr_reg[7] = PT_REGS_DS(regs); \ -+ pr_reg[8] = PT_REGS_ES(regs); \ -+ /* fake once used fs and gs selectors? 
*/ \ -+ pr_reg[9] = PT_REGS_DS(regs); \ -+ pr_reg[10] = PT_REGS_DS(regs); \ -+ pr_reg[11] = PT_REGS_SYSCALL_NR(regs); \ -+ pr_reg[12] = PT_REGS_IP(regs); \ -+ pr_reg[13] = PT_REGS_CS(regs); \ -+ pr_reg[14] = PT_REGS_EFLAGS(regs); \ -+ pr_reg[15] = PT_REGS_SP(regs); \ -+ pr_reg[16] = PT_REGS_SS(regs); \ -+} while(0); -+ -+/********* Bits for asm-um/delay.h **********/ -+ -+typedef unsigned long um_udelay_t; -+ -+/********* Nothing for asm-um/hardirq.h **********/ -+ -+/********* Nothing for asm-um/hw_irq.h **********/ -+ -+/********* Nothing for asm-um/string.h **********/ -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/archparam-ppc.h um/include/asm-um/archparam-ppc.h ---- orig/include/asm-um/archparam-ppc.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/archparam-ppc.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,41 @@ -+#ifndef __UM_ARCHPARAM_PPC_H -+#define __UM_ARCHPARAM_PPC_H -+ -+/********* Bits for asm-um/elf.h ************/ -+ -+#define ELF_PLATFORM (0) -+ -+#define ELF_ET_DYN_BASE (0x08000000) -+ -+/* the following stolen from asm-ppc/elf.h */ -+#define ELF_NGREG 48 /* includes nip, msr, lr, etc. 
*/ -+#define ELF_NFPREG 33 /* includes fpscr */ -+/* General registers */ -+typedef unsigned long elf_greg_t; -+typedef elf_greg_t elf_gregset_t[ELF_NGREG]; -+ -+/* Floating point registers */ -+typedef double elf_fpreg_t; -+typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; -+ -+#define ELF_DATA ELFDATA2MSB -+#define ELF_ARCH EM_PPC -+ -+/********* Bits for asm-um/delay.h **********/ -+ -+typedef unsigned int um_udelay_t; -+ -+/********* Bits for asm-um/hw_irq.h **********/ -+ -+struct hw_interrupt_type; -+ -+/********* Bits for asm-um/hardirq.h **********/ -+ -+#define irq_enter(cpu, irq) hardirq_enter(cpu) -+#define irq_exit(cpu, irq) hardirq_exit(cpu) -+ -+/********* Bits for asm-um/string.h **********/ -+ -+#define __HAVE_ARCH_STRRCHR -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/atomic.h um/include/asm-um/atomic.h ---- orig/include/asm-um/atomic.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/atomic.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_ATOMIC_H -+#define __UM_ATOMIC_H -+ -+#include "asm/arch/atomic.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/bitops.h um/include/asm-um/bitops.h ---- orig/include/asm-um/bitops.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/bitops.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BITOPS_H -+#define __UM_BITOPS_H -+ -+#include "asm/arch/bitops.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/boot.h um/include/asm-um/boot.h ---- orig/include/asm-um/boot.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/boot.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BOOT_H -+#define __UM_BOOT_H -+ -+#include "asm/arch/boot.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/bugs.h um/include/asm-um/bugs.h ---- orig/include/asm-um/bugs.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/bugs.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BUGS_H -+#define __UM_BUGS_H -+ -+void check_bugs(void); -+ -+#endif -diff -Naur 
-X ../exclude-files orig/include/asm-um/byteorder.h um/include/asm-um/byteorder.h ---- orig/include/asm-um/byteorder.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/byteorder.h Thu Feb 27 13:20:12 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_BYTEORDER_H -+#define __UM_BYTEORDER_H -+ -+#include "asm/arch/byteorder.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/cache.h um/include/asm-um/cache.h ---- orig/include/asm-um/cache.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/cache.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_CACHE_H -+#define __UM_CACHE_H -+ -+#define L1_CACHE_BYTES 32 -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/checksum.h um/include/asm-um/checksum.h ---- orig/include/asm-um/checksum.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/checksum.h Tue Oct 29 17:25:12 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_CHECKSUM_H -+#define __UM_CHECKSUM_H -+ -+#include "sysdep/checksum.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/cobalt.h um/include/asm-um/cobalt.h ---- orig/include/asm-um/cobalt.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/cobalt.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_COBALT_H -+#define __UM_COBALT_H -+ -+#include "asm/arch/cobalt.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/current.h um/include/asm-um/current.h ---- orig/include/asm-um/current.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/current.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,34 @@ -+/* -+ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_CURRENT_H -+#define __UM_CURRENT_H -+ -+#ifndef __ASSEMBLY__ -+ -+#include "linux/config.h" -+#include "asm/page.h" -+ -+struct task_struct; -+ -+#define CURRENT_TASK(dummy) (((unsigned long) &dummy) & \ -+ (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER)) -+ -+#define current ({ int dummy; (struct task_struct *) CURRENT_TASK(dummy); }) -+ -+#endif /* __ASSEMBLY__ */ -+ -+#endif -+ -+/* -+ * 
Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/delay.h um/include/asm-um/delay.h ---- orig/include/asm-um/delay.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/delay.h Sun Dec 8 20:09:15 2002 -@@ -0,0 +1,7 @@ -+#ifndef __UM_DELAY_H -+#define __UM_DELAY_H -+ -+#include "asm/arch/delay.h" -+#include "asm/archparam.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/desc.h um/include/asm-um/desc.h ---- orig/include/asm-um/desc.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/desc.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_DESC_H -+#define __UM_DESC_H -+ -+#include "asm/arch/desc.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/div64.h um/include/asm-um/div64.h ---- orig/include/asm-um/div64.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/div64.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef _UM_DIV64_H -+#define _UM_DIV64_H -+ -+#include "asm/arch/div64.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/dma.h um/include/asm-um/dma.h ---- orig/include/asm-um/dma.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/dma.h Sun Oct 27 16:53:42 2002 -@@ -0,0 +1,10 @@ -+#ifndef __UM_DMA_H -+#define __UM_DMA_H -+ -+#include "asm/io.h" -+ -+extern unsigned long uml_physmem; -+ -+#define MAX_DMA_ADDRESS (uml_physmem) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/elf.h um/include/asm-um/elf.h ---- orig/include/asm-um/elf.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/elf.h Sun Dec 8 20:13:07 2002 -@@ -0,0 +1,18 @@ -+#ifndef __UM_ELF_H -+#define __UM_ELF_H -+ -+#include "asm/archparam.h" -+ -+#define ELF_HWCAP (0) 
-+ -+#define SET_PERSONALITY(ex, ibcs2) do ; while(0) -+ -+#define ELF_EXEC_PAGESIZE 4096 -+ -+#define elf_check_arch(x) (1) -+ -+#define ELF_CLASS ELFCLASS32 -+ -+#define USE_ELF_CORE_DUMP -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/errno.h um/include/asm-um/errno.h ---- orig/include/asm-um/errno.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/errno.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_ERRNO_H -+#define __UM_ERRNO_H -+ -+#include "asm/arch/errno.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/fcntl.h um/include/asm-um/fcntl.h ---- orig/include/asm-um/fcntl.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/fcntl.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_FCNTL_H -+#define __UM_FCNTL_H -+ -+#include "asm/arch/fcntl.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/fixmap.h um/include/asm-um/fixmap.h ---- orig/include/asm-um/fixmap.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/fixmap.h Wed Mar 26 22:01:27 2003 -@@ -0,0 +1,89 @@ -+#ifndef __UM_FIXMAP_H -+#define __UM_FIXMAP_H -+ -+#include <linux/config.h> -+#include <asm/kmap_types.h> -+ -+/* -+ * Here we define all the compile-time 'special' virtual -+ * addresses. The point is to have a constant address at -+ * compile time, but to set the physical address only -+ * in the boot process. We allocate these special addresses -+ * from the end of virtual memory (0xfffff000) backwards. -+ * Also this lets us do fail-safe vmalloc(), we -+ * can guarantee that these special addresses and -+ * vmalloc()-ed addresses never overlap. -+ * -+ * these 'compile-time allocated' memory buffers are -+ * fixed-size 4k pages. (or larger if used with an increment -+ * highger than 1) use fixmap_set(idx,phys) to associate -+ * physical memory with fixmap indices. -+ * -+ * TLB entries of such buffers will not be flushed across -+ * task switches. 
-+ */ -+ -+/* -+ * on UP currently we will have no trace of the fixmap mechanizm, -+ * no page table allocations, etc. This might change in the -+ * future, say framebuffers for the console driver(s) could be -+ * fix-mapped? -+ */ -+enum fixed_addresses { -+#ifdef CONFIG_HIGHMEM -+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ -+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -+#endif -+ __end_of_fixed_addresses -+}; -+ -+extern void __set_fixmap (enum fixed_addresses idx, -+ unsigned long phys, pgprot_t flags); -+ -+#define set_fixmap(idx, phys) \ -+ __set_fixmap(idx, phys, PAGE_KERNEL) -+/* -+ * Some hardware wants to get fixmapped without caching. -+ */ -+#define set_fixmap_nocache(idx, phys) \ -+ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) -+/* -+ * used by vmalloc.c. -+ * -+ * Leave one empty page between vmalloc'ed areas and -+ * the start of the fixmap, and leave one page empty -+ * at the top of mem.. -+ */ -+extern unsigned long get_kmem_end(void); -+ -+#define FIXADDR_TOP (get_kmem_end() - 0x2000) -+#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) -+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -+ -+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) -+ -+extern void __this_fixmap_does_not_exist(void); -+ -+/* -+ * 'index to address' translation. If anyone tries to use the idx -+ * directly without tranlation, we catch the bug with a NULL-deference -+ * kernel oops. Illegal ranges of incoming indices are caught too. -+ */ -+static inline unsigned long fix_to_virt(const unsigned int idx) -+{ -+ /* -+ * this branch gets completely eliminated after inlining, -+ * except when someone tries to use fixaddr indices in an -+ * illegal way. (such as mixing up address types or using -+ * out-of-range indices). -+ * -+ * If it doesn't get removed, the linker will complain -+ * loudly with a reasonably clear error message.. 
-+ */ -+ if (idx >= __end_of_fixed_addresses) -+ __this_fixmap_does_not_exist(); -+ -+ return __fix_to_virt(idx); -+} -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/floppy.h um/include/asm-um/floppy.h ---- orig/include/asm-um/floppy.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/floppy.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_FLOPPY_H -+#define __UM_FLOPPY_H -+ -+#include "asm/arch/floppy.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/hardirq.h um/include/asm-um/hardirq.h ---- orig/include/asm-um/hardirq.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/hardirq.h Wed Apr 16 13:59:04 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_HARDIRQ_H -+#define __UM_HARDIRQ_H -+ -+#include "asm/arch/hardirq.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/hdreg.h um/include/asm-um/hdreg.h ---- orig/include/asm-um/hdreg.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/hdreg.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_HDREG_H -+#define __UM_HDREG_H -+ -+#include "asm/arch/hdreg.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/highmem.h um/include/asm-um/highmem.h ---- orig/include/asm-um/highmem.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/highmem.h Wed Apr 16 13:59:04 2003 -@@ -0,0 +1,12 @@ -+#ifndef __UM_HIGHMEM_H -+#define __UM_HIGHMEM_H -+ -+#include "asm/page.h" -+#include "asm/fixmap.h" -+#include "asm/arch/highmem.h" -+ -+#undef PKMAP_BASE -+ -+#define PKMAP_BASE ((FIXADDR_START - LAST_PKMAP * PAGE_SIZE) & PMD_MASK) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/hw_irq.h um/include/asm-um/hw_irq.h ---- orig/include/asm-um/hw_irq.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/hw_irq.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,10 @@ -+#ifndef _ASM_UM_HW_IRQ_H -+#define _ASM_UM_HW_IRQ_H -+ -+#include "asm/irq.h" -+#include "asm/archparam.h" -+ -+static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) -+{} -+ -+#endif -diff -Naur -X 
../exclude-files orig/include/asm-um/ide.h um/include/asm-um/ide.h ---- orig/include/asm-um/ide.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ide.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IDE_H -+#define __UM_IDE_H -+ -+#include "asm/arch/ide.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/init.h um/include/asm-um/init.h ---- orig/include/asm-um/init.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/init.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,11 @@ -+#ifndef _UM_INIT_H -+#define _UM_INIT_H -+ -+#ifdef notdef -+#define __init -+#define __initdata -+#define __initfunc(__arginit) __arginit -+#define __cacheline_aligned -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/io.h um/include/asm-um/io.h ---- orig/include/asm-um/io.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/io.h Sun Oct 27 16:53:42 2002 -@@ -0,0 +1,25 @@ -+#ifndef __UM_IO_H -+#define __UM_IO_H -+ -+#include "asm/page.h" -+ -+#define IO_SPACE_LIMIT 0xdeadbeef /* Sure hope nothing uses this */ -+ -+static inline int inb(unsigned long i) { return(0); } -+static inline void outb(char c, unsigned long i) { } -+ -+/* -+ * Change virtual addresses to physical addresses and vv. 
-+ * These are pretty trivial -+ */ -+static inline unsigned long virt_to_phys(volatile void * address) -+{ -+ return __pa((void *) address); -+} -+ -+static inline void * phys_to_virt(unsigned long address) -+{ -+ return __va(address); -+} -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ioctl.h um/include/asm-um/ioctl.h ---- orig/include/asm-um/ioctl.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ioctl.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IOCTL_H -+#define __UM_IOCTL_H -+ -+#include "asm/arch/ioctl.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ioctls.h um/include/asm-um/ioctls.h ---- orig/include/asm-um/ioctls.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ioctls.h Wed Oct 23 21:11:14 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IOCTLS_H -+#define __UM_IOCTLS_H -+ -+#include "asm/arch/ioctls.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ipc.h um/include/asm-um/ipc.h ---- orig/include/asm-um/ipc.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ipc.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IPC_H -+#define __UM_IPC_H -+ -+#include "asm/arch/ipc.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ipcbuf.h um/include/asm-um/ipcbuf.h ---- orig/include/asm-um/ipcbuf.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ipcbuf.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_IPCBUF_H -+#define __UM_IPCBUF_H -+ -+#include "asm/arch/ipcbuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/irq.h um/include/asm-um/irq.h ---- orig/include/asm-um/irq.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/irq.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,36 @@ -+#ifndef __UM_IRQ_H -+#define __UM_IRQ_H -+ -+/* The i386 irq.h has a struct task_struct in a prototype without including -+ * sched.h. This forward declaration kills the resulting warning. 
-+ */ -+struct task_struct; -+ -+#include "asm/arch/irq.h" -+#include "asm/ptrace.h" -+ -+#undef NR_IRQS -+ -+#define TIMER_IRQ 0 -+#define UMN_IRQ 1 -+#define CONSOLE_IRQ 2 -+#define CONSOLE_WRITE_IRQ 3 -+#define UBD_IRQ 4 -+#define UM_ETH_IRQ 5 -+#define SSL_IRQ 6 -+#define SSL_WRITE_IRQ 7 -+#define ACCEPT_IRQ 8 -+#define MCONSOLE_IRQ 9 -+#define WINCH_IRQ 10 -+#define SIGIO_WRITE_IRQ 11 -+#define TELNETD_IRQ 12 -+#define XTERM_IRQ 13 -+ -+#define LAST_IRQ XTERM_IRQ -+#define NR_IRQS (LAST_IRQ + 1) -+ -+extern int um_request_irq(unsigned int irq, int fd, int type, -+ void (*handler)(int, void *, struct pt_regs *), -+ unsigned long irqflags, const char * devname, -+ void *dev_id); -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/keyboard.h um/include/asm-um/keyboard.h ---- orig/include/asm-um/keyboard.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/keyboard.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_KEYBOARD_H -+#define __UM_KEYBOARD_H -+ -+#include "asm/arch/keyboard.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/kmap_types.h um/include/asm-um/kmap_types.h ---- orig/include/asm-um/kmap_types.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/kmap_types.h Thu Feb 27 13:20:14 2003 -@@ -0,0 +1,11 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_KMAP_TYPES_H -+#define __UM_KMAP_TYPES_H -+ -+#include "asm/arch/kmap_types.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/linux_logo.h um/include/asm-um/linux_logo.h ---- orig/include/asm-um/linux_logo.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/linux_logo.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_LINUX_LOGO_H -+#define __UM_LINUX_LOGO_H -+ -+#include "asm/arch/linux_logo.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/locks.h um/include/asm-um/locks.h ---- orig/include/asm-um/locks.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/locks.h Wed Oct 23 
21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_LOCKS_H -+#define __UM_LOCKS_H -+ -+#include "asm/arch/locks.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mca_dma.h um/include/asm-um/mca_dma.h ---- orig/include/asm-um/mca_dma.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/mca_dma.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef mca___UM_DMA_H -+#define mca___UM_DMA_H -+ -+#include "asm/arch/mca_dma.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mman.h um/include/asm-um/mman.h ---- orig/include/asm-um/mman.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/mman.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MMAN_H -+#define __UM_MMAN_H -+ -+#include "asm/arch/mman.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mmu.h um/include/asm-um/mmu.h ---- orig/include/asm-um/mmu.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/mmu.h Sat Nov 9 12:51:11 2002 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __MMU_H -+#define __MMU_H -+ -+#include "um_mmu.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/mmu_context.h um/include/asm-um/mmu_context.h ---- orig/include/asm-um/mmu_context.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/mmu_context.h Wed Apr 16 13:59:16 2003 -@@ -0,0 +1,72 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_MMU_CONTEXT_H -+#define __UM_MMU_CONTEXT_H -+ -+#include "linux/sched.h" -+#include "choose-mode.h" -+ -+#define get_mmu_context(task) do ; while(0) -+#define activate_context(tsk) do ; while(0) -+ -+static inline void activate_mm(struct mm_struct *old, struct mm_struct *new) -+{ -+} -+ -+extern void switch_mm_skas(int mm_fd); -+ -+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, -+ struct task_struct *tsk, unsigned cpu) -+{ -+ if(prev != next){ -+ clear_bit(cpu, &prev->cpu_vm_mask); -+ set_bit(cpu, &next->cpu_vm_mask); -+ if(next != &init_mm) -+ CHOOSE_MODE((void) 0, -+ switch_mm_skas(next->context.skas.mm_fd)); -+ } -+} -+ -+static inline void enter_lazy_tlb(struct mm_struct *mm, -+ struct task_struct *tsk, unsigned cpu) -+{ -+} -+ -+extern int init_new_context_skas(struct task_struct *task, -+ struct mm_struct *mm); -+ -+static inline int init_new_context_tt(struct task_struct *task, -+ struct mm_struct *mm) -+{ -+ return(0); -+} -+ -+static inline int init_new_context(struct task_struct *task, -+ struct mm_struct *mm) -+{ -+ return(CHOOSE_MODE_PROC(init_new_context_tt, init_new_context_skas, -+ task, mm)); -+} -+ -+extern void destroy_context_skas(struct mm_struct *mm); -+ -+static inline void destroy_context(struct mm_struct *mm) -+{ -+ CHOOSE_MODE((void) 0, destroy_context_skas(mm)); -+} -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/module.h um/include/asm-um/module.h ---- orig/include/asm-um/module.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/module.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MODULE_H -+#define __UM_MODULE_H -+ -+#include "asm/arch/module.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/msgbuf.h um/include/asm-um/msgbuf.h ---- orig/include/asm-um/msgbuf.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/msgbuf.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MSGBUF_H -+#define __UM_MSGBUF_H -+ -+#include "asm/arch/msgbuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/mtrr.h um/include/asm-um/mtrr.h ---- orig/include/asm-um/mtrr.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/mtrr.h Thu Mar 27 15:11:56 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_MTRR_H -+#define __UM_MTRR_H -+ -+#include "asm/arch/mtrr.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/namei.h um/include/asm-um/namei.h ---- orig/include/asm-um/namei.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/namei.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_NAMEI_H -+#define __UM_NAMEI_H -+ -+#include "asm/arch/namei.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/page.h um/include/asm-um/page.h ---- orig/include/asm-um/page.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/page.h Sun Oct 27 16:49:35 2002 -@@ -0,0 +1,53 @@ -+#ifndef __UM_PAGE_H -+#define __UM_PAGE_H -+ -+struct page; -+ -+#include "asm/arch/page.h" -+ -+#undef BUG -+#undef PAGE_BUG -+#undef __pa -+#undef __va -+#undef virt_to_page -+#undef VALID_PAGE -+#undef PAGE_OFFSET -+#undef 
KERNELBASE -+ -+extern unsigned long uml_physmem; -+ -+#define PAGE_OFFSET (uml_physmem) -+#define KERNELBASE PAGE_OFFSET -+ -+#ifndef __ASSEMBLY__ -+ -+extern void stop(void); -+ -+#define BUG() do { \ -+ panic("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ -+} while (0) -+ -+#define PAGE_BUG(page) do { \ -+ BUG(); \ -+} while (0) -+ -+#endif /* __ASSEMBLY__ */ -+ -+#define __va_space (8*1024*1024) -+ -+extern unsigned long region_pa(void *virt); -+extern void *region_va(unsigned long phys); -+ -+#define __pa(virt) region_pa((void *) (virt)) -+#define __va(phys) region_va((unsigned long) (phys)) -+ -+extern struct page *page_mem_map(struct page *page); -+ -+extern struct page *pfn_to_page(unsigned long pfn); -+ -+#define VALID_PAGE(page) (page_mem_map(page) != NULL) -+ -+extern struct page *arch_validate(struct page *page, int mask, int order); -+#define HAVE_ARCH_VALIDATE -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/page_offset.h um/include/asm-um/page_offset.h ---- orig/include/asm-um/page_offset.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/page_offset.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1 @@ -+#define PAGE_OFFSET_RAW (uml_physmem) -diff -Naur -X ../exclude-files orig/include/asm-um/param.h um/include/asm-um/param.h ---- orig/include/asm-um/param.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/param.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,24 @@ -+#ifndef _UM_PARAM_H -+#define _UM_PARAM_H -+ -+#ifndef HZ -+#define HZ 52 -+#endif -+ -+#define EXEC_PAGESIZE 4096 -+ -+#ifndef NGROUPS -+#define NGROUPS 32 -+#endif -+ -+#ifndef NOGROUP -+#define NOGROUP (-1) -+#endif -+ -+#define MAXHOSTNAMELEN 64 /* max length of hostname */ -+ -+#ifdef __KERNEL__ -+# define CLOCKS_PER_SEC 100 /* frequency at which times() counts */ -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/pci.h um/include/asm-um/pci.h ---- orig/include/asm-um/pci.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/pci.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 
@@ -+#ifndef __UM_PCI_H -+#define __UM_PCI_H -+ -+#define PCI_DMA_BUS_IS_PHYS (1) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/pgalloc.h um/include/asm-um/pgalloc.h ---- orig/include/asm-um/pgalloc.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/pgalloc.h Wed Apr 16 13:59:04 2003 -@@ -0,0 +1,162 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Derived from include/asm-i386/pgalloc.h and include/asm-i386/pgtable.h -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PGALLOC_H -+#define __UM_PGALLOC_H -+ -+#include "linux/config.h" -+#include "linux/mm.h" -+#include "asm/fixmap.h" -+#include "choose-mode.h" -+ -+#define pgd_quicklist (current_cpu_data.pgd_quick) -+#define pmd_quicklist (current_cpu_data.pmd_quick) -+#define pte_quicklist (current_cpu_data.pte_quick) -+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) -+ -+#define pmd_populate(mm, pmd, pte) set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) -+ -+/* -+ * Allocate and free page tables. 
-+ */ -+ -+static inline pgd_t *get_pgd_slow_tt(void) -+{ -+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); -+ -+ if (pgd) { -+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); -+ memcpy(pgd + USER_PTRS_PER_PGD, -+ swapper_pg_dir + USER_PTRS_PER_PGD, -+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); -+ } -+ return pgd; -+} -+ -+static inline pgd_t *get_pgd_slow_skas(void) -+{ -+ pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL); -+ -+ if (pgd) -+ memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); -+ return pgd; -+} -+ -+static inline pgd_t *get_pgd_slow(void) -+{ -+ return(CHOOSE_MODE(get_pgd_slow_tt(), get_pgd_slow_skas())); -+} -+ -+static inline pgd_t *get_pgd_fast(void) -+{ -+ unsigned long *ret; -+ -+ if ((ret = pgd_quicklist) != NULL) { -+ pgd_quicklist = (unsigned long *)(*ret); -+ ret[0] = 0; -+ pgtable_cache_size--; -+ } else -+ ret = (unsigned long *)get_pgd_slow(); -+ return (pgd_t *)ret; -+} -+ -+static inline void free_pgd_fast(pgd_t *pgd) -+{ -+ *(unsigned long *)pgd = (unsigned long) pgd_quicklist; -+ pgd_quicklist = (unsigned long *) pgd; -+ pgtable_cache_size++; -+} -+ -+static inline void free_pgd_slow(pgd_t *pgd) -+{ -+ free_page((unsigned long)pgd); -+} -+ -+static inline pte_t *pte_alloc_one(struct mm_struct *mm, unsigned long address) -+{ -+ pte_t *pte; -+ -+ pte = (pte_t *) __get_free_page(GFP_KERNEL); -+ if (pte) -+ clear_page(pte); -+ return pte; -+} -+ -+static inline pte_t *pte_alloc_one_fast(struct mm_struct *mm, unsigned long address) -+{ -+ unsigned long *ret; -+ -+ if ((ret = (unsigned long *)pte_quicklist) != NULL) { -+ pte_quicklist = (unsigned long *)(*ret); -+ ret[0] = ret[1]; -+ pgtable_cache_size--; -+ } -+ return (pte_t *)ret; -+} -+ -+static inline void pte_free_fast(pte_t *pte) -+{ -+ *(unsigned long *)pte = (unsigned long) pte_quicklist; -+ pte_quicklist = (unsigned long *) pte; -+ pgtable_cache_size++; -+} -+ -+static inline void pte_free_slow(pte_t *pte) -+{ -+ free_page((unsigned long)pte); -+} -+ -+#define 
pte_free(pte) pte_free_fast(pte) -+#define pgd_free(pgd) free_pgd_slow(pgd) -+#define pgd_alloc(mm) get_pgd_fast() -+ -+/* -+ * allocating and freeing a pmd is trivial: the 1-entry pmd is -+ * inside the pgd, so has no extra memory associated with it. -+ */ -+ -+#define pmd_alloc_one_fast(mm, addr) ({ BUG(); ((pmd_t *)1); }) -+#define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) -+#define pmd_free_slow(x) do { } while (0) -+#define pmd_free_fast(x) do { } while (0) -+#define pmd_free(x) do { } while (0) -+#define pgd_populate(mm, pmd, pte) BUG() -+ -+/* -+ * TLB flushing: -+ * -+ * - flush_tlb() flushes the current mm struct TLBs -+ * - flush_tlb_all() flushes all processes TLBs -+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's -+ * - flush_tlb_page(vma, vmaddr) flushes one page -+ * - flush_tlb_kernel_vm() flushes the kernel vm area -+ * - flush_tlb_range(mm, start, end) flushes a range of pages -+ * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables -+ */ -+ -+extern void flush_tlb_all(void); -+extern void flush_tlb_mm(struct mm_struct *mm); -+extern void flush_tlb_range(struct mm_struct *mm, unsigned long start, -+ unsigned long end); -+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -+extern void flush_tlb_kernel_vm(void); -+ -+static inline void flush_tlb_pgtables(struct mm_struct *mm, -+ unsigned long start, unsigned long end) -+{ -+} -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/pgtable.h um/include/asm-um/pgtable.h ---- orig/include/asm-um/pgtable.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/pgtable.h Wed Apr 16 13:59:04 2003 -@@ -0,0 +1,428 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Derived from include/asm-i386/pgtable.h -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PGTABLE_H -+#define __UM_PGTABLE_H -+ -+#include "linux/sched.h" -+#include "asm/processor.h" -+#include "asm/page.h" -+ -+extern pgd_t swapper_pg_dir[1024]; -+ -+#define flush_cache_all() do ; while (0) -+#define flush_cache_mm(mm) do ; while (0) -+#define flush_cache_range(vma, start, end) do ; while (0) -+#define flush_cache_page(vma, vmaddr) do ; while (0) -+#define flush_page_to_ram(page) do ; while (0) -+#define flush_dcache_page(page) do ; while (0) -+#define flush_icache_range(from, to) do ; while (0) -+#define flush_icache_page(vma,pg) do ; while (0) -+#define flush_icache_user_range(vma,pg,adr,len) do ; while (0) -+ -+extern void __flush_tlb_one(unsigned long addr); -+ -+extern void pte_free(pte_t *pte); -+ -+extern void pgd_free(pgd_t *pgd); -+ -+extern int do_check_pgt_cache(int, int); -+ -+extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt, -+ pte_t *pte_out); -+ -+/* zero page used for uninitialized stuff */ -+extern unsigned long *empty_zero_page; -+ -+#define pgtable_cache_init() do ; while (0) -+ -+/* PMD_SHIFT determines the size of the area a second-level page table can map */ -+#define PMD_SHIFT 22 -+#define PMD_SIZE (1UL << PMD_SHIFT) -+#define PMD_MASK (~(PMD_SIZE-1)) -+ -+/* PGDIR_SHIFT determines what a third-level page table entry can map */ -+#define PGDIR_SHIFT 22 -+#define PGDIR_SIZE (1UL << PGDIR_SHIFT) -+#define PGDIR_MASK (~(PGDIR_SIZE-1)) -+ -+/* -+ * entries per page directory 
level: the i386 is two-level, so -+ * we don't really have any PMD directory physically. -+ */ -+#define PTRS_PER_PTE 1024 -+#define PTRS_PER_PMD 1 -+#define PTRS_PER_PGD 1024 -+#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -+#define FIRST_USER_PGD_NR 0 -+ -+#define pte_ERROR(e) \ -+ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) -+#define pmd_ERROR(e) \ -+ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -+#define pgd_ERROR(e) \ -+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -+ -+/* -+ * pgd entries used up by user/kernel: -+ */ -+ -+#define USER_PGD_PTRS (TASK_SIZE >> PGDIR_SHIFT) -+#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) -+ -+#ifndef __ASSEMBLY__ -+/* Just any arbitrary offset to the start of the vmalloc VM area: the -+ * current 8MB value just means that there will be a 8MB "hole" after the -+ * physical memory until the kernel virtual memory starts. That means that -+ * any out-of-bounds memory accesses will hopefully be caught. -+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced -+ * area for the same reason. 
;) -+ */ -+ -+extern unsigned long high_physmem; -+ -+#define VMALLOC_OFFSET (__va_space) -+#define VMALLOC_START (((unsigned long) high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) -+#define VMALLOC_VMADDR(x) ((unsigned long)(x)) -+ -+#if CONFIG_HIGHMEM -+# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) -+#else -+# define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) -+#endif -+ -+#define _PAGE_PRESENT 0x001 -+#define _PAGE_NEWPAGE 0x002 -+#define _PAGE_PROTNONE 0x004 /* If not present */ -+#define _PAGE_RW 0x008 -+#define _PAGE_USER 0x010 -+#define _PAGE_ACCESSED 0x020 -+#define _PAGE_DIRTY 0x040 -+#define _PAGE_NEWPROT 0x080 -+ -+#define REGION_MASK 0xf0000000 -+#define REGION_SHIFT 28 -+ -+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) -+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) -+ -+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) -+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) -+#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) -+#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_DIRTY | _PAGE_ACCESSED) -+ -+/* -+ * The i386 can't do page protection for execute, and considers that the same are read. -+ * Also, write permissions imply read permissions. This is the closest we can get.. 
-+ */ -+#define __P000 PAGE_NONE -+#define __P001 PAGE_READONLY -+#define __P010 PAGE_COPY -+#define __P011 PAGE_COPY -+#define __P100 PAGE_READONLY -+#define __P101 PAGE_READONLY -+#define __P110 PAGE_COPY -+#define __P111 PAGE_COPY -+ -+#define __S000 PAGE_NONE -+#define __S001 PAGE_READONLY -+#define __S010 PAGE_SHARED -+#define __S011 PAGE_SHARED -+#define __S100 PAGE_READONLY -+#define __S101 PAGE_READONLY -+#define __S110 PAGE_SHARED -+#define __S111 PAGE_SHARED -+ -+/* -+ * Define this if things work differently on an i386 and an i486: -+ * it will (on an i486) warn about kernel memory accesses that are -+ * done without a 'verify_area(VERIFY_WRITE,..)' -+ */ -+#undef TEST_VERIFY_AREA -+ -+/* page table for 0-4MB for everybody */ -+extern unsigned long pg0[1024]; -+ -+/* -+ * BAD_PAGETABLE is used when we need a bogus page-table, while -+ * BAD_PAGE is used for a bogus page. -+ * -+ * ZERO_PAGE is a global shared page that is always zero: used -+ * for zero-mapped memory areas etc.. -+ */ -+extern pte_t __bad_page(void); -+extern pte_t * __bad_pagetable(void); -+ -+#define BAD_PAGETABLE __bad_pagetable() -+#define BAD_PAGE __bad_page() -+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -+ -+/* number of bits that fit into a memory pointer */ -+#define BITS_PER_PTR (8*sizeof(unsigned long)) -+ -+/* to align the pointer to a pointer address */ -+#define PTR_MASK (~(sizeof(void*)-1)) -+ -+/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */ -+/* 64-bit machines, beware! SRB. 
*/ -+#define SIZEOF_PTR_LOG2 2 -+ -+/* to find an entry in a page-table */ -+#define PAGE_PTR(address) \ -+((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) -+ -+#define pte_none(x) !(pte_val(x) & ~_PAGE_NEWPAGE) -+#define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE)) -+ -+#define pte_clear(xp) do { pte_val(*(xp)) = _PAGE_NEWPAGE; } while (0) -+ -+#define phys_region_index(x) (((x) & REGION_MASK) >> REGION_SHIFT) -+#define pte_region_index(x) phys_region_index(pte_val(x)) -+ -+#define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE)) -+#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) -+#define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -+#define pmd_clear(xp) do { pmd_val(*(xp)) = _PAGE_NEWPAGE; } while (0) -+ -+#define pmd_newpage(x) (pmd_val(x) & _PAGE_NEWPAGE) -+#define pmd_mkuptodate(x) (pmd_val(x) &= ~_PAGE_NEWPAGE) -+ -+/* -+ * The "pgd_xxx()" functions here are trivial for a folded two-level -+ * setup: the pgd is never bad, and a pmd always exists (as it's folded -+ * into the pgd entry) -+ */ -+static inline int pgd_none(pgd_t pgd) { return 0; } -+static inline int pgd_bad(pgd_t pgd) { return 0; } -+static inline int pgd_present(pgd_t pgd) { return 1; } -+static inline void pgd_clear(pgd_t * pgdp) { } -+ -+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) -+ -+extern struct page *pte_mem_map(pte_t pte); -+extern struct page *phys_mem_map(unsigned long phys); -+extern unsigned long phys_to_pfn(unsigned long p); -+ -+#define pte_page(x) pfn_to_page(pte_pfn(x)) -+#define pte_address(x) (__va(pte_val(x) & PAGE_MASK)) -+#define mk_phys(a, r) ((a) + (r << REGION_SHIFT)) -+#define phys_addr(p) ((p) & ~REGION_MASK) -+#define phys_page(p) (phys_mem_map(p) + ((phys_addr(p)) >> PAGE_SHIFT)) -+#define virt_to_page(kaddr) \ -+ (phys_mem_map(__pa(kaddr)) + (phys_addr(__pa(kaddr)) >> PAGE_SHIFT)) -+#define pte_pfn(x) phys_to_pfn(pte_val(x)) -+ -+static inline pte_t pte_mknewprot(pte_t pte) -+{ -+ 
pte_val(pte) |= _PAGE_NEWPROT; -+ return(pte); -+} -+ -+static inline pte_t pte_mknewpage(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_NEWPAGE; -+ return(pte); -+} -+ -+static inline void set_pte(pte_t *pteptr, pte_t pteval) -+{ -+ /* If it's a swap entry, it needs to be marked _PAGE_NEWPAGE so -+ * fix_range knows to unmap it. _PAGE_NEWPROT is specific to -+ * mapped pages. -+ */ -+ *pteptr = pte_mknewpage(pteval); -+ if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr); -+} -+ -+/* -+ * (pmds are folded into pgds so this doesnt get actually called, -+ * but the define is needed for a generic inline function.) -+ */ -+#define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) -+#define set_pgd(pgdptr, pgdval) (*(pgdptr) = pgdval) -+ -+/* -+ * The following only work if pte_present() is true. -+ * Undefined behaviour if not.. -+ */ -+static inline int pte_read(pte_t pte) -+{ -+ return((pte_val(pte) & _PAGE_USER) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_exec(pte_t pte){ -+ return((pte_val(pte) & _PAGE_USER) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_write(pte_t pte) -+{ -+ return((pte_val(pte) & _PAGE_RW) && -+ !(pte_val(pte) & _PAGE_PROTNONE)); -+} -+ -+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } -+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } -+static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; } -+static inline int pte_newprot(pte_t pte) -+{ -+ return(pte_present(pte) && (pte_val(pte) & _PAGE_NEWPROT)); -+} -+ -+static inline pte_t pte_rdprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_exprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkclean(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_DIRTY; -+ return(pte); -+} -+ -+static inline pte_t pte_mkold(pte_t pte) -+{ -+ pte_val(pte) 
&= ~_PAGE_ACCESSED; -+ return(pte); -+} -+ -+static inline pte_t pte_wrprotect(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_RW; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkread(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkexec(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_USER; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkdirty(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_DIRTY; -+ return(pte); -+} -+ -+static inline pte_t pte_mkyoung(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_ACCESSED; -+ return(pte); -+} -+ -+static inline pte_t pte_mkwrite(pte_t pte) -+{ -+ pte_val(pte) |= _PAGE_RW; -+ return(pte_mknewprot(pte)); -+} -+ -+static inline pte_t pte_mkuptodate(pte_t pte) -+{ -+ pte_val(pte) &= ~_PAGE_NEWPAGE; -+ if(pte_present(pte)) pte_val(pte) &= ~_PAGE_NEWPROT; -+ return(pte); -+} -+ -+extern unsigned long page_to_phys(struct page *page); -+ -+/* -+ * Conversion functions: convert a page and protection to a page entry, -+ * and a page entry and page directory to the page they refer to. -+ */ -+ -+#define mk_pte(page, pgprot) \ -+({ \ -+ pte_t __pte; \ -+ \ -+ pte_val(__pte) = page_to_phys(page) + pgprot_val(pgprot);\ -+ if(pte_present(__pte)) pte_mknewprot(pte_mknewpage(__pte)); \ -+ __pte; \ -+}) -+ -+/* This takes a physical page address that is used by the remapping functions */ -+#define mk_pte_phys(physpage, pgprot) \ -+ pte_mknewpage(mk_pte(phys_page(physpage), pgprot)) -+ -+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) -+{ -+ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); -+ if(pte_present(pte)) pte = pte_mknewpage(pte_mknewprot(pte)); -+ return pte; -+} -+ -+#define pmd_page(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) -+ -+/* to find an entry in a page-table-directory. 
*/ -+#define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -+#define __pgd_offset(address) pgd_index(address) -+ -+/* to find an entry in a page-table-directory */ -+#define pgd_offset(mm, address) \ -+((mm)->pgd + ((address) >> PGDIR_SHIFT)) -+ -+/* to find an entry in a kernel page-table-directory */ -+#define pgd_offset_k(address) pgd_offset(&init_mm, address) -+ -+#define __pmd_offset(address) \ -+ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -+ -+/* Find an entry in the second-level page table.. */ -+static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -+{ -+ return (pmd_t *) dir; -+} -+ -+/* Find an entry in the third-level page table.. */ -+#define pte_offset(pmd, address) \ -+((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2)))) -+ -+#define update_mmu_cache(vma,address,pte) do ; while (0) -+ -+/* Encode and de-code a swap entry */ -+#define SWP_TYPE(x) (((x).val >> 3) & 0x7f) -+#define SWP_OFFSET(x) ((x).val >> 10) -+ -+#define SWP_ENTRY(type, offset) \ -+ ((swp_entry_t) { ((type) << 3) | ((offset) << 10) }) -+#define pte_to_swp_entry(pte) \ -+ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) -+#define swp_entry_to_pte(x) ((pte_t) { (x).val }) -+ -+#define PageSkip(x) (0) -+#define kern_addr_valid(addr) (1) -+ -+#include <asm-generic/pgtable.h> -+ -+#endif -+ -+#endif -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/poll.h um/include/asm-um/poll.h ---- orig/include/asm-um/poll.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/poll.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_POLL_H -+#define __UM_POLL_H -+ -+#include "asm/arch/poll.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/posix_types.h um/include/asm-um/posix_types.h ---- orig/include/asm-um/posix_types.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/posix_types.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_POSIX_TYPES_H -+#define __UM_POSIX_TYPES_H -+ -+#include "asm/arch/posix_types.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/processor-generic.h um/include/asm-um/processor-generic.h ---- orig/include/asm-um/processor-generic.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/processor-generic.h Wed Apr 16 13:59:03 2003 -@@ -0,0 +1,182 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PROCESSOR_GENERIC_H -+#define __UM_PROCESSOR_GENERIC_H -+ -+struct pt_regs; -+ -+struct task_struct; -+ -+#include "linux/config.h" -+#include "linux/signal.h" -+#include "asm/ptrace.h" -+#include "asm/siginfo.h" -+#include "choose-mode.h" -+ -+struct mm_struct; -+ -+#define current_text_addr() ((void *) 0) -+ -+#define cpu_relax() do ; while (0) -+ -+#ifdef CONFIG_MODE_TT -+struct proc_tt_mode { -+ int extern_pid; -+ int tracing; -+ int switch_pipe[2]; -+ int singlestep_syscall; -+ int vm_seq; -+}; -+#endif -+ -+#ifdef CONFIG_MODE_SKAS -+struct proc_skas_mode { -+ void *switch_buf; -+ void *fork_buf; -+}; -+#endif -+ -+struct thread_struct { -+ int forking; -+ unsigned long kernel_stack; -+ int nsyscalls; -+ struct pt_regs regs; -+ unsigned long cr2; -+ int err; -+ void *fault_addr; -+ void 
*fault_catcher; -+ struct task_struct *prev_sched; -+ unsigned long temp_stack; -+ void *exec_buf; -+ struct arch_thread arch; -+ union { -+#ifdef CONFIG_MODE_TT -+ struct proc_tt_mode tt; -+#endif -+#ifdef CONFIG_MODE_SKAS -+ struct proc_skas_mode skas; -+#endif -+ } mode; -+ struct { -+ int op; -+ union { -+ struct { -+ int pid; -+ } fork, exec; -+ struct { -+ int (*proc)(void *); -+ void *arg; -+ } thread; -+ struct { -+ void (*proc)(void *); -+ void *arg; -+ } cb; -+ } u; -+ } request; -+}; -+ -+#define INIT_THREAD \ -+{ \ -+ .forking = 0, \ -+ .kernel_stack = 0, \ -+ .nsyscalls = 0, \ -+ .regs = EMPTY_REGS, \ -+ .cr2 = 0, \ -+ .err = 0, \ -+ .fault_addr = NULL, \ -+ .prev_sched = NULL, \ -+ .temp_stack = 0, \ -+ .exec_buf = NULL, \ -+ .arch = INIT_ARCH_THREAD, \ -+ .request = { 0 } \ -+} -+ -+#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) -+ -+typedef struct { -+ unsigned long seg; -+} mm_segment_t; -+ -+extern struct task_struct *alloc_task_struct(void); -+extern void free_task_struct(struct task_struct *task); -+ -+#define get_task_struct(tsk) atomic_inc(&virt_to_page(tsk)->count) -+ -+extern void release_thread(struct task_struct *); -+extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); -+extern void dump_thread(struct pt_regs *regs, struct user *u); -+ -+extern unsigned long thread_saved_pc(struct thread_struct *t); -+ -+static inline void mm_copy_segments(struct mm_struct *from_mm, -+ struct mm_struct *new_mm) -+{ -+} -+ -+static inline void copy_segments(struct task_struct *p, -+ struct mm_struct *new_mm) -+{ -+} -+ -+static inline void release_segments(struct mm_struct *mm) -+{ -+} -+ -+#define init_task (init_task_union.task) -+#define init_stack (init_task_union.stack) -+ -+/* -+ * User space process size: 3GB (default). -+ */ -+extern unsigned long task_size; -+ -+#define TASK_SIZE (task_size) -+ -+/* This decides where the kernel will search for a free chunk of vm -+ * space during mmap's. 
-+ */ -+#define TASK_UNMAPPED_BASE (0x40000000) -+ -+extern void start_thread(struct pt_regs *regs, unsigned long entry, -+ unsigned long stack); -+ -+struct cpuinfo_um { -+ unsigned long loops_per_jiffy; -+ unsigned long *pgd_quick; -+ unsigned long *pmd_quick; -+ unsigned long *pte_quick; -+ unsigned long pgtable_cache_sz; -+ int ipi_pipe[2]; -+}; -+ -+extern struct cpuinfo_um boot_cpu_data; -+ -+#define my_cpu_data cpu_data[smp_processor_id()] -+ -+#ifdef CONFIG_SMP -+extern struct cpuinfo_um cpu_data[]; -+#define current_cpu_data cpu_data[smp_processor_id()] -+#else -+#define cpu_data (&boot_cpu_data) -+#define current_cpu_data boot_cpu_data -+#endif -+ -+#define KSTK_EIP(tsk) (PT_REGS_IP(&tsk->thread.regs)) -+#define KSTK_ESP(tsk) (PT_REGS_SP(&tsk->thread.regs)) -+#define get_wchan(p) (0) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/processor-i386.h um/include/asm-um/processor-i386.h ---- orig/include/asm-um/processor-i386.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/processor-i386.h Wed Apr 16 13:59:03 2003 -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PROCESSOR_I386_H -+#define __UM_PROCESSOR_I386_H -+ -+extern int cpu_has_xmm; -+extern int cpu_has_cmov; -+ -+struct arch_thread { -+ unsigned long debugregs[8]; -+ int debugregs_seq; -+}; -+ -+#define INIT_ARCH_THREAD { .debugregs = { [ 0 ... 
7 ] = 0 }, \ -+ .debugregs_seq = 0 } -+ -+#include "asm/arch/user.h" -+ -+#include "asm/processor-generic.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/processor-ppc.h um/include/asm-um/processor-ppc.h ---- orig/include/asm-um/processor-ppc.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/processor-ppc.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,15 @@ -+#ifndef __UM_PROCESSOR_PPC_H -+#define __UM_PROCESSOR_PPC_H -+ -+#if defined(__ASSEMBLY__) -+ -+#define CONFIG_ALL_PPC -+#include "arch/processor.h" -+ -+#else -+ -+#include "asm/processor-generic.h" -+ -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/ptrace-generic.h um/include/asm-um/ptrace-generic.h ---- orig/include/asm-um/ptrace-generic.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ptrace-generic.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,74 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PTRACE_GENERIC_H -+#define __UM_PTRACE_GENERIC_H -+ -+#ifndef __ASSEMBLY__ -+ -+#include "linux/config.h" -+ -+#include "asm/current.h" -+ -+#define pt_regs pt_regs_subarch -+#define show_regs show_regs_subarch -+ -+#include "asm/arch/ptrace.h" -+ -+#undef pt_regs -+#undef show_regs -+#undef user_mode -+#undef instruction_pointer -+ -+#include "sysdep/ptrace.h" -+#include "skas_ptrace.h" -+ -+struct pt_regs { -+ union uml_pt_regs regs; -+}; -+ -+#define EMPTY_REGS { regs : EMPTY_UML_PT_REGS } -+ -+#define PT_REGS_IP(r) UPT_IP(&(r)->regs) -+#define PT_REGS_SP(r) UPT_SP(&(r)->regs) -+ -+#define PT_REG(r, reg) UPT_REG(&(r)->regs, 
reg) -+#define PT_REGS_SET(r, reg, val) UPT_SET(&(r)->regs, reg, val) -+ -+#define PT_REGS_SET_SYSCALL_RETURN(r, res) \ -+ UPT_SET_SYSCALL_RETURN(&(r)->regs, res) -+#define PT_REGS_RESTART_SYSCALL(r) UPT_RESTART_SYSCALL(&(r)->regs) -+ -+#define PT_REGS_SYSCALL_NR(r) UPT_SYSCALL_NR(&(r)->regs) -+ -+#define PT_REGS_SC(r) UPT_SC(&(r)->regs) -+ -+struct task_struct; -+ -+extern unsigned long getreg(struct task_struct *child, int regno); -+extern int putreg(struct task_struct *child, int regno, unsigned long value); -+extern int get_fpregs(unsigned long buf, struct task_struct *child); -+extern int set_fpregs(unsigned long buf, struct task_struct *child); -+extern int get_fpxregs(unsigned long buf, struct task_struct *child); -+extern int set_fpxregs(unsigned long buf, struct task_struct *tsk); -+ -+extern void show_regs(struct pt_regs *regs); -+ -+#define INIT_TASK_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) -+ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/ptrace-i386.h um/include/asm-um/ptrace-i386.h ---- orig/include/asm-um/ptrace-i386.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ptrace-i386.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,46 @@ -+/* -+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_PTRACE_I386_H -+#define __UM_PTRACE_I386_H -+ -+#include "sysdep/ptrace.h" -+#include "asm/ptrace-generic.h" -+ -+#define PT_REGS_EAX(r) UPT_EAX(&(r)->regs) -+#define PT_REGS_EBX(r) UPT_EBX(&(r)->regs) -+#define PT_REGS_ECX(r) UPT_ECX(&(r)->regs) -+#define PT_REGS_EDX(r) UPT_EDX(&(r)->regs) -+#define PT_REGS_ESI(r) UPT_ESI(&(r)->regs) -+#define PT_REGS_EDI(r) UPT_EDI(&(r)->regs) -+#define PT_REGS_EBP(r) UPT_EBP(&(r)->regs) -+ -+#define PT_REGS_CS(r) UPT_CS(&(r)->regs) -+#define PT_REGS_SS(r) UPT_SS(&(r)->regs) -+#define PT_REGS_DS(r) UPT_DS(&(r)->regs) -+#define PT_REGS_ES(r) UPT_ES(&(r)->regs) -+#define PT_REGS_FS(r) UPT_FS(&(r)->regs) -+#define PT_REGS_GS(r) UPT_GS(&(r)->regs) -+ -+#define PT_REGS_EFLAGS(r) UPT_EFLAGS(&(r)->regs) -+ -+#define PT_REGS_ORIG_SYSCALL(r) PT_REGS_EAX(r) -+#define PT_REGS_SYSCALL_RET(r) PT_REGS_EAX(r) -+#define PT_FIX_EXEC_STACK(sp) do ; while(0) -+ -+#define user_mode(r) UPT_IS_USER(&(r)->regs) -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/resource.h um/include/asm-um/resource.h ---- orig/include/asm-um/resource.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/resource.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_RESOURCE_H -+#define __UM_RESOURCE_H -+ -+#include "asm/arch/resource.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/rwlock.h um/include/asm-um/rwlock.h ---- orig/include/asm-um/rwlock.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/rwlock.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_RWLOCK_H -+#define __UM_RWLOCK_H -+ -+#include "asm/arch/rwlock.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/rwsem.h um/include/asm-um/rwsem.h ---- orig/include/asm-um/rwsem.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/rwsem.h Wed Apr 16 13:59:03 2003 -@@ -0,0 +1,10 @@ -+#ifndef __UM_RWSEM_H__ -+#define __UM_RWSEM_H__ -+ -+#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) -+#define __builtin_expect(exp,c) (exp) -+#endif -+ -+#include "asm/arch/rwsem.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/scatterlist.h um/include/asm-um/scatterlist.h ---- orig/include/asm-um/scatterlist.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/scatterlist.h Thu Feb 27 13:21:49 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SCATTERLIST_H -+#define __UM_SCATTERLIST_H -+ -+#include "asm/arch/scatterlist.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/segment.h um/include/asm-um/segment.h ---- orig/include/asm-um/segment.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/segment.h Fri Nov 1 19:45:34 2002 -@@ -0,0 +1,4 @@ -+#ifndef __UM_SEGMENT_H -+#define __UM_SEGMENT_H -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/semaphore.h um/include/asm-um/semaphore.h ---- orig/include/asm-um/semaphore.h Wed Dec 31 
19:00:00 1969 -+++ um/include/asm-um/semaphore.h Wed Apr 16 13:59:03 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SEMAPHORE_H -+#define __UM_SEMAPHORE_H -+ -+#include "asm/arch/semaphore.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sembuf.h um/include/asm-um/sembuf.h ---- orig/include/asm-um/sembuf.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/sembuf.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SEMBUF_H -+#define __UM_SEMBUF_H -+ -+#include "asm/arch/sembuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/serial.h um/include/asm-um/serial.h ---- orig/include/asm-um/serial.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/serial.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SERIAL_H -+#define __UM_SERIAL_H -+ -+#include "asm/arch/serial.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/shmbuf.h um/include/asm-um/shmbuf.h ---- orig/include/asm-um/shmbuf.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/shmbuf.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SHMBUF_H -+#define __UM_SHMBUF_H -+ -+#include "asm/arch/shmbuf.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/shmparam.h um/include/asm-um/shmparam.h ---- orig/include/asm-um/shmparam.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/shmparam.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SHMPARAM_H -+#define __UM_SHMPARAM_H -+ -+#include "asm/arch/shmparam.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sigcontext-generic.h um/include/asm-um/sigcontext-generic.h ---- orig/include/asm-um/sigcontext-generic.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/sigcontext-generic.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGCONTEXT_GENERIC_H -+#define __UM_SIGCONTEXT_GENERIC_H -+ -+#include "asm/arch/sigcontext.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sigcontext-i386.h um/include/asm-um/sigcontext-i386.h ---- 
orig/include/asm-um/sigcontext-i386.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/sigcontext-i386.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGCONTEXT_I386_H -+#define __UM_SIGCONTEXT_I386_H -+ -+#include "asm/sigcontext-generic.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sigcontext-ppc.h um/include/asm-um/sigcontext-ppc.h ---- orig/include/asm-um/sigcontext-ppc.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/sigcontext-ppc.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,10 @@ -+#ifndef __UM_SIGCONTEXT_PPC_H -+#define __UM_SIGCONTEXT_PPC_H -+ -+#define pt_regs sys_pt_regs -+ -+#include "asm/sigcontext-generic.h" -+ -+#undef pt_regs -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/siginfo.h um/include/asm-um/siginfo.h ---- orig/include/asm-um/siginfo.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/siginfo.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SIGINFO_H -+#define __UM_SIGINFO_H -+ -+#include "asm/arch/siginfo.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/signal.h um/include/asm-um/signal.h ---- orig/include/asm-um/signal.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/signal.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,22 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_SIGNAL_H -+#define __UM_SIGNAL_H -+ -+#include "asm/arch/signal.h" -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/smp.h um/include/asm-um/smp.h ---- orig/include/asm-um/smp.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/smp.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,19 @@ -+#ifndef __UM_SMP_H -+#define __UM_SMP_H -+ -+#ifdef CONFIG_SMP -+ -+#include "linux/config.h" -+#include "asm/current.h" -+ -+#define smp_processor_id() (current->processor) -+#define cpu_logical_map(n) (n) -+#define cpu_number_map(n) (n) -+#define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ -+extern int hard_smp_processor_id(void); -+extern unsigned long cpu_online_map; -+#define NO_PROC_ID -1 -+ -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/smplock.h um/include/asm-um/smplock.h ---- orig/include/asm-um/smplock.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/smplock.h Wed Apr 16 13:59:04 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SMPLOCK_H -+#define __UM_SMPLOCK_H -+ -+#include "asm/arch/smplock.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/socket.h um/include/asm-um/socket.h ---- orig/include/asm-um/socket.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/socket.h Thu Feb 27 13:20:13 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SOCKET_H -+#define __UM_SOCKET_H -+ -+#include "asm/arch/socket.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/sockios.h um/include/asm-um/sockios.h ---- orig/include/asm-um/sockios.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/sockios.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_SOCKIOS_H -+#define __UM_SOCKIOS_H -+ -+#include "asm/arch/sockios.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/softirq.h um/include/asm-um/softirq.h ---- orig/include/asm-um/softirq.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/softirq.h Wed Apr 16 13:59:04 2003 -@@ -0,0 +1,13 @@ -+#ifndef 
__UM_SOFTIRQ_H -+#define __UM_SOFTIRQ_H -+ -+#include "linux/smp.h" -+#include "asm/system.h" -+#include "asm/processor.h" -+ -+/* A gratuitous name change */ -+#define i386_bh_lock um_bh_lock -+#include "asm/arch/softirq.h" -+#undef i386_bh_lock -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/spinlock.h um/include/asm-um/spinlock.h ---- orig/include/asm-um/spinlock.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/spinlock.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,10 @@ -+#ifndef __UM_SPINLOCK_H -+#define __UM_SPINLOCK_H -+ -+#include "linux/config.h" -+ -+#ifdef CONFIG_SMP -+#include "asm/arch/spinlock.h" -+#endif -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/stat.h um/include/asm-um/stat.h ---- orig/include/asm-um/stat.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/stat.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_STAT_H -+#define __UM_STAT_H -+ -+#include "asm/arch/stat.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/statfs.h um/include/asm-um/statfs.h ---- orig/include/asm-um/statfs.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/statfs.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,6 @@ -+#ifndef _UM_STATFS_H -+#define _UM_STATFS_H -+ -+#include "asm/arch/statfs.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/string.h um/include/asm-um/string.h ---- orig/include/asm-um/string.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/string.h Mon Feb 24 22:52:09 2003 -@@ -0,0 +1,7 @@ -+#ifndef __UM_STRING_H -+#define __UM_STRING_H -+ -+#include "asm/arch/string.h" -+#include "asm/archparam.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/system-generic.h um/include/asm-um/system-generic.h ---- orig/include/asm-um/system-generic.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/system-generic.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,50 @@ -+#ifndef __UM_SYSTEM_GENERIC_H -+#define __UM_SYSTEM_GENERIC_H -+ -+#include "asm/arch/system.h" -+ -+#undef prepare_to_switch -+#undef 
switch_to -+#undef __save_flags -+#undef save_flags -+#undef __restore_flags -+#undef restore_flags -+#undef __cli -+#undef __sti -+#undef cli -+#undef sti -+#undef local_irq_save -+#undef local_irq_restore -+#undef local_irq_disable -+#undef local_irq_enable -+ -+#define prepare_to_switch() do ; while(0) -+ -+void *_switch_to(void *prev, void *next); -+ -+#define switch_to(prev, next, last) prev = _switch_to(prev, next) -+ -+extern int get_signals(void); -+extern int set_signals(int enable); -+extern void block_signals(void); -+extern void unblock_signals(void); -+ -+#define local_irq_save(flags) do { (flags) = set_signals(0); } while(0) -+ -+#define local_irq_restore(flags) do { set_signals(flags); } while(0) -+ -+#define local_irq_enable() unblock_signals() -+#define local_irq_disable() block_signals() -+ -+#define __sti() unblock_signals() -+#define sti() unblock_signals() -+#define __cli() block_signals() -+#define cli() block_signals() -+ -+#define __save_flags(x) do { (flags) = get_signals(); } while(0) -+#define save_flags(x) __save_flags(x) -+ -+#define __restore_flags(x) local_irq_restore(x) -+#define restore_flags(x) __restore_flags(x) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/system-i386.h um/include/asm-um/system-i386.h ---- orig/include/asm-um/system-i386.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/system-i386.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,39 @@ -+#ifndef __UM_SYSTEM_I386_H -+#define __UM_SYSTEM_I386_H -+ -+#include "asm/system-generic.h" -+ -+#define __HAVE_ARCH_CMPXCHG 1 -+ -+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, -+ unsigned long new, int size) -+{ -+ unsigned long prev; -+ switch (size) { -+ case 1: -+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2" -+ : "=a"(prev) -+ : "q"(new), "m"(*__xg(ptr)), "0"(old) -+ : "memory"); -+ return prev; -+ case 2: -+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2" -+ : "=a"(prev) -+ : "q"(new), "m"(*__xg(ptr)), "0"(old) -+ : 
"memory"); -+ return prev; -+ case 4: -+ __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2" -+ : "=a"(prev) -+ : "q"(new), "m"(*__xg(ptr)), "0"(old) -+ : "memory"); -+ return prev; -+ } -+ return old; -+} -+ -+#define cmpxchg(ptr,o,n)\ -+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ -+ (unsigned long)(n),sizeof(*(ptr)))) -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/system-ppc.h um/include/asm-um/system-ppc.h ---- orig/include/asm-um/system-ppc.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/system-ppc.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,12 @@ -+#ifndef __UM_SYSTEM_PPC_H -+#define __UM_SYSTEM_PPC_H -+ -+#define _switch_to _ppc_switch_to -+ -+#include "asm/arch/system.h" -+ -+#undef _switch_to -+ -+#include "asm/system-generic.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/termbits.h um/include/asm-um/termbits.h ---- orig/include/asm-um/termbits.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/termbits.h Wed Oct 23 21:11:14 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TERMBITS_H -+#define __UM_TERMBITS_H -+ -+#include "asm/arch/termbits.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/termios.h um/include/asm-um/termios.h ---- orig/include/asm-um/termios.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/termios.h Thu Feb 27 13:20:13 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TERMIOS_H -+#define __UM_TERMIOS_H -+ -+#include "asm/arch/termios.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/timex.h um/include/asm-um/timex.h ---- orig/include/asm-um/timex.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/timex.h Wed Mar 26 22:01:25 2003 -@@ -0,0 +1,18 @@ -+#ifndef __UM_TIMEX_H -+#define __UM_TIMEX_H -+ -+#include "linux/time.h" -+ -+typedef unsigned long cycles_t; -+ -+#define cacheflush_time (0) -+ -+static inline cycles_t get_cycles (void) -+{ -+ return 0; -+} -+ -+#define vxtime_lock() do ; while (0) -+#define vxtime_unlock() do ; while (0) -+ -+#endif -diff -Naur -X ../exclude-files 
orig/include/asm-um/tlb.h um/include/asm-um/tlb.h ---- orig/include/asm-um/tlb.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/tlb.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1 @@ -+#include <asm-generic/tlb.h> -diff -Naur -X ../exclude-files orig/include/asm-um/types.h um/include/asm-um/types.h ---- orig/include/asm-um/types.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/types.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_TYPES_H -+#define __UM_TYPES_H -+ -+#include "asm/arch/types.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/uaccess.h um/include/asm-um/uaccess.h ---- orig/include/asm-um/uaccess.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/uaccess.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,97 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __UM_UACCESS_H -+#define __UM_UACCESS_H -+ -+#define VERIFY_READ 0 -+#define VERIFY_WRITE 1 -+ -+/* -+ * The fs value determines whether argument validity checking should be -+ * performed or not. If get_fs() == USER_DS, checking is performed, with -+ * get_fs() == KERNEL_DS, checking is bypassed. -+ * -+ * For historical reasons, these macros are grossly misnamed. 
-+ */ -+ -+#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) -+ -+#define KERNEL_DS MAKE_MM_SEG(0xFFFFFFFF) -+#define USER_DS MAKE_MM_SEG(TASK_SIZE) -+ -+#define get_ds() (KERNEL_DS) -+#define get_fs() (current->addr_limit) -+#define set_fs(x) (current->addr_limit = (x)) -+ -+#define segment_eq(a, b) ((a).seg == (b).seg) -+ -+#include "um_uaccess.h" -+ -+#define __copy_from_user(to, from, n) copy_from_user(to, from, n) -+ -+#define __copy_to_user(to, from, n) copy_to_user(to, from, n) -+ -+#define __get_user(x, ptr) \ -+({ \ -+ const __typeof__(ptr) __private_ptr = ptr; \ -+ __typeof__(*(__private_ptr)) __private_val; \ -+ int __private_ret = -EFAULT; \ -+ (x) = 0; \ -+ if (__copy_from_user(&__private_val, (__private_ptr), \ -+ sizeof(*(__private_ptr))) == 0) {\ -+ (x) = (__typeof__(*(__private_ptr))) __private_val; \ -+ __private_ret = 0; \ -+ } \ -+ __private_ret; \ -+}) -+ -+#define get_user(x, ptr) \ -+({ \ -+ const __typeof__((*ptr)) *private_ptr = (ptr); \ -+ (access_ok(VERIFY_READ, private_ptr, sizeof(*private_ptr)) ? \ -+ __get_user(x, private_ptr) : ((x) = 0, -EFAULT)); \ -+}) -+ -+#define __put_user(x, ptr) \ -+({ \ -+ __typeof__(ptr) __private_ptr = ptr; \ -+ __typeof__(*(__private_ptr)) __private_val; \ -+ int __private_ret = -EFAULT; \ -+ __private_val = (__typeof__(*(__private_ptr))) (x); \ -+ if (__copy_to_user((__private_ptr), &__private_val, \ -+ sizeof(*(__private_ptr))) == 0) { \ -+ __private_ret = 0; \ -+ } \ -+ __private_ret; \ -+}) -+ -+#define put_user(x, ptr) \ -+({ \ -+ __typeof__(*(ptr)) *private_ptr = (ptr); \ -+ (access_ok(VERIFY_WRITE, private_ptr, sizeof(*private_ptr)) ? \ -+ __put_user(x, private_ptr) : -EFAULT); \ -+}) -+ -+#define strlen_user(str) strnlen_user(str, ~0UL >> 1) -+ -+struct exception_table_entry -+{ -+ unsigned long insn; -+ unsigned long fixup; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. 
-+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/ucontext.h um/include/asm-um/ucontext.h ---- orig/include/asm-um/ucontext.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/ucontext.h Sun Dec 1 13:20:58 2002 -@@ -0,0 +1,6 @@ -+#ifndef _ASM_UM_UCONTEXT_H -+#define _ASM_UM_UCONTEXT_H -+ -+#include "asm/arch/ucontext.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/unaligned.h um/include/asm-um/unaligned.h ---- orig/include/asm-um/unaligned.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/unaligned.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_UNALIGNED_H -+#define __UM_UNALIGNED_H -+ -+#include "asm/arch/unaligned.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/unistd.h um/include/asm-um/unistd.h ---- orig/include/asm-um/unistd.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/unistd.h Wed Mar 26 22:01:27 2003 -@@ -0,0 +1,118 @@ -+/* -+ * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef _UM_UNISTD_H_ -+#define _UM_UNISTD_H_ -+ -+#include "linux/resource.h" -+#include "asm/uaccess.h" -+ -+extern long sys_open(const char *filename, int flags, int mode); -+extern long sys_dup(unsigned int fildes); -+extern long sys_close(unsigned int fd); -+extern int um_execve(const char *file, char *const argv[], char *const env[]); -+extern long sys_setsid(void); -+extern long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options); -+extern long sys_wait4(pid_t pid,unsigned int *stat_addr, int options, -+ struct rusage *ru); -+extern long sys_mount(char *dev_name, char *dir_name, char *type, -+ unsigned long flags, void *data); -+extern long sys_select(int n, 
fd_set *inp, fd_set *outp, fd_set *exp, -+ struct timeval *tvp); -+extern long sys_lseek(unsigned int fildes, unsigned long offset, int whence); -+extern long sys_read(unsigned int fildes, char *buf, int len); -+extern long sys_write(unsigned int fildes, char *buf, int len); -+ -+#ifdef __KERNEL_SYSCALLS__ -+ -+#define KERNEL_CALL(ret_t, sys, args...) \ -+ mm_segment_t fs = get_fs(); \ -+ ret_t ret; \ -+ set_fs(KERNEL_DS); \ -+ ret = sys(args); \ -+ set_fs(fs); \ -+ return ret; -+ -+static inline long open(const char *pathname, int flags, int mode) -+{ -+ KERNEL_CALL(int, sys_open, pathname, flags, mode) -+} -+ -+static inline long dup(unsigned int fd) -+{ -+ KERNEL_CALL(int, sys_dup, fd); -+} -+ -+static inline long close(unsigned int fd) -+{ -+ KERNEL_CALL(int, sys_close, fd); -+} -+ -+static inline int execve(const char *filename, char *const argv[], -+ char *const envp[]) -+{ -+ KERNEL_CALL(int, um_execve, filename, argv, envp); -+} -+ -+static inline long waitpid(pid_t pid, unsigned int *status, int options) -+{ -+ KERNEL_CALL(pid_t, sys_wait4, pid, status, options, NULL) -+} -+ -+static inline pid_t wait(int *status) -+{ -+ KERNEL_CALL(pid_t, sys_wait4, -1, status, 0, NULL) -+} -+ -+static inline pid_t setsid(void) -+{ -+ KERNEL_CALL(pid_t, sys_setsid) -+} -+ -+static inline long lseek(unsigned int fd, off_t offset, unsigned int whence) -+{ -+ KERNEL_CALL(long, sys_lseek, fd, offset, whence) -+} -+ -+static inline int read(unsigned int fd, char * buf, int len) -+{ -+ KERNEL_CALL(int, sys_read, fd, buf, len) -+} -+ -+static inline int write(unsigned int fd, char * buf, int len) -+{ -+ KERNEL_CALL(int, sys_write, fd, buf, len) -+} -+ -+#endif -+ -+/* Save the value of __KERNEL_SYSCALLS__, undefine it, include the underlying -+ * arch's unistd.h for the system call numbers, and restore the old -+ * __KERNEL_SYSCALLS__. 
-+ */ -+ -+#ifdef __KERNEL_SYSCALLS__ -+#define __SAVE_KERNEL_SYSCALLS__ __KERNEL_SYSCALLS__ -+#endif -+ -+#undef __KERNEL_SYSCALLS__ -+#include "asm/arch/unistd.h" -+ -+#ifdef __KERNEL_SYSCALLS__ -+#define __KERNEL_SYSCALLS__ __SAVE_KERNEL_SYSCALLS__ -+#endif -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/asm-um/user.h um/include/asm-um/user.h ---- orig/include/asm-um/user.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/user.h Wed Apr 16 13:59:45 2003 -@@ -0,0 +1,6 @@ -+#ifndef __UM_USER_H -+#define __UM_USER_H -+ -+#include "asm/arch/user.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/vga.h um/include/asm-um/vga.h ---- orig/include/asm-um/vga.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/vga.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_VGA_H -+#define __UM_VGA_H -+ -+#include "asm/arch/vga.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/asm-um/xor.h um/include/asm-um/xor.h ---- orig/include/asm-um/xor.h Wed Dec 31 19:00:00 1969 -+++ um/include/asm-um/xor.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,6 @@ -+#ifndef __UM_XOR_H -+#define __UM_XOR_H -+ -+#include "asm-generic/xor.h" -+ -+#endif -diff -Naur -X ../exclude-files orig/include/linux/blk.h um/include/linux/blk.h ---- orig/include/linux/blk.h Sun Sep 15 12:13:19 2002 -+++ um/include/linux/blk.h Wed Apr 16 13:59:04 2003 -@@ -320,6 +320,15 @@ - #define DEVICE_REQUEST do_ida_request - #define DEVICE_NR(device) (MINOR(device) >> 4) - -+#elif (MAJOR_NR == UBD_MAJOR) -+ -+#define DEVICE_NAME "User-mode block device" -+#define DEVICE_INTR do_ubd -+#define DEVICE_REQUEST 
do_ubd_request -+#define DEVICE_NR(device) (MINOR(device) >> UBD_SHIFT) -+#define DEVICE_ON(device) -+#define DEVICE_OFF(device) -+ - #endif /* MAJOR_NR == whatever */ - - /* provide DEVICE_xxx defaults, if not explicitly defined -diff -Naur -X ../exclude-files orig/include/linux/fs.h um/include/linux/fs.h ---- orig/include/linux/fs.h Thu Feb 27 13:04:27 2003 -+++ um/include/linux/fs.h Wed Apr 16 13:59:03 2003 -@@ -318,6 +318,8 @@ - #include <linux/ncp_fs_i.h> - #include <linux/proc_fs_i.h> - #include <linux/usbdev_fs_i.h> -+#include <linux/hostfs_fs_i.h> -+#include <linux/hppfs_fs_i.h> - #include <linux/jffs2_fs_i.h> - #include <linux/cramfs_fs_sb.h> - -@@ -509,7 +511,9 @@ - struct proc_inode_info proc_i; - struct socket socket_i; - struct usbdev_inode_info usbdev_i; -- struct jffs2_inode_info jffs2_i; -+ struct hostfs_inode_info hostfs_i; -+ struct hppfs_inode_info hppfs_i; -+ struct jffs2_inode_info jffs2_i; - void *generic_ip; - } u; - }; -diff -Naur -X ../exclude-files orig/include/linux/hostfs_fs_i.h um/include/linux/hostfs_fs_i.h ---- orig/include/linux/hostfs_fs_i.h Wed Dec 31 19:00:00 1969 -+++ um/include/linux/hostfs_fs_i.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,21 @@ -+#ifndef _HOSTFS_FS_I -+#define _HOSTFS_FS_I -+ -+struct hostfs_inode_info { -+ char *host_filename; -+ int fd; -+ int mode; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/linux/hppfs_fs_i.h um/include/linux/hppfs_fs_i.h ---- orig/include/linux/hppfs_fs_i.h Wed Dec 31 19:00:00 1969 -+++ um/include/linux/hppfs_fs_i.h Wed Oct 23 21:08:05 2002 -@@ -0,0 +1,19 @@ -+#ifndef _HPPFS_FS_I -+#define _HPPFS_FS_I -+ -+struct hppfs_inode_info { -+ struct dentry *proc_dentry; -+}; -+ -+#endif -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. -+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/include/linux/kernel.h um/include/linux/kernel.h ---- orig/include/linux/kernel.h Thu Feb 27 13:04:27 2003 -+++ um/include/linux/kernel.h Wed Mar 26 22:01:25 2003 -@@ -49,7 +49,7 @@ - # define ATTRIB_NORET __attribute__((noreturn)) - # define NORET_AND noreturn, - --#ifdef __i386__ -+#if defined(__i386__) || defined(UM_FASTCALL) - #define FASTCALL(x) x __attribute__((regparm(3))) - #else - #define FASTCALL(x) x -diff -Naur -X ../exclude-files orig/include/linux/kernel_stat.h um/include/linux/kernel_stat.h ---- orig/include/linux/kernel_stat.h Thu Feb 27 13:04:27 2003 -+++ um/include/linux/kernel_stat.h Wed Apr 16 13:59:39 2003 -@@ -12,7 +12,7 @@ - * used by rstatd/perfmeter - */ - --#define DK_MAX_MAJOR 16 -+#define DK_MAX_MAJOR 99 - #define DK_MAX_DISK 16 - - struct kernel_stat { -diff -Naur -X ../exclude-files orig/include/linux/mm.h um/include/linux/mm.h ---- orig/include/linux/mm.h Sun Sep 15 12:13:19 2002 -+++ um/include/linux/mm.h Wed Apr 16 13:59:04 2003 -@@ -425,6 +425,14 @@ - extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, 
unsigned int order, zonelist_t *zonelist)); - extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); - -+#ifndef HAVE_ARCH_VALIDATE -+static inline struct page *arch_validate(struct page *page, -+ unsigned int gfp_mask, int order) -+{ -+ return(page); -+} -+#endif -+ - static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) - { - /* -@@ -432,7 +440,7 @@ - */ - if (order >= MAX_ORDER) - return NULL; -- return _alloc_pages(gfp_mask, order); -+ return arch_validate(_alloc_pages(gfp_mask, order), gfp_mask, order); - } - - #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -@@ -492,6 +500,9 @@ - int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, - int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); - -+extern long do_mprotect(struct mm_struct *mm, unsigned long start, -+ size_t len, unsigned long prot); -+ - /* - * On a two-level page table, this ends up being trivial. 
Thus the - * inlining and the symmetry break with pte_alloc() that does all -@@ -539,9 +550,10 @@ - - extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); - --extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, -- unsigned long len, unsigned long prot, -- unsigned long flag, unsigned long pgoff); -+extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file, -+ unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flag, -+ unsigned long pgoff); - - static inline unsigned long do_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, -@@ -551,7 +563,8 @@ - if ((offset + PAGE_ALIGN(len)) < offset) - goto out; - if (!(offset & ~PAGE_MASK)) -- ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -+ ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag, -+ offset >> PAGE_SHIFT); - out: - return ret; - } -diff -Naur -X ../exclude-files orig/include/linux/proc_mm.h um/include/linux/proc_mm.h ---- orig/include/linux/proc_mm.h Wed Dec 31 19:00:00 1969 -+++ um/include/linux/proc_mm.h Wed Apr 16 13:59:47 2003 -@@ -0,0 +1,48 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#ifndef __PROC_MM_H -+#define __PROC_MM_H -+ -+#include "linux/sched.h" -+ -+#define MM_MMAP 54 -+#define MM_MUNMAP 55 -+#define MM_MPROTECT 56 -+#define MM_COPY_SEGMENTS 57 -+ -+struct mm_mmap { -+ unsigned long addr; -+ unsigned long len; -+ unsigned long prot; -+ unsigned long flags; -+ unsigned long fd; -+ unsigned long offset; -+}; -+ -+struct mm_munmap { -+ unsigned long addr; -+ unsigned long len; -+}; -+ -+struct mm_mprotect { -+ unsigned long addr; -+ unsigned long len; -+ unsigned int prot; -+}; -+ -+struct proc_mm_op { -+ int op; -+ union { -+ struct mm_mmap mmap; -+ struct mm_munmap munmap; -+ struct mm_mprotect mprotect; -+ int copy_segments; -+ } u; -+}; -+ -+extern 
struct mm_struct *proc_mm_get_mm(int fd); -+ -+#endif -diff -Naur -X ../exclude-files orig/include/linux/tty.h um/include/linux/tty.h ---- orig/include/linux/tty.h Thu Feb 27 13:04:28 2003 -+++ um/include/linux/tty.h Wed Apr 16 13:59:04 2003 -@@ -309,6 +309,9 @@ - spinlock_t read_lock; - /* If the tty has a pending do_SAK, queue it here - akpm */ - struct tq_struct SAK_tq; -+#ifdef CONFIG_TTY_LOG -+ int log_fd; -+#endif - }; - - /* tty magic number */ -@@ -366,6 +369,7 @@ - extern int specialix_init(void); - extern int espserial_init(void); - extern int macserial_init(void); -+extern int stdio_init(void); - extern int a2232board_init(void); - - extern int tty_paranoia_check(struct tty_struct *tty, kdev_t device, -@@ -420,6 +424,8 @@ - - extern int vt_ioctl(struct tty_struct *tty, struct file * file, - unsigned int cmd, unsigned long arg); -+ -+extern void stdio_console_init(void); - - #endif /* __KERNEL__ */ - #endif -diff -Naur -X ../exclude-files orig/init/do_mounts.c um/init/do_mounts.c ---- orig/init/do_mounts.c Thu Feb 27 13:04:28 2003 -+++ um/init/do_mounts.c Thu Feb 27 13:05:27 2003 -@@ -153,6 +153,22 @@ - { "pf", 0x2f00 }, - { "apblock", APBLOCK_MAJOR << 8}, - { "ddv", DDV_MAJOR << 8}, -+ { "ubd0", UBD_MAJOR << 8 | 0 << 4}, -+ { "ubda", UBD_MAJOR << 8 | 0 << 4}, -+ { "ubd1", UBD_MAJOR << 8 | 1 << 4}, -+ { "ubdb", UBD_MAJOR << 8 | 1 << 4}, -+ { "ubd2", UBD_MAJOR << 8 | 2 << 4}, -+ { "ubdc", UBD_MAJOR << 8 | 2 << 4}, -+ { "ubd3", UBD_MAJOR << 8 | 3 << 4}, -+ { "ubdd", UBD_MAJOR << 8 | 3 << 4}, -+ { "ubd4", UBD_MAJOR << 8 | 4 << 4}, -+ { "ubde", UBD_MAJOR << 8 | 4 << 4}, -+ { "ubd5", UBD_MAJOR << 8 | 5 << 4}, -+ { "ubdf", UBD_MAJOR << 8 | 5 << 4}, -+ { "ubd6", UBD_MAJOR << 8 | 6 << 4}, -+ { "ubdg", UBD_MAJOR << 8 | 6 << 4}, -+ { "ubd7", UBD_MAJOR << 8 | 7 << 4}, -+ { "ubdh", UBD_MAJOR << 8 | 7 << 4}, - { "jsfd", JSFD_MAJOR << 8}, - #if defined(CONFIG_ARCH_S390) - { "dasda", (DASD_MAJOR << MINORBITS) }, -diff -Naur -X ../exclude-files orig/kernel/panic.c 
um/kernel/panic.c ---- orig/kernel/panic.c Thu Feb 27 13:04:29 2003 -+++ um/kernel/panic.c Thu Feb 27 13:05:27 2003 -@@ -66,7 +66,7 @@ - smp_send_stop(); - #endif - -- notifier_call_chain(&panic_notifier_list, 0, NULL); -+ notifier_call_chain(&panic_notifier_list, 0, buf); - - if (panic_timeout > 0) - { -diff -Naur -X ../exclude-files orig/mm/Makefile um/mm/Makefile ---- orig/mm/Makefile Wed Aug 21 11:47:43 2002 -+++ um/mm/Makefile Fri Nov 8 14:21:36 2002 -@@ -17,5 +17,6 @@ - shmem.o - - obj-$(CONFIG_HIGHMEM) += highmem.o -+obj-$(CONFIG_PROC_MM) += proc_mm.o - - include $(TOPDIR)/Rules.make -diff -Naur -X ../exclude-files orig/mm/mmap.c um/mm/mmap.c ---- orig/mm/mmap.c Thu Feb 27 13:04:29 2003 -+++ um/mm/mmap.c Thu Feb 27 13:05:27 2003 -@@ -390,10 +390,11 @@ - return 0; - } - --unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, -- unsigned long prot, unsigned long flags, unsigned long pgoff) -+unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file, -+ unsigned long addr, unsigned long len, -+ unsigned long prot, unsigned long flags, -+ unsigned long pgoff) - { -- struct mm_struct * mm = current->mm; - struct vm_area_struct * vma, * prev; - unsigned int vm_flags; - int correct_wcount = 0; -diff -Naur -X ../exclude-files orig/mm/mprotect.c um/mm/mprotect.c ---- orig/mm/mprotect.c Wed Aug 21 11:47:43 2002 -+++ um/mm/mprotect.c Sun Nov 10 20:24:32 2002 -@@ -264,7 +264,8 @@ - return 0; - } - --asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -+long do_mprotect(struct mm_struct *mm, unsigned long start, size_t len, -+ unsigned long prot) - { - unsigned long nstart, end, tmp; - struct vm_area_struct * vma, * next, * prev; -@@ -281,9 +282,9 @@ - if (end == start) - return 0; - -- down_write(¤t->mm->mmap_sem); -+ down_write(&mm->mmap_sem); - -- vma = find_vma_prev(current->mm, start, &prev); -+ vma = find_vma_prev(mm, start, &prev); - error = -ENOMEM; - if (!vma || vma->vm_start > start) - 
goto out; -@@ -332,6 +333,11 @@ - prev->vm_mm->map_count--; - } - out: -- up_write(¤t->mm->mmap_sem); -+ up_write(&mm->mmap_sem); - return error; -+} -+ -+asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) -+{ -+ return(do_mprotect(current->mm, start, len, prot)); - } -diff -Naur -X ../exclude-files orig/mm/proc_mm.c um/mm/proc_mm.c ---- orig/mm/proc_mm.c Wed Dec 31 19:00:00 1969 -+++ um/mm/proc_mm.c Tue Nov 19 14:20:26 2002 -@@ -0,0 +1,173 @@ -+/* -+ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) -+ * Licensed under the GPL -+ */ -+ -+#include "linux/init.h" -+#include "linux/proc_fs.h" -+#include "linux/proc_mm.h" -+#include "linux/file.h" -+#include "asm/uaccess.h" -+#include "asm/mmu_context.h" -+ -+static struct file_operations proc_mm_fops; -+ -+struct mm_struct *proc_mm_get_mm(int fd) -+{ -+ struct mm_struct *ret = ERR_PTR(-EBADF); -+ struct file *file; -+ -+ file = fget(fd); -+ if (!file) -+ goto out; -+ -+ ret = ERR_PTR(-EINVAL); -+ if(file->f_op != &proc_mm_fops) -+ goto out_fput; -+ -+ ret = file->private_data; -+ out_fput: -+ fput(file); -+ out: -+ return(ret); -+} -+ -+extern long do_mmap2(struct mm_struct *mm, unsigned long addr, -+ unsigned long len, unsigned long prot, -+ unsigned long flags, unsigned long fd, -+ unsigned long pgoff); -+ -+static ssize_t write_proc_mm(struct file *file, const char *buffer, -+ size_t count, loff_t *ppos) -+{ -+ struct mm_struct *mm = file->private_data; -+ struct proc_mm_op req; -+ int n, ret; -+ -+ if(count > sizeof(req)) -+ return(-EINVAL); -+ -+ n = copy_from_user(&req, buffer, count); -+ if(n != 0) -+ return(-EFAULT); -+ -+ ret = count; -+ switch(req.op){ -+ case MM_MMAP: { -+ struct mm_mmap *map = &req.u.mmap; -+ -+ ret = do_mmap2(mm, map->addr, map->len, map->prot, -+ map->flags, map->fd, map->offset >> PAGE_SHIFT); -+ if((ret & ~PAGE_MASK) == 0) -+ ret = count; -+ -+ break; -+ } -+ case MM_MUNMAP: { -+ struct mm_munmap *unmap = &req.u.munmap; -+ -+ down_write(&mm->mmap_sem); 
-+ ret = do_munmap(mm, unmap->addr, unmap->len); -+ up_write(&mm->mmap_sem); -+ -+ if(ret == 0) -+ ret = count; -+ break; -+ } -+ case MM_MPROTECT: { -+ struct mm_mprotect *protect = &req.u.mprotect; -+ -+ ret = do_mprotect(mm, protect->addr, protect->len, -+ protect->prot); -+ if(ret == 0) -+ ret = count; -+ break; -+ } -+ -+ case MM_COPY_SEGMENTS: { -+ struct mm_struct *from = proc_mm_get_mm(req.u.copy_segments); -+ -+ if(IS_ERR(from)){ -+ ret = PTR_ERR(from); -+ break; -+ } -+ -+ mm_copy_segments(from, mm); -+ break; -+ } -+ default: -+ ret = -EINVAL; -+ break; -+ } -+ -+ return(ret); -+} -+ -+static int open_proc_mm(struct inode *inode, struct file *file) -+{ -+ struct mm_struct *mm = mm_alloc(); -+ int ret; -+ -+ ret = -ENOMEM; -+ if(mm == NULL) -+ goto out_mem; -+ -+ ret = init_new_context(current, mm); -+ if(ret) -+ goto out_free; -+ -+ spin_lock(&mmlist_lock); -+ list_add(&mm->mmlist, ¤t->mm->mmlist); -+ mmlist_nr++; -+ spin_unlock(&mmlist_lock); -+ -+ file->private_data = mm; -+ -+ return(0); -+ -+ out_free: -+ mmput(mm); -+ out_mem: -+ return(ret); -+} -+ -+static int release_proc_mm(struct inode *inode, struct file *file) -+{ -+ struct mm_struct *mm = file->private_data; -+ -+ mmput(mm); -+ return(0); -+} -+ -+static struct file_operations proc_mm_fops = { -+ .open = open_proc_mm, -+ .release = release_proc_mm, -+ .write = write_proc_mm, -+}; -+ -+static int make_proc_mm(void) -+{ -+ struct proc_dir_entry *ent; -+ -+ ent = create_proc_entry("mm", 0222, &proc_root); -+ if(ent == NULL){ -+ printk("make_proc_mm : Failed to register /proc/mm\n"); -+ return(0); -+ } -+ ent->proc_fops = &proc_mm_fops; -+ -+ return(0); -+} -+ -+__initcall(make_proc_mm); -+ -+/* -+ * Overrides for Emacs so that we follow Linus's tabbing style. -+ * Emacs will notice this stuff at the end of the file and automatically -+ * adjust the settings for this buffer only. This must remain at the end -+ * of the file. 
-+ * --------------------------------------------------------------------------- -+ * Local variables: -+ * c-file-style: "linux" -+ * End: -+ */ -diff -Naur -X ../exclude-files orig/mm/slab.c um/mm/slab.c ---- orig/mm/slab.c Thu Feb 27 13:04:29 2003 -+++ um/mm/slab.c Thu Feb 27 13:05:27 2003 -@@ -1946,10 +1946,14 @@ - - name = cachep->name; - { -+ mm_segment_t fs; - char tmp; -+ fs = get_fs(); -+ set_fs(KERNEL_DS); - if (__get_user(tmp, name)) - name = "broken"; -- } -+ set_fs(fs); -+ } - - seq_printf(m, "%-17s %6lu %6lu %6u %4lu %4lu %4u", - name, active_objs, num_objs, cachep->objsize, diff --git a/lustre/kernel_patches/patches/uml_check_get_page.patch b/lustre/kernel_patches/patches/uml_check_get_page.patch deleted file mode 100644 index 228d086..0000000 --- a/lustre/kernel_patches/patches/uml_check_get_page.patch +++ /dev/null @@ -1,32 +0,0 @@ - - - - arch/um/kernel/mem.c | 15 +++++++++++++++ - 1 files changed, 15 insertions(+) - ---- linux-2.4.20/arch/um/kernel/mem.c~uml_check_get_page 2003-04-08 23:34:50.000000000 -0600 -+++ linux-2.4.20-braam/arch/um/kernel/mem.c 2003-04-08 23:34:50.000000000 -0600 -@@ -712,6 +712,21 @@ struct page *pte_mem_map(pte_t pte) - return(phys_mem_map(pte_val(pte))); - } - -+struct page *check_get_page(unsigned long kaddr) -+{ -+ struct page *page; -+ struct mem_region *mr; -+ unsigned long phys = __pa(kaddr); -+ unsigned int n = phys_region_index(phys); -+ -+ if (regions[n] == NULL) -+ return NULL; -+ -+ mr = regions[n]; -+ page = (struct page *) mr->mem_map; -+ return page + ((phys_addr(phys)) >> PAGE_SHIFT); -+} -+ - struct mem_region *page_region(struct page *page, int *index_out) - { - int i; - -_ diff --git a/lustre/kernel_patches/patches/uml_compile_fixes.patch b/lustre/kernel_patches/patches/uml_compile_fixes.patch deleted file mode 100644 index 815bd92..0000000 --- a/lustre/kernel_patches/patches/uml_compile_fixes.patch +++ /dev/null @@ -1,18 +0,0 @@ - - - - 0 files changed - ---- 
linux-2.4.18-17.8.0/include/asm-um/pgtable.h~uml_compile_fixes 2002-12-06 15:46:21.000000000 -0800 -+++ linux-2.4.18-17.8.0-zab/include/asm-um/pgtable.h 2002-12-06 15:46:21.000000000 -0800 -@@ -200,7 +200,7 @@ static inline void pgd_clear(pgd_t * pgd - * called on a highmem page. - */ - --#define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; }) -+//#define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; }) - #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) - - extern struct page *pte_mem_map(pte_t pte); - -_ diff --git a/lustre/kernel_patches/patches/uml_no_panic.patch b/lustre/kernel_patches/patches/uml_no_panic.patch deleted file mode 100644 index 59069f9..0000000 --- a/lustre/kernel_patches/patches/uml_no_panic.patch +++ /dev/null @@ -1,32 +0,0 @@ - - - - arch/um/kernel/mem.c | 8 ++++++-- - 1 files changed, 6 insertions(+), 2 deletions(-) - ---- linux-2.4.20/arch/um/kernel/mem.c~uml_no_panic 2003-04-08 23:34:57.000000000 -0600 -+++ linux-2.4.20-braam/arch/um/kernel/mem.c 2003-04-08 23:34:57.000000000 -0600 -@@ -742,7 +742,9 @@ struct mem_region *page_region(struct pa - return(region); - } - } -- panic("No region found for page"); -+// panic("No region found for page"); -+ printk(KERN_ERR "no region foudn for page %p\n, returning NULL\n", -+ page); - return(NULL); - } - -@@ -814,7 +816,9 @@ extern unsigned long region_pa(void *vir - (addr <= region->start + region->len)) - return(mk_phys(addr - region->start, i)); - } -- panic("region_pa : no region for virtual address"); -+ //panic("region_pa : no region for virtual address"); -+ printk(KERN_ERR "no region for virtual address %lu, return pa 0\n", -+ addr); - return(0); - } - - -_ diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.18-18.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.18-18.patch deleted file mode 100644 index 141b5d4..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.18-18.patch +++ /dev/null @@ -1,1542 +0,0 @@ - fs/dcache.c | 20 ++ - 
fs/exec.c | 19 +- - fs/namei.c | 378 +++++++++++++++++++++++++++++++++++++++++-------- - fs/nfsd/vfs.c | 2 - fs/open.c | 120 +++++++++++++-- - fs/proc/base.c | 1 - fs/stat.c | 8 - - include/linux/dcache.h | 31 ++++ - include/linux/fs.h | 28 +++ - kernel/ksyms.c | 1 - 10 files changed, 522 insertions(+), 85 deletions(-) - ---- linux-2.4.18-61chaos/fs/dcache.c~vfs_intent-2.4.18-18 Sun Jun 1 21:55:14 2003 -+++ linux-2.4.18-61chaos-root/fs/dcache.c Sun Jun 1 21:59:04 2003 -@@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry) - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -645,6 +652,7 @@ struct dentry * d_alloc(struct dentry * - dentry->d_fsdata = NULL; - dentry->d_extra_attributes = NULL; - dentry->d_mounted = 0; -+ dentry->d_it = NULL; - INIT_LIST_HEAD(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_lru); - INIT_LIST_HEAD(&dentry->d_subdirs); -@@ -859,13 +867,19 @@ void d_delete(struct dentry * dentry) - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ ---- linux-2.4.18-61chaos/fs/namei.c~vfs_intent-2.4.18-18 Sun Jun 1 21:55:14 2003 -+++ linux-2.4.18-61chaos-root/fs/namei.c Sun Jun 1 23:14:49 2003 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... 
- */ - -+void intent_release(struct dentry *de, struct lookup_intent *it) -+{ -+ if (it && de->d_op && de->d_op->d_intent_release) -+ de->d_op->d_intent_release(de, it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd) - * Internal lookup() using the new generic dcache. - * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,14 @@ static struct dentry * cached_lookup(str - * make sure that nobody added the entry to the dcache in the meantime.. 
- * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; - -+again: -+ - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +319,9 @@ static struct dentry * real_lookup(struc - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup2) -+ result = dir->i_op->lookup2(dir, dentry, it); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +343,12 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate2) { -+ if (!result->d_op->d_revalidate2(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ goto again; -+ } - } - return result; - } -@@ -334,7 +362,8 @@ int max_recursive_link = 5; - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. 
- */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= max_recursive_link) -@@ -348,10 +377,21 @@ static inline int do_follow_link(struct - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -- err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, it); -+ path_release(nd); -+ err = -ENOLINK; -+ } - current->link_count--; - return err; - loop: -+ intent_release(dentry, it); - path_release(nd); - return -ELOOP; - } -@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str - return __follow_up(mnt, dentry); - } - --static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) -+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry, -+ struct lookup_intent *it) - { - struct vfsmount *mounted; - - spin_lock(&dcache_lock); - mounted = lookup_mnt(*mnt, *dentry); - if (mounted) { -+ int opc = 0, mode = 0; - *mnt = mntget(mounted); - spin_unlock(&dcache_lock); -+ if (it) { -+ opc = it->it_op; -+ mode = it->it_mode; -+ } -+ intent_release(*dentry, it); -+ if (it) { -+ it->it_op = opc; -+ it->it_mode = mode; -+ } - dput(*dentry); - mntput(mounted->mnt_parent); - *dentry = dget(mounted->mnt_root); -@@ -401,7 +452,7 @@ static inline int __follow_down(struct v - - int follow_down(struct vfsmount **mnt, struct dentry **dentry) - { -- return __follow_down(mnt,dentry); -+ return __follow_down(mnt,dentry,NULL); - } - - static inline void follow_dotdot(struct nameidata *nd) -@@ -437,7 +488,7 @@ static 
inline void follow_dotdot(struct - mntput(nd->mnt); - nd->mnt = parent; - } -- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry)) -+ while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL)) - ; - } - -@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct - * - * We expect 'base' to be positive and a directory. - */ --int link_path_walk(const char * name, struct nameidata *nd) -+int link_path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } - /* Check mountpoints.. 
*/ -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL)) - ; - - err = -ENOENT; -@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st - if (!inode->i_op) - goto out_dput; - -- if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ if (inode->i_op->follow_link || inode->i_op->follow_link2) { -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup2) - break; - continue; - /* here ends the main loop */ -@@ -592,22 +644,23 @@ last_component: - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it)) - ; - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) -- && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ && inode && inode->i_op && -+ (inode->i_op->follow_link || inode->i_op->follow_link2)) { -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -621,7 +674,8 @@ last_component: - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup2)) - break; - } - goto return_base; -@@ -645,7 +699,24 @@ return_reval: - * Check the cached dentry for staleness. 
- */ - dentry = nd->dentry; -- if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { -+ revalidate_again: -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate2(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, NULL); -+ d_invalidate(dentry); -+ dput(dentry); -+ dentry = new; -+ goto revalidate_again; -+ } -+ } -+ else if (dentry && dentry->d_op && dentry->d_op->d_revalidate){ - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { - d_invalidate(dentry); -@@ -658,15 +729,28 @@ out_dput: - dput(dentry); - break; - } -+ if (err) -+ intent_release(nd->dentry, it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned - { - nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ - nd->flags = flags; -+ nd->it = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(&current->fs->lock); -@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup2) -+ dentry = inode->i_op->lookup2(inode, new, it); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -820,6 +920,12 @@ out: - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal.
-@@ -1045,14 +1168,17 @@ int may_open(struct nameidata *nd, int a - return get_lease(inode, flag); - } - -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); -+ - struct file *filp_open(const char * pathname, int open_flags, int mode) - { - int acc_mode, error = 0; -- struct inode *inode; - struct dentry *dentry; - struct dentry *dir; - int flag = open_flags; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = open_flags }; - int count = 0; - - if (!capable(CAP_SYS_ADMIN)) -@@ -1069,7 +1195,7 @@ struct file *filp_open(const char * path - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), &nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), &nd, &it); - if (error) - return ERR_PTR(error); - dentry = nd.dentry; -@@ -1079,6 +1205,8 @@ struct file *filp_open(const char * path - /* - * Create - we need to know the parent. 
- */ -+ it.it_mode = mode; -+ it.it_op |= IT_CREAT; - error = path_lookup(pathname, LOOKUP_PARENT, &nd); - if (error) - return ERR_PTR(error); -@@ -1094,7 +1222,7 @@ struct file *filp_open(const char * path - - dir = nd.dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it); - - do_last: - error = PTR_ERR(dentry); -@@ -1103,6 +1231,7 @@ do_last: - goto exit; - } - -+ it.it_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - error = vfs_create(dir->d_inode, dentry, -@@ -1132,12 +1261,13 @@ do_last: - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; -- while (__follow_down(&nd.mnt,&dentry) && d_mountpoint(dentry)); -+ while (__follow_down(&nd.mnt,&dentry,&it) && d_mountpoint(dentry)); - } - error = -ENOENT; - if (!dentry->d_inode) - goto exit_dput; -- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) -+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || -+ dentry->d_inode->i_op->follow_link2)) - goto do_link; - - dput(nd.dentry); -@@ -1152,11 +1282,13 @@ ok: - if (!S_ISREG(nd.dentry->d_inode->i_mode)) - open_flags &= ~O_TRUNC; - -- return dentry_open(nd.dentry, nd.mnt, open_flags); -+ return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it); - - exit_dput: -+ intent_release(dentry, &it); - dput(dentry); - exit: -+ intent_release(nd.dentry, &it); - path_release(&nd); - return ERR_PTR(error); - -@@ -1175,10 +1307,22 @@ do_link: - * are done. Procfs-like symlinks just set LAST_BIND. 
- */ - UPDATE_ATIME(dentry->d_inode); -- error = dentry->d_inode->i_op->follow_link(dentry, &nd); -+ nd.it = &it; -+ if (dentry->d_inode->i_op->follow_link2) -+ error = dentry->d_inode->i_op->follow_link2(dentry, &nd, &it); -+ else -+ error = dentry->d_inode->i_op->follow_link(dentry, &nd); -+ if (error) { -+ intent_release(dentry, &it); -+ } else if (!(it.it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, &it); -+ path_release(&nd); -+ error = -ENOLINK; -+ } - dput(dentry); - if (error) -- return error; -+ return ERR_PTR(error); - if (nd.last_type == LAST_BIND) { - dentry = nd.dentry; - goto ok; -@@ -1197,13 +1341,15 @@ do_link: - } - dir = nd.dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it); - putname(nd.last.name); - goto do_last; - } - -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1211,7 +1357,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1267,7 +1413,19 @@ asmlinkage long sys_mknod(const char * f - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.dentry->d_inode->i_op->mknod2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry);
- - mode &= ~current->fs->umask; -@@ -1288,6 +1446,7 @@ asmlinkage long sys_mknod(const char * f - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1335,7 +1494,17 @@ asmlinkage long sys_mkdir(const char * p - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.dentry->d_inode->i_op->mkdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1343,6 +1512,7 @@ asmlinkage long sys_mkdir(const char * p - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1443,8 +1613,33 @@ asmlinkage long sys_rmdir(const char * p - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ struct dentry *last; -+ -+ down(&nd.dentry->d_inode->i_sem); -+ last = lookup_hash_it(&nd.last, nd.dentry, NULL); -+ up(&nd.dentry->d_inode->i_sem); -+ if (IS_ERR(last)) { -+ error = PTR_ERR(last); -+ goto exit1; -+ } -+ if (d_mountpoint(last)) { -+ dput(last); -+ error = -EBUSY; -+ goto exit1; -+ } -+ dput(last); -+ -+ error = op->rmdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1502,8 +1697,17 @@ asmlinkage long sys_unlink(const char * - error = 
-EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? Because we want correct error value */ -@@ -1570,15 +1774,26 @@ asmlinkage long sys_symlink(const char * - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->symlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1645,7 +1860,7 @@ asmlinkage long sys_link(const char * ol - struct dentry *new_dentry; - struct nameidata nd, old_nd; - -- error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd); -+ error = __user_walk_it(oldname, LOOKUP_POSITIVE, &old_nd, NULL); - if (error) - goto exit; - error = path_lookup(to, LOOKUP_PARENT, &nd); -@@ -1654,7 +1869,17 @@ asmlinkage long sys_link(const char * ol - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->link2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link2(old_nd.dentry->d_inode, -+ 
nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1698,7 +1923,8 @@ exit: - * locking]. - */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - struct inode *target; -@@ -1756,6 +1982,7 @@ int vfs_rename_dir(struct inode *old_dir - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - if (target) { - if (!error) - target->i_flags |= S_DEAD; -@@ -1777,7 +2004,8 @@ out_unlock: - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - -@@ -1808,6 +2036,7 @@ int vfs_rename_other(struct inode *old_d - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - double_up(&old_dir->i_zombie, &new_dir->i_zombie); - if (error) - return error; -@@ -1819,13 +2048,14 @@ int vfs_rename_other(struct inode *old_d - } - - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - if (S_ISDIR(old_dentry->d_inode->i_mode)) -- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it); - else -- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); -+ error = 
vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it); - if (!error) { - if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); -@@ -1867,7 +2097,7 @@ static inline int do_rename(const char * - - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; -@@ -1883,16 +2113,37 @@ static inline int do_rename(const char * - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ if (old_dir->d_inode->i_op->rename2) { -+ lock_kernel(); -+ /* don't rename mount point. mds will take care of -+ * the rest sanity checking */ -+ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) { -+ error = -EBUSY; -+ goto exit5; -+ } -+ -+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode, -+ new_dir->d_inode, -+ oldnd.last.name, -+ oldnd.last.len, -+ newnd.last.name, -+ newnd.last.len); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit5; -+ } -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ new_dir->d_inode, new_dentry, NULL); - unlock_kernel(); -- -+exit5: - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1943,12 +2194,19 @@ out: - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; -+ if (it == NULL) -+ it = nd->it; -+ else if (it != nd->it) -+ printk("it != nd->it: tell phil@clusterfs.com\n"); -+ if (it != NULL) -+ it->it_int_flags |= IT_FL_FOLLOWED; - - if (*link == '/') { - path_release(nd); -@@ -1956,7 
+2214,7 @@ __vfs_follow_link(struct nameidata *nd, - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -1978,7 +2236,13 @@ fail: - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return __vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2020,7 +2284,7 @@ int page_follow_link(struct dentry *dent - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); ---- linux-2.4.18-61chaos/fs/nfsd/vfs.c~vfs_intent-2.4.18-18 Sun Jun 1 21:55:14 2003 -+++ linux-2.4.18-61chaos-root/fs/nfsd/vfs.c Sun Jun 1 21:59:04 2003 -@@ -1298,7 +1298,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); - unlock_kernel(); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); ---- linux-2.4.18-61chaos/fs/open.c~vfs_intent-2.4.18-18 Sun Jun 1 21:55:14 2003 -+++ linux-2.4.18-61chaos-root/fs/open.c Sun Jun 1 21:59:04 2003 -@@ -19,6 +19,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct 
dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - return error; - } -@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(nd.dentry, &it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - 
inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EROFS; -+ if (IS_RDONLY(inode)) -+ goto dput_and_out; -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - 
goto dput_and_out; -@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ - return -EINVAL; -@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - -@@ -385,8 +435,11 @@ asmlinkage long sys_chdir(const char * f - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename, -+ LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, -+ &nd, &it); - if (error) - goto out; - -@@ -397,6 +450,7 @@ asmlinkage long sys_chdir(const char * f - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -436,9 +490,10 @@ asmlinkage long sys_chroot(const char * - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -454,6 +509,7 @@ asmlinkage long sys_chroot(const char * - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -508,6 +564,18 @@ 
asmlinkage long sys_chmod(const char * f - if (IS_RDONLY(inode)) - goto dput_and_out; - -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; -@@ -538,6 +606,20 @@ static int chown_common(struct dentry * - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -628,7 +710,8 @@ extern ssize_t do_readahead(struct file - /* for files over a certains size it doesn't pay to do readahead on open */ - #define READAHEAD_CUTOFF 48000 - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -649,7 +732,7 @@ struct file *dentry_open(struct dentry * - error = locks_verify_locked(inode); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - if (error || !(f->f_mode & FMODE_WRITE)) - put_write_access(inode); -@@ -693,6 +776,7 @@ struct file *dentry_open(struct dentry * - do_readahead(f, 0, (48 * 1024) >> PAGE_SHIFT); - - -+ intent_release(dentry, it); - return f; - - 
cleanup_all: -@@ -707,11 +791,17 @@ cleanup_all: - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(dentry, it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ ---- linux-2.4.18-61chaos/fs/stat.c~vfs_intent-2.4.18-18 Sun Jun 1 21:55:14 2003 -+++ linux-2.4.18-61chaos-root/fs/stat.c Sun Jun 1 21:59:04 2003 -@@ -104,10 +104,12 @@ int vfs_stat(char *name, struct kstat *s - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(name, &nd); -+ error = user_path_walk_it(name, &nd, &it); - if (!error) { - error = do_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; -@@ -117,10 +119,12 @@ int vfs_lstat(char *name, struct kstat * - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(name, &nd); -+ error = user_path_walk_link_it(name, &nd, &it); - if (!error) { - error = do_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; ---- linux-2.4.18-61chaos/fs/exec.c~vfs_intent-2.4.18-18 Sun Jun 1 21:55:14 2003 -+++ linux-2.4.18-61chaos-root/fs/exec.c Sun Jun 1 21:59:04 2003 -@@ -112,13 +112,18 @@ static inline void put_binfmt(struct lin - * - * Also note that we take the address to load from from the file itself. 
- */ -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it); - asmlinkage long sys_uselib(const char * library) - { - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -130,7 +135,8 @@ asmlinkage long sys_uselib(const char * - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(nd.dentry, &it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -359,8 +365,9 @@ struct file *open_exec(const char *name) - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -372,8 +379,9 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); - if (!IS_ERR(file)) { -+ intent_release(nd.dentry, &it); - err = deny_write_access(file); - if (err) { - fput(file); -@@ -384,6 +392,7 @@ out: - return file; - } - } -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - goto out; -@@ -1104,7 +1113,7 @@ int do_coredump(long signr, struct pt_re - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); ---- 
linux-2.4.18-61chaos/include/linux/dcache.h~vfs_intent-2.4.18-18 Sun Jun 1 21:55:14 2003 -+++ linux-2.4.18-61chaos-root/include/linux/dcache.h Sun Jun 1 22:02:31 2003 -@@ -6,6 +6,28 @@ - #include <asm/atomic.h> - #include <linux/mount.h> - -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) -+ -+#define IT_FL_LOCKED (1) -+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */ -+ -+struct lookup_intent { -+ int it_op; -+ int it_mode; -+ int it_flags; -+ int it_disposition; -+ int it_status; -+ int it_int_flags; -+ __u64 it_lock_handle[2]; -+ int it_lock_mode; -+ void *it_data; -+}; -+ - /* - * linux/include/linux/dcache.h - * -@@ -78,6 +100,7 @@ struct dentry { - unsigned long d_time; /* used by d_revalidate */ - struct dentry_operations *d_op; - struct super_block * d_sb; /* The root of the dentry tree */ -+ struct lookup_intent *d_it; - unsigned long d_vfs_flags; - void * d_fsdata; /* fs-specific data */ - void * d_extra_attributes; /* TUX-specific data */ -@@ -91,8 +114,15 @@ struct dentry_operations { - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *); -+ void (*d_intent_release)(struct dentry *, struct lookup_intent *); - }; - -+/* defined in fs/namei.c */ -+extern void intent_release(struct dentry *de, struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -124,6 +154,7 @@ d_iput: no no yes - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - ---- linux-2.4.18-61chaos/include/linux/fs.h~vfs_intent-2.4.18-18 Sun Jun 1 21:59:03 2003 -+++ linux-2.4.18-61chaos-root/include/linux/fs.h Sun Jun 1 22:01:46 2003 -@@ -339,6 +339,8 @@ extern void set_bh_page(struct buffer_he - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -578,6 +580,7 @@ struct file { - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_intent; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -707,6 +710,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *it; - }; - - #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -@@ -840,7 +844,9 @@ extern int vfs_symlink(struct inode *, s - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it); - - /* - * File types -@@ -901,20 +907,33 @@ struct file_operations { - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link2) (struct inode *,struct inode *, 
const char *, int); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink2) (struct inode *, const char *, int); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink2) (struct inode *, const char *, int, const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir2) (struct inode *, const char *, int,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir2) (struct inode *, const char *, int); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod2) (struct inode *, const char *, int,int,int); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct inode *, -+ const char *oldname, int oldlen, -+ const char *newname, int newlen); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); -+ int (*follow_link2) (struct dentry *, struct nameidata *, -+ struct lookup_intent *it); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - }; - -@@ -1119,7 +1138,7 @@ static inline int get_lease(struct inode - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -@@ -1388,6 +1407,7 @@ typedef int (*read_actor_t)(read_descrip - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int 
FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1399,6 +1419,8 @@ extern struct dentry * lookup_one_len(co - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void iput(struct inode *); -@@ -1499,6 +1521,8 @@ extern struct file_operations generic_ro - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; ---- linux-2.4.18-61chaos/kernel/ksyms.c~vfs_intent-2.4.18-18 Sun Jun 1 21:59:03 2003 -+++ linux-2.4.18-61chaos-root/kernel/ksyms.c Sun Jun 1 21:59:04 2003 -@@ -294,6 +294,7 @@ EXPORT_SYMBOL(read_cache_page); - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); - -_ ---- linux/fs/proc/base.c.old Sat Jun 7 00:55:09 2003 -+++ linux/fs/proc/base.c Sat Jun 7 00:55:33 2003 -@@ -465,6 +465,9 @@ - - error = 
inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); - nd->last_type = LAST_BIND; -+ -+ if (nd->it != NULL) -+ nd->it->it_int_flags |= IT_FL_FOLLOWED; - out: - return error; - } diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch deleted file mode 100644 index 710cdc9..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-rh.patch +++ /dev/null @@ -1,1529 +0,0 @@ - 0 files changed - ---- linux-2.4.20-rh/fs/dcache.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:58.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/dcache.c 2003-06-09 23:18:07.000000000 +0800 -@@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry) - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. -@@ -624,6 +631,7 @@ struct dentry * d_alloc(struct dentry * - dentry->d_fsdata = NULL; - dentry->d_extra_attributes = NULL; - dentry->d_mounted = 0; -+ dentry->d_it = NULL; - dentry->d_cookie = NULL; - INIT_LIST_HEAD(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_lru); -@@ -839,13 +847,19 @@ void d_delete(struct dentry * dentry) - * Adds a dentry to the hash according to its name. 
- */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ ---- linux-2.4.20-rh/fs/namei.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:57.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/namei.c 2003-06-09 23:18:07.000000000 +0800 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct dentry *de, struct lookup_intent *it) -+{ -+ if (it && de->d_op && de->d_op->d_intent_release) -+ de->d_op->d_intent_release(de, it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd) - * Internal lookup() using the new generic dcache. 
- * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,14 @@ static struct dentry * cached_lookup(str - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; - -+again: -+ - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +319,9 @@ static struct dentry * real_lookup(struc - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup2) -+ result = dir->i_op->lookup2(dir, dentry, it); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +343,12 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate2) { -+ if (!result->d_op->d_revalidate2(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ goto again; -+ } - } - return result; - } -@@ -334,7 +362,8 @@ int max_recursive_link = 5; - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. 
- */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= max_recursive_link) -@@ -348,10 +377,21 @@ static inline int do_follow_link(struct - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -- err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, it); -+ path_release(nd); -+ err = -ENOLINK; -+ } - current->link_count--; - return err; - loop: -+ intent_release(dentry, it); - path_release(nd); - return -ELOOP; - } -@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str - return __follow_up(mnt, dentry); - } - --static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) -+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry, -+ struct lookup_intent *it) - { - struct vfsmount *mounted; - - spin_lock(&dcache_lock); - mounted = lookup_mnt(*mnt, *dentry); - if (mounted) { -+ int opc = 0, mode = 0; - *mnt = mntget(mounted); - spin_unlock(&dcache_lock); -+ if (it) { -+ opc = it->it_op; -+ mode = it->it_mode; -+ } -+ intent_release(*dentry, it); -+ if (it) { -+ it->it_op = opc; -+ it->it_mode = mode; -+ } - dput(*dentry); - mntput(mounted->mnt_parent); - *dentry = dget(mounted->mnt_root); -@@ -401,7 +452,7 @@ static inline int __follow_down(struct v - - int follow_down(struct vfsmount **mnt, struct dentry **dentry) - { -- return __follow_down(mnt,dentry); -+ return __follow_down(mnt,dentry,NULL); - } - - static inline void follow_dotdot(struct nameidata *nd) -@@ -437,7 +488,7 @@ static 
inline void follow_dotdot(struct - mntput(nd->mnt); - nd->mnt = parent; - } -- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry)) -+ while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL)) - ; - } - -@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct - * - * We expect 'base' to be positive and a directory. - */ --int link_path_walk(const char * name, struct nameidata *nd) -+int link_path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } - /* Check mountpoints.. 
*/ -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL)) - ; - - err = -ENOENT; -@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st - if (!inode->i_op) - goto out_dput; - -- if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ if (inode->i_op->follow_link || inode->i_op->follow_link2) { -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup2) - break; - continue; - /* here ends the main loop */ -@@ -592,22 +644,23 @@ last_component: - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { - err = -EWOULDBLOCKIO; - if (atomic) - break; -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it)) - ; - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) -- && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ && inode && inode->i_op && -+ (inode->i_op->follow_link || inode->i_op->follow_link2)) { -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -621,7 +674,8 @@ last_component: - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup2)) - break; - } - goto return_base; -@@ -645,6 +699,23 @@ return_reval: - * Check the cached dentry for staleness. 
- */ - dentry = nd->dentry; -+ revalidate_again: -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate2(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, NULL); -+ d_invalidate(dentry); -+ dput(dentry); -+ dentry = new; -+ goto revalidate_again; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -658,15 +729,28 @@ out_dput: - dput(dentry); - break; - } -+ if (err) -+ intent_release(nd->dentry, it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned - { - nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ - nd->flags = flags; -+ nd->it = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup2) -+ dentry = inode->i_op->lookup2(inode, new, it); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -820,6 +920,12 @@ out: - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. 
-@@ -1010,7 +1133,8 @@ exit_lock: - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1024,7 +1148,7 @@ int open_namei(const char * pathname, in - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1034,6 +1158,10 @@ int open_namei(const char * pathname, in - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1049,7 +1177,7 @@ int open_namei(const char * pathname, in - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1058,6 +1186,7 @@ do_last: - goto exit; - } - -+ it->it_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - error = vfs_create(dir->d_inode, dentry, -@@ -1086,12 +1215,13 @@ do_last: - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; -- while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); -+ while (__follow_down(&nd->mnt,&dentry,it) && d_mountpoint(dentry)); - } - error = -ENOENT; - if (!dentry->d_inode) - goto exit_dput; -- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) -+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || -+ dentry->d_inode->i_op->follow_link2)) - goto do_link; - - dput(nd->dentry); -@@ -1165,7 +1295,7 @@ ok: - if (!error) { - DQUOT_INIT(inode); - -- error = 
do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1177,8 +1307,10 @@ ok: - return 0; - - exit_dput: -+ intent_release(dentry, it); - dput(dentry); - exit: -+ intent_release(nd->dentry, it); - path_release(nd); - return error; - -@@ -1197,7 +1329,19 @@ do_link: - * are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -- error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) { -+ intent_release(dentry, it); -+ } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, it); -+ path_release(nd); -+ error = -ENOLINK; -+ } - dput(dentry); - if (error) - return error; -@@ -1219,13 +1363,20 @@ do_link: - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1233,7 +1384,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1289,7 +1440,19 @@ asmlinkage long sys_mknod(const char * f - error = path_lookup(tmp, 
LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.dentry->d_inode->i_op->mknod2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - mode &= ~current->fs->umask; -@@ -1310,6 +1473,7 @@ asmlinkage long sys_mknod(const char * f - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1357,7 +1521,17 @@ asmlinkage long sys_mkdir(const char * p - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.dentry->d_inode->i_op->mkdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1365,6 +1539,7 @@ asmlinkage long sys_mkdir(const char * p - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1465,8 +1640,33 @@ asmlinkage long sys_rmdir(const char * p - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ struct dentry *last; -+ -+ down(&nd.dentry->d_inode->i_sem); -+ last = lookup_hash_it(&nd.last, nd.dentry, NULL); -+ up(&nd.dentry->d_inode->i_sem); -+ if (IS_ERR(last)) { -+ error = PTR_ERR(last); -+ goto exit1; -+ } -+ if (d_mountpoint(last)) { -+ dput(last); -+ error = -EBUSY; -+ goto exit1; -+ } -+ dput(last); -+ -+ error = op->rmdir2(nd.dentry->d_inode, 
-+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1524,8 +1724,17 @@ asmlinkage long sys_unlink(const char * - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? 
Because we want correct error value */ -@@ -1592,15 +1801,26 @@ asmlinkage long sys_symlink(const char * - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->symlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1676,7 +1896,17 @@ asmlinkage long sys_link(const char * ol - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->link2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link2(old_nd.dentry->d_inode, -+ nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1720,7 +1950,8 @@ exit: - * locking]. 
- */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - struct inode *target; -@@ -1778,6 +2009,7 @@ int vfs_rename_dir(struct inode *old_dir - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - if (target) { - if (!error) - target->i_flags |= S_DEAD; -@@ -1799,7 +2031,8 @@ out_unlock: - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - -@@ -1830,6 +2063,7 @@ int vfs_rename_other(struct inode *old_d - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - double_up(&old_dir->i_zombie, &new_dir->i_zombie); - if (error) - return error; -@@ -1841,13 +2075,14 @@ int vfs_rename_other(struct inode *old_d - } - - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - if (S_ISDIR(old_dentry->d_inode->i_mode)) -- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it); - else -- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it); - if (!error) { - if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); -@@ -1889,7 +2124,7 @@ static inline int do_rename(const char * - - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); 
- if (IS_ERR(old_dentry)) - goto exit3; -@@ -1905,16 +2140,37 @@ static inline int do_rename(const char * - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ if (old_dir->d_inode->i_op->rename2) { -+ lock_kernel(); -+ /* don't rename mount point. mds will take care of -+ * the rest sanity checking */ -+ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) { -+ error = -EBUSY; -+ goto exit5; -+ } -+ -+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode, -+ new_dir->d_inode, -+ oldnd.last.name, -+ oldnd.last.len, -+ newnd.last.name, -+ newnd.last.len); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit5; -+ } -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ new_dir->d_inode, new_dentry, NULL); - unlock_kernel(); -- -+exit5: - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1965,20 +2221,28 @@ out: - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->it; -+ else if (it != nd->it) -+ printk("it != nd->it: tell phil@clusterfs.com\n"); -+ if (it != NULL) -+ it->it_int_flags |= IT_FL_FOLLOWED; -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -2002,7 +2266,13 @@ fail: - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return 
__vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2044,7 +2314,7 @@ int page_follow_link(struct dentry *dent - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); ---- linux-2.4.20-rh/fs/nfsd/vfs.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:48.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/nfsd/vfs.c 2003-06-09 23:18:07.000000000 +0800 -@@ -1293,7 +1293,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); ---- linux-2.4.20-rh/fs/open.c~vfs_intent-2.4.20-rh 2003-04-11 14:04:57.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/open.c 2003-06-09 23:18:07.000000000 +0800 -@@ -19,6 +19,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = 
notify_change(dentry, &newattrs); -+ if (called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - return error; - } -@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(nd.dentry, &it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if 
(inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EROFS; -+ if (IS_RDONLY(inode)) -+ goto dput_and_out; -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ - return -EINVAL; -@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - -@@ -385,8 +435,9 @@ asmlinkage long sys_chdir(const char * f - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -397,6 +448,7 @@ asmlinkage long sys_chdir(const char * f - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -436,9 +488,10 @@ asmlinkage long sys_chroot(const char * - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -454,6 +507,7 @@ asmlinkage long sys_chroot(const char * - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -508,6 +562,18 @@ asmlinkage long sys_chmod(const char * f - if (IS_RDONLY(inode)) - goto dput_and_out; - -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ 
newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; -@@ -538,6 +604,20 @@ static int chown_common(struct dentry * - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -642,6 +722,7 @@ struct file *filp_open(const char * file - { - int namei_flags, error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags }; - - flags &= ~O_DIRECT; - -@@ -651,14 +732,15 @@ struct file *filp_open(const char * file - if (namei_flags & O_TRUNC) - namei_flags |= 2; - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -701,6 +783,7 @@ struct file *dentry_open(struct dentry * - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -+ intent_release(dentry, it); - return f; - - cleanup_all: -@@ -715,11 +798,17 @@ cleanup_all: - cleanup_file: - 
put_filp(f); - cleanup_dentry: -+ intent_release(dentry, it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. - */ ---- linux-2.4.20-rh/fs/stat.c~vfs_intent-2.4.20-rh 2003-04-11 14:05:08.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/stat.c 2003-06-09 23:18:07.000000000 +0800 -@@ -110,11 +110,13 @@ static int do_getattr(struct vfsmount *m - int vfs_stat(char *name, struct kstat *stat) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(name, &nd); -+ error = user_path_walk_it(name, &nd, &it); - if (!error) { - error = do_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; -@@ -123,11 +125,13 @@ int vfs_stat(char *name, struct kstat *s - int vfs_lstat(char *name, struct kstat *stat) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(name, &nd); -+ error = user_path_walk_link_it(name, &nd, &it); - if (!error) { - error = do_getattr(nd.mnt, nd.dentry, stat); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; ---- linux-2.4.20-rh/include/linux/dcache.h~vfs_intent-2.4.20-rh 2003-04-12 15:46:39.000000000 +0800 -+++ linux-2.4.20-rh-root/include/linux/dcache.h 2003-06-09 23:18:07.000000000 +0800 -@@ -7,6 +7,28 @@ - #include <linux/mount.h> - #include <linux/kernel.h> - -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define IT_UNLINK (1<<5) -+ -+#define IT_FL_LOCKED (1) -+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */ -+ -+struct lookup_intent { -+ int it_op; -+ int it_mode; -+ int it_flags; -+ int it_disposition; -+ int it_status; -+ 
int it_int_flags; -+ __u64 it_lock_handle[2]; -+ int it_lock_mode; -+ void *it_data; -+}; -+ - /* - * linux/include/linux/dcache.h - * -@@ -82,6 +104,7 @@ struct dentry { - unsigned long d_time; /* used by d_revalidate */ - struct dentry_operations *d_op; - struct super_block * d_sb; /* The root of the dentry tree */ -+ struct lookup_intent *d_it; - unsigned long d_vfs_flags; - void * d_fsdata; /* fs-specific data */ - void * d_extra_attributes; /* TUX-specific data */ -@@ -96,8 +119,15 @@ struct dentry_operations { - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *); -+ void (*d_intent_release)(struct dentry *, struct lookup_intent *); - }; - -+/* defined in fs/namei.c */ -+extern void intent_release(struct dentry *de, struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -129,6 +159,7 @@ d_iput: no no yes - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - ---- linux-2.4.20-rh/include/linux/fs.h~vfs_intent-2.4.20-rh 2003-05-30 02:07:39.000000000 +0800 -+++ linux-2.4.20-rh-root/include/linux/fs.h 2003-06-09 23:18:07.000000000 +0800 -@@ -337,6 +337,8 @@ extern void set_bh_page(struct buffer_he - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -574,6 +576,7 @@ struct file { - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_intent; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -701,6 +704,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *it; - }; - - /* -@@ -821,7 +825,9 @@ extern int vfs_symlink(struct inode *, s - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it); - - /* - * File types -@@ -882,20 +888,33 @@ struct file_operations { - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link2) (struct inode *,struct inode *, const char *, int); - int (*unlink) (struct 
inode *,struct dentry *); -+ int (*unlink2) (struct inode *, const char *, int); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink2) (struct inode *, const char *, int, const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir2) (struct inode *, const char *, int,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir2) (struct inode *, const char *, int); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod2) (struct inode *, const char *, int,int,int); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct inode *, -+ const char *oldname, int oldlen, -+ const char *newname, int newlen); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); -+ int (*follow_link2) (struct dentry *, struct nameidata *, -+ struct lookup_intent *it); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1091,10 +1110,14 @@ static inline int get_lease(struct inode - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct lookup_intent *it); -+extern struct file 
*dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1385,6 +1408,7 @@ typedef int (*read_actor_t)(read_descrip - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1396,6 +1420,8 @@ extern struct dentry * lookup_one_len(co - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void inode_init_once(struct inode *); - extern void iput(struct inode *); -@@ -1495,6 +1521,8 @@ extern struct file_operations generic_ro - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; ---- linux-2.4.20-rh/kernel/ksyms.c~vfs_intent-2.4.20-rh 2003-05-30 02:07:42.000000000 +0800 -+++ linux-2.4.20-rh-root/kernel/ksyms.c 2003-06-09 23:18:07.000000000 +0800 -@@ -298,6 
+298,7 @@ EXPORT_SYMBOL(read_cache_page); - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); ---- linux-2.4.20-rh/fs/exec.c~vfs_intent-2.4.20-rh 2003-04-13 10:07:02.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/exec.c 2003-06-09 23:18:07.000000000 +0800 -@@ -114,8 +114,9 @@ asmlinkage long sys_uselib(const char * - struct file * file; - struct nameidata nd; - int error; -- -- error = user_path_walk(library, &nd); -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; -+ -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -127,7 +128,8 @@ asmlinkage long sys_uselib(const char * - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(nd.dentry, &it); - error = PTR_ERR(file); - if (IS_ERR(file)) - goto out; -@@ -382,8 +384,9 @@ struct file *open_exec(const char *name) - struct inode *inode; - struct file *file; - int err = 0; -- -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; -+ -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -395,7 +398,7 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -404,6 +407,7 @@ struct file *open_exec(const char *name) - } - } - out: -+ intent_release(nd.dentry, &it); - return file; - } - } -@@ -1283,7 +1287,7 @@ int do_coredump(long signr, int exit_cod - goto close_fail; - if 
(!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); ---- linux-2.4.20-rh/fs/proc/base.c~vfs_intent-2.4.20-rh 2003-06-09 23:16:51.000000000 +0800 -+++ linux-2.4.20-rh-root/fs/proc/base.c 2003-06-09 23:18:52.000000000 +0800 -@@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d - - error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); - nd->last_type = LAST_BIND; -+ -+ if (nd->it != NULL) -+ nd->it->it_int_flags |= IT_FL_FOLLOWED; - out: - return error; - } - -_ diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch deleted file mode 100644 index 09bcb22..0000000 --- a/lustre/kernel_patches/patches/vfs_intent-2.4.20-vanilla.patch +++ /dev/null @@ -1,1607 +0,0 @@ - fs/dcache.c | 20 ++ - fs/exec.c | 15 + - fs/namei.c | 378 ++++++++++++++++++++++++++++++++++++++++++------- - fs/nfsd/vfs.c | 2 - fs/open.c | 126 ++++++++++++++-- - fs/proc/base.c | 3 - fs/stat.c | 24 ++- - include/linux/dcache.h | 31 ++++ - include/linux/fs.h | 32 +++- - kernel/ksyms.c | 1 - 10 files changed, 543 insertions(+), 89 deletions(-) - ---- linux-2.4.20-l18/fs/exec.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/exec.c Wed May 28 01:39:18 2003 -@@ -107,8 +107,9 @@ asmlinkage long sys_uselib(const char * - struct file * file; - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; - -- error = user_path_walk(library, &nd); -+ error = user_path_walk_it(library, &nd, &it); - if (error) - goto out; - -@@ -120,7 +121,8 @@ asmlinkage long sys_uselib(const char * - if (error) - goto exit; - -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(nd.dentry, &it); - error = PTR_ERR(file); - if 
(IS_ERR(file)) - goto out; -@@ -363,8 +365,9 @@ struct file *open_exec(const char *name) - struct inode *inode; - struct file *file; - int err = 0; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY }; - -- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd); -+ err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it); - file = ERR_PTR(err); - if (!err) { - inode = nd.dentry->d_inode; -@@ -376,7 +379,8 @@ struct file *open_exec(const char *name) - err = -EACCES; - file = ERR_PTR(err); - if (!err) { -- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); -+ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it); -+ intent_release(nd.dentry, &it); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { -@@ -388,6 +392,7 @@ out: - return file; - } - } -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - goto out; -@@ -989,7 +994,7 @@ int do_coredump(long signr, struct pt_re - goto close_fail; - if (!file->f_op->write) - goto close_fail; -- if (do_truncate(file->f_dentry, 0) != 0) -+ if (do_truncate(file->f_dentry, 0, 0) != 0) - goto close_fail; - - retval = binfmt->core_dump(signr, regs, file); ---- linux-2.4.20-l18/fs/dcache.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/dcache.c Wed May 28 01:39:18 2003 -@@ -181,6 +181,13 @@ int d_invalidate(struct dentry * dentry) - spin_unlock(&dcache_lock); - return 0; - } -+ -+ /* network invalidation by Lustre */ -+ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { -+ spin_unlock(&dcache_lock); -+ return 0; -+ } -+ - /* - * Check whether to do a partial shrink_dcache - * to get rid of unused child entries. 
-@@ -616,6 +623,7 @@ struct dentry * d_alloc(struct dentry * - dentry->d_op = NULL; - dentry->d_fsdata = NULL; - dentry->d_mounted = 0; -+ dentry->d_it = NULL; - INIT_LIST_HEAD(&dentry->d_hash); - INIT_LIST_HEAD(&dentry->d_lru); - INIT_LIST_HEAD(&dentry->d_subdirs); -@@ -830,13 +838,19 @@ void d_delete(struct dentry * dentry) - * Adds a dentry to the hash according to its name. - */ - --void d_rehash(struct dentry * entry) -+void __d_rehash(struct dentry * entry, int lock) - { - struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash); - if (!list_empty(&entry->d_hash)) BUG(); -- spin_lock(&dcache_lock); -+ if (lock) spin_lock(&dcache_lock); - list_add(&entry->d_hash, list); -- spin_unlock(&dcache_lock); -+ if (lock) spin_unlock(&dcache_lock); -+} -+EXPORT_SYMBOL(__d_rehash); -+ -+void d_rehash(struct dentry * entry) -+{ -+ __d_rehash(entry, 1); - } - - #define do_switch(x,y) do { \ ---- linux-2.4.20-l18/fs/namei.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/namei.c Sun Jun 1 23:41:35 2003 -@@ -94,6 +94,13 @@ - * XEmacs seems to be relying on it... - */ - -+void intent_release(struct dentry *de, struct lookup_intent *it) -+{ -+ if (it && de->d_op && de->d_op->d_intent_release) -+ de->d_op->d_intent_release(de, it); -+ -+} -+ - /* In order to reduce some races, while at the same time doing additional - * checking and hopefully speeding things up, we copy filenames to the - * kernel data space before using them.. -@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd) - * Internal lookup() using the new generic dcache. 
- * SMP-safe - */ --static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * dentry = d_lookup(parent, name); - -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ if (!dentry->d_op->d_revalidate2(dentry, flags, it) && -+ !d_invalidate(dentry)) { -+ dput(dentry); -+ dentry = NULL; -+ } -+ return dentry; -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { - dput(dentry); -@@ -281,11 +297,14 @@ static struct dentry * cached_lookup(str - * make sure that nobody added the entry to the dcache in the meantime.. - * SMP-safe - */ --static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) -+static struct dentry *real_lookup(struct dentry *parent, struct qstr *name, -+ int flags, struct lookup_intent *it) - { - struct dentry * result; - struct inode *dir = parent->d_inode; - -+again: -+ - down(&dir->i_sem); - /* - * First re-do the cached lookup just in case it was created -@@ -300,6 +319,9 @@ static struct dentry * real_lookup(struc - result = ERR_PTR(-ENOMEM); - if (dentry) { - lock_kernel(); -+ if (dir->i_op->lookup2) -+ result = dir->i_op->lookup2(dir, dentry, it); -+ else - result = dir->i_op->lookup(dir, dentry); - unlock_kernel(); - if (result) -@@ -321,6 +343,12 @@ static struct dentry * real_lookup(struc - dput(result); - result = ERR_PTR(-ENOENT); - } -+ } else if (result->d_op && result->d_op->d_revalidate2) { -+ if (!result->d_op->d_revalidate2(result, flags, it) && -+ !d_invalidate(result)) { -+ dput(result); -+ goto again; -+ } - } - return result; - } -@@ -332,7 +360,8 @@ static struct dentry * real_lookup(struc - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. 
- */ --static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) -+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd, -+ struct lookup_intent *it) - { - int err; - if (current->link_count >= 5) -@@ -346,10 +375,21 @@ static inline int do_follow_link(struct - current->link_count++; - current->total_link_count++; - UPDATE_ATIME(dentry->d_inode); -- err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ err = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ err = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, it); -+ path_release(nd); -+ err = -ENOLINK; -+ } - current->link_count--; - return err; - loop: -+ intent_release(dentry, it); - path_release(nd); - return -ELOOP; - } -@@ -379,15 +419,26 @@ int follow_up(struct vfsmount **mnt, str - return __follow_up(mnt, dentry); - } - --static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) -+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry, -+ struct lookup_intent *it) - { - struct vfsmount *mounted; - - spin_lock(&dcache_lock); - mounted = lookup_mnt(*mnt, *dentry); - if (mounted) { -+ int opc = 0, mode = 0; - *mnt = mntget(mounted); - spin_unlock(&dcache_lock); -+ if (it) { -+ opc = it->it_op; -+ mode = it->it_mode; -+ } -+ intent_release(*dentry, it); -+ if (it) { -+ it->it_op = opc; -+ it->it_mode = mode; -+ } - dput(*dentry); - mntput(mounted->mnt_parent); - *dentry = dget(mounted->mnt_root); -@@ -399,7 +450,7 @@ static inline int __follow_down(struct v - - int follow_down(struct vfsmount **mnt, struct dentry **dentry) - { -- return __follow_down(mnt,dentry); -+ return __follow_down(mnt,dentry,NULL); - } - - static inline void follow_dotdot(struct nameidata *nd) -@@ -435,7 +486,7 @@ static inline void 
follow_dotdot(struct - mntput(nd->mnt); - nd->mnt = parent; - } -- while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry)) -+ while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL)) - ; - } - -@@ -447,7 +498,8 @@ static inline void follow_dotdot(struct - * - * We expect 'base' to be positive and a directory. - */ --int link_path_walk(const char * name, struct nameidata *nd) -+int link_path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it) - { - struct dentry *dentry; - struct inode *inode; -@@ -520,15 +572,15 @@ int link_path_walk(const char * name, st - break; - } - /* This does the actual lookups.. */ -- dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); -+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } - /* Check mountpoints.. 
*/ -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL)) - ; - - err = -ENOENT; -@@ -539,8 +591,8 @@ int link_path_walk(const char * name, st - if (!inode->i_op) - goto out_dput; - -- if (inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ if (inode->i_op->follow_link || inode->i_op->follow_link2) { -+ err = do_follow_link(dentry, nd, NULL); - dput(dentry); - if (err) - goto return_err; -@@ -556,7 +608,7 @@ int link_path_walk(const char * name, st - nd->dentry = dentry; - } - err = -ENOTDIR; -- if (!inode->i_op->lookup) -+ if (!inode->i_op->lookup && !inode->i_op->lookup2) - break; - continue; - /* here ends the main loop */ -@@ -583,19 +635,20 @@ last_component: - if (err < 0) - break; - } -- dentry = cached_lookup(nd->dentry, &this, 0); -+ dentry = cached_lookup(nd->dentry, &this, 0, it); - if (!dentry) { -- dentry = real_lookup(nd->dentry, &this, 0); -+ dentry = real_lookup(nd->dentry, &this, 0, it); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - break; - } -- while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) -+ while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it)) - ; - inode = dentry->d_inode; - if ((lookup_flags & LOOKUP_FOLLOW) -- && inode && inode->i_op && inode->i_op->follow_link) { -- err = do_follow_link(dentry, nd); -+ && inode && inode->i_op && -+ (inode->i_op->follow_link || inode->i_op->follow_link2)) { -+ err = do_follow_link(dentry, nd, it); - dput(dentry); - if (err) - goto return_err; -@@ -609,7 +662,8 @@ last_component: - goto no_inode; - if (lookup_flags & LOOKUP_DIRECTORY) { - err = -ENOTDIR; -- if (!inode->i_op || !inode->i_op->lookup) -+ if (!inode->i_op || -+ (!inode->i_op->lookup && !inode->i_op->lookup2)) - break; - } - goto return_base; -@@ -633,6 +687,23 @@ return_reval: - * Check the cached dentry for staleness. 
- */ - dentry = nd->dentry; -+ revalidate_again: -+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate2) { -+ err = -ESTALE; -+ if (!dentry->d_op->d_revalidate2(dentry, 0, it)) { -+ struct dentry *new; -+ err = permission(dentry->d_parent->d_inode, -+ MAY_EXEC); -+ if (err) -+ break; -+ new = real_lookup(dentry->d_parent, -+ &dentry->d_name, 0, NULL); -+ d_invalidate(dentry); -+ dput(dentry); -+ dentry = new; -+ goto revalidate_again; -+ } -+ } else - if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { - err = -ESTALE; - if (!dentry->d_op->d_revalidate(dentry, 0)) { -@@ -646,15 +717,28 @@ out_dput: - dput(dentry); - break; - } -+ if (err) -+ intent_release(nd->dentry, it); - path_release(nd); - return_err: - return err; - } - -+int link_path_walk(const char * name, struct nameidata *nd) -+{ -+ return link_path_walk_it(name, nd, NULL); -+} -+ -+int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it) -+{ -+ current->total_link_count = 0; -+ return link_path_walk_it(name, nd, it); -+} -+ - int path_walk(const char * name, struct nameidata *nd) - { - current->total_link_count = 0; -- return link_path_walk(name, nd); -+ return link_path_walk_it(name, nd, NULL); - } - - /* SMP-safe */ -@@ -739,6 +823,17 @@ walk_init_root(const char *name, struct - } - - /* SMP-safe */ -+int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ int error = 0; -+ if (path_init(path, flags, nd)) -+ error = path_walk_it(path, nd, it); -+ return error; -+} -+ -+ -+/* SMP-safe */ - int path_lookup(const char *path, unsigned flags, struct nameidata *nd) - { - int error = 0; -@@ -753,6 +848,7 @@ int path_init(const char *name, unsigned - { - nd->last_type = LAST_ROOT; /* if there are only slashes... 
*/ - nd->flags = flags; -+ nd->it = NULL; - if (*name=='/') - return walk_init_root(name,nd); - read_lock(¤t->fs->lock); -@@ -767,7 +863,8 @@ int path_init(const char *name, unsigned - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ --struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base, -+ struct lookup_intent *it) - { - struct dentry * dentry; - struct inode *inode; -@@ -790,13 +887,16 @@ struct dentry * lookup_hash(struct qstr - goto out; - } - -- dentry = cached_lookup(base, name, 0); -+ dentry = cached_lookup(base, name, 0, it); - if (!dentry) { - struct dentry *new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - lock_kernel(); -+ if (inode->i_op->lookup2) -+ dentry = inode->i_op->lookup2(inode, new, it); -+ else - dentry = inode->i_op->lookup(inode, new); - unlock_kernel(); - if (!dentry) -@@ -808,6 +908,12 @@ out: - return dentry; - } - -+struct dentry * lookup_hash(struct qstr *name, struct dentry * base) -+{ -+ return lookup_hash_it(name, base, NULL); -+} -+ -+ - /* SMP-safe */ - struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) - { -@@ -829,7 +935,7 @@ struct dentry * lookup_one_len(const cha - } - this.hash = end_name_hash(hash); - -- return lookup_hash(&this, base); -+ return lookup_hash_it(&this, base, NULL); - access: - return ERR_PTR(-EACCES); - } -@@ -860,6 +966,23 @@ int __user_walk(const char *name, unsign - return err; - } - -+int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd, -+ struct lookup_intent *it) -+{ -+ char *tmp; -+ int err; -+ -+ tmp = getname(name); -+ err = PTR_ERR(tmp); -+ if (!IS_ERR(tmp)) { -+ err = 0; -+ if (path_init(tmp, flags, nd)) -+ err = path_walk_it(tmp, nd, it); -+ putname(tmp); -+ } -+ return err; -+} -+ - /* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. 
-@@ -996,7 +1119,8 @@ exit_lock: - * for symlinks (where the permissions are checked later). - * SMP-safe - */ --int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) -+int open_namei_it(const char *pathname, int flag, int mode, -+ struct nameidata *nd, struct lookup_intent *it) - { - int acc_mode, error = 0; - struct inode *inode; -@@ -1010,7 +1134,7 @@ int open_namei(const char * pathname, in - * The simplest case - just a plain lookup. - */ - if (!(flag & O_CREAT)) { -- error = path_lookup(pathname, lookup_flags(flag), nd); -+ error = path_lookup_it(pathname, lookup_flags(flag), nd, it); - if (error) - return error; - dentry = nd->dentry; -@@ -1020,6 +1144,10 @@ int open_namei(const char * pathname, in - /* - * Create - we need to know the parent. - */ -+ if (it) { -+ it->it_mode = mode; -+ it->it_op |= IT_CREAT; -+ } - error = path_lookup(pathname, LOOKUP_PARENT, nd); - if (error) - return error; -@@ -1035,7 +1163,7 @@ int open_namei(const char * pathname, in - - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - - do_last: - error = PTR_ERR(dentry); -@@ -1044,6 +1172,7 @@ do_last: - goto exit; - } - -+ it->it_mode = mode; - /* Negative dentry, just create the file */ - if (!dentry->d_inode) { - error = vfs_create(dir->d_inode, dentry, -@@ -1072,12 +1201,13 @@ do_last: - error = -ELOOP; - if (flag & O_NOFOLLOW) - goto exit_dput; -- while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); -+ while (__follow_down(&nd->mnt,&dentry,it) && d_mountpoint(dentry)); - } - error = -ENOENT; - if (!dentry->d_inode) - goto exit_dput; -- if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) -+ if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link || -+ dentry->d_inode->i_op->follow_link2)) - goto do_link; - - dput(nd->dentry); -@@ -1151,7 +1281,7 @@ ok: - if (!error) { - DQUOT_INIT(inode); - -- error = 
do_truncate(dentry, 0); -+ error = do_truncate(dentry, 0, 1); - } - put_write_access(inode); - if (error) -@@ -1163,8 +1293,10 @@ ok: - return 0; - - exit_dput: -+ intent_release(dentry, it); - dput(dentry); - exit: -+ intent_release(nd->dentry, it); - path_release(nd); - return error; - -@@ -1183,7 +1315,19 @@ do_link: - * are done. Procfs-like symlinks just set LAST_BIND. - */ - UPDATE_ATIME(dentry->d_inode); -- error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ nd->it = it; -+ if (dentry->d_inode->i_op->follow_link2) -+ error = dentry->d_inode->i_op->follow_link2(dentry, nd, it); -+ else -+ error = dentry->d_inode->i_op->follow_link(dentry, nd); -+ if (error) { -+ intent_release(dentry, it); -+ } else if (it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) { -+ /* vfs_follow_link was never called */ -+ intent_release(dentry, it); -+ path_release(nd); -+ error = -ENOLINK; -+ } - dput(dentry); - if (error) - return error; -@@ -1205,13 +1349,20 @@ do_link: - } - dir = nd->dentry; - down(&dir->d_inode->i_sem); -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - putname(nd->last.name); - goto do_last; - } - -+int open_namei(const char *pathname, int flag, int mode, struct nameidata *nd) -+{ -+ return open_namei_it(pathname, flag, mode, nd, NULL); -+} -+ -+ - /* SMP-safe */ --static struct dentry *lookup_create(struct nameidata *nd, int is_dir) -+static struct dentry *lookup_create(struct nameidata *nd, int is_dir, -+ struct lookup_intent *it) - { - struct dentry *dentry; - -@@ -1219,7 +1370,7 @@ static struct dentry *lookup_create(stru - dentry = ERR_PTR(-EEXIST); - if (nd->last_type != LAST_NORM) - goto fail; -- dentry = lookup_hash(&nd->last, nd->dentry); -+ dentry = lookup_hash_it(&nd->last, nd->dentry, it); - if (IS_ERR(dentry)) - goto fail; - if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) -@@ -1275,7 +1426,19 @@ asmlinkage long sys_mknod(const char * f - error = path_lookup(tmp, 
LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ -+ if (nd.dentry->d_inode->i_op->mknod2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mknod2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode, dev); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - - mode &= ~current->fs->umask; -@@ -1296,6 +1459,7 @@ asmlinkage long sys_mknod(const char * f - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1343,7 +1507,17 @@ asmlinkage long sys_mkdir(const char * p - error = path_lookup(tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 1); -+ if (nd.dentry->d_inode->i_op->mkdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->mkdir2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ mode); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 1, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_mkdir(nd.dentry->d_inode, dentry, -@@ -1351,6 +1525,7 @@ asmlinkage long sys_mkdir(const char * p - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+out2: - path_release(&nd); - out: - putname(tmp); -@@ -1451,8 +1626,33 @@ asmlinkage long sys_rmdir(const char * p - error = -EBUSY; - goto exit1; - } -+ if (nd.dentry->d_inode->i_op->rmdir2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ struct dentry *last; -+ -+ down(&nd.dentry->d_inode->i_sem); -+ last = lookup_hash_it(&nd.last, nd.dentry, NULL); -+ up(&nd.dentry->d_inode->i_sem); -+ if (IS_ERR(last)) { -+ error = PTR_ERR(last); -+ goto exit1; -+ } -+ if (d_mountpoint(last)) { -+ dput(last); -+ error = -EBUSY; -+ goto exit1; -+ } -+ dput(last); -+ -+ error = op->rmdir2(nd.dentry->d_inode, 
-+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_rmdir(nd.dentry->d_inode, dentry); -@@ -1510,8 +1710,17 @@ asmlinkage long sys_unlink(const char * - error = -EISDIR; - if (nd.last_type != LAST_NORM) - goto exit1; -+ if (nd.dentry->d_inode->i_op->unlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->unlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit1; -+ } - down(&nd.dentry->d_inode->i_sem); -- dentry = lookup_hash(&nd.last, nd.dentry); -+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - /* Why not before? 
Because we want correct error value */ -@@ -1578,15 +1787,26 @@ asmlinkage long sys_symlink(const char * - error = path_lookup(to, LOOKUP_PARENT, &nd); - if (error) - goto out; -- dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->symlink2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->symlink2(nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len, -+ from); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out2; -+ } -+ dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { - error = vfs_symlink(nd.dentry->d_inode, dentry, from); - dput(dentry); - } - up(&nd.dentry->d_inode->i_sem); -+ out2: - path_release(&nd); --out: -+ out: - putname(to); - } - putname(from); -@@ -1662,7 +1882,17 @@ asmlinkage long sys_link(const char * ol - error = -EXDEV; - if (old_nd.mnt != nd.mnt) - goto out_release; -- new_dentry = lookup_create(&nd, 0); -+ if (nd.dentry->d_inode->i_op->link2) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ error = op->link2(old_nd.dentry->d_inode, -+ nd.dentry->d_inode, -+ nd.last.name, -+ nd.last.len); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto out_release; -+ } -+ new_dentry = lookup_create(&nd, 0, NULL); - error = PTR_ERR(new_dentry); - if (!IS_ERR(new_dentry)) { - error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); -@@ -1706,7 +1936,8 @@ exit: - * locking]. 
- */ - int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - struct inode *target; -@@ -1764,6 +1995,7 @@ int vfs_rename_dir(struct inode *old_dir - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - if (target) { - if (!error) - target->i_flags |= S_DEAD; -@@ -1785,7 +2017,8 @@ out_unlock: - } - - int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - -@@ -1816,6 +2049,7 @@ int vfs_rename_other(struct inode *old_d - error = -EBUSY; - else - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); -+ intent_release(new_dentry, it); - double_up(&old_dir->i_zombie, &new_dir->i_zombie); - if (error) - return error; -@@ -1827,13 +2061,14 @@ int vfs_rename_other(struct inode *old_d - } - - int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -- struct inode *new_dir, struct dentry *new_dentry) -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it) - { - int error; - if (S_ISDIR(old_dentry->d_inode->i_mode)) -- error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry,it); - else -- error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); -+ error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,it); - if (!error) { - if (old_dir == new_dir) - inode_dir_notify(old_dir, DN_RENAME); -@@ -1875,7 +2110,7 @@ static inline int do_rename(const char * - - double_lock(new_dir, old_dir); - -- old_dentry = lookup_hash(&oldnd.last, old_dir); -+ old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL); - error = PTR_ERR(old_dentry); 
- if (IS_ERR(old_dentry)) - goto exit3; -@@ -1891,16 +2126,37 @@ static inline int do_rename(const char * - if (newnd.last.name[newnd.last.len]) - goto exit4; - } -- new_dentry = lookup_hash(&newnd.last, new_dir); -+ new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; - -+ if (old_dir->d_inode->i_op->rename2) { -+ lock_kernel(); -+ /* don't rename mount point. mds will take care of -+ * the rest sanity checking */ -+ if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) { -+ error = -EBUSY; -+ goto exit5; -+ } -+ -+ error = old_dir->d_inode->i_op->rename2(old_dir->d_inode, -+ new_dir->d_inode, -+ oldnd.last.name, -+ oldnd.last.len, -+ newnd.last.name, -+ newnd.last.len); -+ unlock_kernel(); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto exit5; -+ } -+ - lock_kernel(); - error = vfs_rename(old_dir->d_inode, old_dentry, -- new_dir->d_inode, new_dentry); -+ new_dir->d_inode, new_dentry, NULL); - unlock_kernel(); -- -+exit5: - dput(new_dentry); - exit4: - dput(old_dentry); -@@ -1951,20 +2207,28 @@ out: - } - - static inline int --__vfs_follow_link(struct nameidata *nd, const char *link) -+__vfs_follow_link(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) - { - int res = 0; - char *name; - if (IS_ERR(link)) - goto fail; - -+ if (it == NULL) -+ it = nd->it; -+ else if (it != nd->it) -+ printk("it != nd->it: tell phil@clusterfs.com\n"); -+ if (it != NULL) -+ it->it_int_flags |= IT_FL_FOLLOWED; -+ - if (*link == '/') { - path_release(nd); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; - } -- res = link_path_walk(link, nd); -+ res = link_path_walk_it(link, nd, it); - out: - if (current->link_count || res || nd->last_type!=LAST_NORM) - return res; -@@ -1986,7 +2250,13 @@ fail: - - int vfs_follow_link(struct nameidata *nd, const char *link) - { -- return __vfs_follow_link(nd, link); -+ return 
__vfs_follow_link(nd, link, NULL); -+} -+ -+int vfs_follow_link_it(struct nameidata *nd, const char *link, -+ struct lookup_intent *it) -+{ -+ return __vfs_follow_link(nd, link, it); - } - - /* get the link contents into pagecache */ -@@ -2028,7 +2298,7 @@ int page_follow_link(struct dentry *dent - { - struct page *page = NULL; - char *s = page_getlink(dentry, &page); -- int res = __vfs_follow_link(nd, s); -+ int res = __vfs_follow_link(nd, s, NULL); - if (page) { - kunmap(page); - page_cache_release(page); ---- linux-2.4.20-l18/fs/nfsd/vfs.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/nfsd/vfs.c Wed May 28 01:39:18 2003 -@@ -1291,7 +1291,7 @@ nfsd_rename(struct svc_rqst *rqstp, stru - err = nfserr_perm; - } else - #endif -- err = vfs_rename(fdir, odentry, tdir, ndentry); -+ err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - nfsd_sync_dir(tdentry); - nfsd_sync_dir(fdentry); ---- linux-2.4.20-l18/fs/open.c~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/fs/open.c Wed May 28 01:39:18 2003 -@@ -19,6 +19,8 @@ - #include <asm/uaccess.h> - - #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -+extern int path_walk_it(const char *name, struct nameidata *nd, -+ struct lookup_intent *it); - - int vfs_statfs(struct super_block *sb, struct statfs *buf) - { -@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct - write_unlock(&files->file_lock); - } - --int do_truncate(struct dentry *dentry, loff_t length) -+int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) - { - struct inode *inode = dentry->d_inode; -+ struct inode_operations *op = dentry->d_inode->i_op; - int error; - struct iattr newattrs; - -@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l - down(&inode->i_sem); - newattrs.ia_size = length; - newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; -- error = notify_change(dentry, &newattrs); -+ if 
(called_from_open) -+ newattrs.ia_valid |= ATTR_FROM_OPEN; -+ if (op->setattr_raw) { -+ newattrs.ia_valid |= ATTR_RAW; -+ newattrs.ia_ctime = CURRENT_TIME; -+ error = op->setattr_raw(inode, &newattrs); -+ } else -+ error = notify_change(dentry, &newattrs); - up(&inode->i_sem); - return error; - } -@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const - struct nameidata nd; - struct inode * inode; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - error = -EINVAL; - if (length < 0) /* sorry, but loff_t says... */ - goto out; - -- error = user_path_walk(path, &nd); -+ error = user_path_walk_it(path, &nd, &it); - if (error) - goto out; - inode = nd.dentry->d_inode; -@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const - error = locks_verify_truncate(inode, NULL, length); - if (!error) { - DQUOT_INIT(inode); -- error = do_truncate(nd.dentry, length); -+ intent_release(nd.dentry, &it); -+ error = do_truncate(nd.dentry, length, 0); - } - put_write_access(inode); - - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi - - error = locks_verify_truncate(inode, file, length); - if (!error) -- error = do_truncate(dentry, length); -+ error = do_truncate(dentry, length, 0); - out_putf: - fput(file); - out: -@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam - goto dput_and_out; - - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations 
*op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EROFS; -+ if (IS_RDONLY(inode)) -+ goto dput_and_out; -+ -+ error = -EPERM; -+ if (!times) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; - } -+ - error = notify_change(nd.dentry, &newattrs); - dput_and_out: - path_release(&nd); -@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena - struct inode * inode; - struct iattr newattrs; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, NULL); - - if (error) - goto out; - inode = nd.dentry->d_inode; - -+ /* this is safe without a Lustre lock because it only depends -+ on the super block */ - error = -EROFS; - if (IS_RDONLY(inode)) - goto dput_and_out; -@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena - newattrs.ia_atime = times[0].tv_sec; - newattrs.ia_mtime = times[1].tv_sec; - newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; -- } else { -+ } -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ -+ error = -EPERM; -+ if (!utimes) { - if (current->fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - goto dput_and_out; -@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * - int old_fsuid, old_fsgid; - kernel_cap_t old_cap; - int res; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - - if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ - return -EINVAL; -@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * - else - current->cap_effective = current->cap_permitted; - -- res = user_path_walk(filename, &nd); -+ res = user_path_walk_it(filename, &nd, &it); - if (!res) { - res = permission(nd.dentry->d_inode, mode); - /* SuS v2 requires we report a read only fs too */ - if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) - && !special_file(nd.dentry->d_inode->i_mode)) - res = -EROFS; -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - -@@ -385,8 +435,9 @@ asmlinkage long sys_chdir(const char * f - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd); -+ error = __user_walk_it(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd, &it); - if (error) - goto out; - -@@ -397,6 +448,7 @@ asmlinkage long sys_chdir(const char * f - set_fs_pwd(current->fs, nd.mnt, nd.dentry); - - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -436,9 +488,10 @@ asmlinkage long sys_chroot(const char * - { - int error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -- LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); -+ error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW | -+ LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it); - if (error) - goto out; - -@@ -454,6 +507,7 @@ asmlinkage long sys_chroot(const char * - set_fs_altroot(); - error = 0; - dput_and_out: -+ intent_release(nd.dentry, &it); - path_release(&nd); - out: - return error; -@@ -508,6 +562,18 @@ asmlinkage long sys_chmod(const char * f - if (IS_RDONLY(inode)) - goto dput_and_out; - -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = nd.dentry->d_inode->i_op; -+ -+ newattrs.ia_mode = mode; -+ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; -+ 
newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ goto dput_and_out; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto dput_and_out; -@@ -538,6 +604,20 @@ static int chown_common(struct dentry * - error = -EROFS; - if (IS_RDONLY(inode)) - goto out; -+ -+ if (inode->i_op->setattr_raw) { -+ struct inode_operations *op = dentry->d_inode->i_op; -+ -+ newattrs.ia_uid = user; -+ newattrs.ia_gid = group; -+ newattrs.ia_valid = ATTR_UID | ATTR_GID; -+ newattrs.ia_valid |= ATTR_RAW; -+ error = op->setattr_raw(inode, &newattrs); -+ /* the file system wants to use normal vfs path now */ -+ if (error != -EOPNOTSUPP) -+ return error; -+ } -+ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; -@@ -638,10 +718,12 @@ asmlinkage long sys_fchown(unsigned int - * for the internal routines (ie open_namei()/follow_link() etc). 00 is - * used by symlinks. 
- */ -+ - struct file *filp_open(const char * filename, int flags, int mode) - { - int namei_flags, error; - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = flags }; - - namei_flags = flags; - if ((namei_flags+1) & O_ACCMODE) -@@ -649,14 +731,15 @@ struct file *filp_open(const char * file - if (namei_flags & O_TRUNC) - namei_flags |= 2; - -- error = open_namei(filename, namei_flags, mode, &nd); -- if (!error) -- return dentry_open(nd.dentry, nd.mnt, flags); -+ error = open_namei_it(filename, namei_flags, mode, &nd, &it); -+ if (error) -+ return ERR_PTR(error); - -- return ERR_PTR(error); -+ return dentry_open_it(nd.dentry, nd.mnt, flags, &it); - } - --struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it) - { - struct file * f; - struct inode *inode; -@@ -699,6 +782,7 @@ struct file *dentry_open(struct dentry * - } - f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); - -+ intent_release(dentry, it); - return f; - - cleanup_all: -@@ -713,11 +797,17 @@ cleanup_all: - cleanup_file: - put_filp(f); - cleanup_dentry: -+ intent_release(dentry, it); - dput(dentry); - mntput(mnt); - return ERR_PTR(error); - } - -+struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) -+{ -+ return dentry_open_it(dentry, mnt, flags, NULL); -+} -+ - /* - * Find an empty file descriptor entry, and mark it busy. 
- */ ---- linux-2.4.20-l18/fs/stat.c~vfs_intent-2.4.20-vanilla Thu Sep 13 19:04:43 2001 -+++ linux-2.4.20-l18-phil/fs/stat.c Wed May 28 01:39:18 2003 -@@ -135,13 +135,15 @@ static int cp_new_stat(struct inode * in - asmlinkage long sys_stat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { - error = do_revalidate(nd.dentry); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; -@@ -151,13 +153,15 @@ asmlinkage long sys_stat(char * filename - asmlinkage long sys_newstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { - error = do_revalidate(nd.dentry); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; -@@ -172,13 +176,15 @@ asmlinkage long sys_newstat(char * filen - asmlinkage long sys_lstat(char * filename, struct __old_kernel_stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { - error = do_revalidate(nd.dentry); - if (!error) - error = cp_old_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; -@@ -189,13 +195,15 @@ asmlinkage long sys_lstat(char * filenam - asmlinkage long sys_newlstat(char * filename, struct stat * statbuf) - { - struct nameidata nd; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - int error; - -- error = 
user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { - error = do_revalidate(nd.dentry); - if (!error) - error = cp_new_stat(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; -@@ -333,12 +341,14 @@ asmlinkage long sys_stat64(char * filena - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk(filename, &nd); -+ error = user_path_walk_it(filename, &nd, &it); - if (!error) { - error = do_revalidate(nd.dentry); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; -@@ -348,12 +358,14 @@ asmlinkage long sys_lstat64(char * filen - { - struct nameidata nd; - int error; -+ struct lookup_intent it = { .it_op = IT_GETATTR }; - -- error = user_path_walk_link(filename, &nd); -+ error = user_path_walk_link_it(filename, &nd, &it); - if (!error) { - error = do_revalidate(nd.dentry); - if (!error) - error = cp_new_stat64(nd.dentry->d_inode, statbuf); -+ intent_release(nd.dentry, &it); - path_release(&nd); - } - return error; ---- linux-2.4.20-l18/fs/proc/base.c~vfs_intent-2.4.20-vanilla Wed Jun 4 22:53:14 2003 -+++ linux-2.4.20-l18-phil/fs/proc/base.c Wed Jun 4 22:50:35 2003 -@@ -464,6 +464,9 @@ static int proc_pid_follow_link(struct d - - error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); - nd->last_type = LAST_BIND; -+ -+ if (nd->it != NULL) -+ nd->it->it_int_flags |= IT_FL_FOLLOWED; - out: - return error; - } ---- linux-2.4.20-l18/include/linux/dcache.h~vfs_intent-2.4.20-vanilla Thu Nov 28 18:53:15 2002 -+++ linux-2.4.20-l18-phil/include/linux/dcache.h Sun Jun 1 22:35:10 2003 -@@ -7,6 +7,28 @@ - #include <linux/mount.h> - #include <linux/kernel.h> - -+#define IT_OPEN (1) -+#define IT_CREAT (1<<1) -+#define IT_READDIR (1<<2) -+#define IT_GETATTR (1<<3) -+#define IT_LOOKUP (1<<4) -+#define 
IT_UNLINK (1<<5) -+ -+#define IT_FL_LOCKED (1) -+#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */ -+ -+struct lookup_intent { -+ int it_op; -+ int it_mode; -+ int it_flags; -+ int it_disposition; -+ int it_status; -+ int it_int_flags; -+ __u64 it_lock_handle[2]; -+ int it_lock_mode; -+ void *it_data; -+}; -+ - /* - * linux/include/linux/dcache.h - * -@@ -79,6 +101,7 @@ struct dentry { - unsigned long d_time; /* used by d_revalidate */ - struct dentry_operations *d_op; - struct super_block * d_sb; /* The root of the dentry tree */ -+ struct lookup_intent *d_it; - unsigned long d_vfs_flags; - void * d_fsdata; /* fs-specific data */ - unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ -@@ -91,8 +114,15 @@ struct dentry_operations { - int (*d_delete)(struct dentry *); - void (*d_release)(struct dentry *); - void (*d_iput)(struct dentry *, struct inode *); -+ int (*d_revalidate2)(struct dentry *, int, struct lookup_intent *); -+ void (*d_intent_release)(struct dentry *, struct lookup_intent *); - }; - -+/* defined in fs/namei.c */ -+extern void intent_release(struct dentry *de, struct lookup_intent *it); -+/* defined in fs/dcache.c */ -+extern void __d_rehash(struct dentry * entry, int lock); -+ - /* the dentry parameter passed to d_hash and d_compare is the parent - * directory of the entries to be compared. It is used in case these - * functions need any directory specific information for determining -@@ -124,6 +154,7 @@ d_iput: no no yes - * s_nfsd_free_path semaphore will be down - */ - #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. 
*/ -+#define DCACHE_LUSTRE_INVALID 0x0010 /* Lustre invalidated */ - - extern spinlock_t dcache_lock; - ---- linux-2.4.20-l18/include/linux/fs.h~vfs_intent-2.4.20-vanilla Wed May 28 01:39:17 2003 -+++ linux-2.4.20-l18-phil/include/linux/fs.h Sun Jun 1 22:07:11 2003 -@@ -338,6 +338,8 @@ extern void set_bh_page(struct buffer_he - #define ATTR_MTIME_SET 256 - #define ATTR_FORCE 512 /* Not a change, but a change it */ - #define ATTR_ATTR_FLAG 1024 -+#define ATTR_RAW 2048 /* file system, not vfs will massage attrs */ -+#define ATTR_FROM_OPEN 4096 /* called from open path, ie O_TRUNC */ - - /* - * This is the Inode Attributes structure, used for notify_change(). It -@@ -542,6 +544,7 @@ struct file { - - /* needed for tty driver, and maybe others */ - void *private_data; -+ struct lookup_intent *f_intent; - - /* preallocated helper kiobuf to speedup O_DIRECT */ - struct kiobuf *f_iobuf; -@@ -661,6 +664,7 @@ struct nameidata { - struct qstr last; - unsigned int flags; - int last_type; -+ struct lookup_intent *it; - }; - - #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */ -@@ -794,7 +798,9 @@ extern int vfs_symlink(struct inode *, s - extern int vfs_link(struct dentry *, struct inode *, struct dentry *); - extern int vfs_rmdir(struct inode *, struct dentry *); - extern int vfs_unlink(struct inode *, struct dentry *); --extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -+int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, -+ struct inode *new_dir, struct dentry *new_dentry, -+ struct lookup_intent *it); - - /* - * File types -@@ -855,20 +861,33 @@ struct file_operations { - struct inode_operations { - int (*create) (struct inode *,struct dentry *,int); - struct dentry * (*lookup) (struct inode *,struct dentry *); -+ struct dentry * (*lookup2) (struct inode *,struct dentry *, struct lookup_intent *); - int (*link) (struct dentry *,struct inode *,struct dentry *); -+ int (*link2) (struct inode *,struct inode *, 
const char *, int); - int (*unlink) (struct inode *,struct dentry *); -+ int (*unlink2) (struct inode *, const char *, int); - int (*symlink) (struct inode *,struct dentry *,const char *); -+ int (*symlink2) (struct inode *, const char *, int, const char *); - int (*mkdir) (struct inode *,struct dentry *,int); -+ int (*mkdir2) (struct inode *, const char *, int,int); - int (*rmdir) (struct inode *,struct dentry *); -+ int (*rmdir2) (struct inode *, const char *, int); - int (*mknod) (struct inode *,struct dentry *,int,int); -+ int (*mknod2) (struct inode *, const char *, int,int,int); - int (*rename) (struct inode *, struct dentry *, - struct inode *, struct dentry *); -+ int (*rename2) (struct inode *, struct inode *, -+ const char *oldname, int oldlen, -+ const char *newname, int newlen); - int (*readlink) (struct dentry *, char *,int); - int (*follow_link) (struct dentry *, struct nameidata *); -+ int (*follow_link2) (struct dentry *, struct nameidata *, -+ struct lookup_intent *it); - void (*truncate) (struct inode *); - int (*permission) (struct inode *, int); - int (*revalidate) (struct dentry *); - int (*setattr) (struct dentry *, struct iattr *); -+ int (*setattr_raw) (struct inode *, struct iattr *); - int (*getattr) (struct dentry *, struct iattr *); - int (*setxattr) (struct dentry *, const char *, void *, size_t, int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); -@@ -1070,10 +1089,14 @@ static inline int get_lease(struct inode - - asmlinkage long sys_open(const char *, int, int); - asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ --extern int do_truncate(struct dentry *, loff_t start); -+extern int do_truncate(struct dentry *, loff_t start, int called_from_open); - - extern struct file *filp_open(const char *, int, int); - extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); -+extern int open_namei_it(const char *filename, int namei_flags, int mode, -+ struct nameidata *nd, struct 
lookup_intent *it); -+extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, -+ int flags, struct lookup_intent *it); - extern int filp_close(struct file *, fl_owner_t id); - extern char * getname(const char *); - -@@ -1335,6 +1358,7 @@ typedef int (*read_actor_t)(read_descrip - extern loff_t default_llseek(struct file *file, loff_t offset, int origin); - - extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *)); -+extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it)); - extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *)); - extern int FASTCALL(path_walk(const char *, struct nameidata *)); - extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); -@@ -1346,6 +1370,8 @@ extern struct dentry * lookup_one_len(co - extern struct dentry * lookup_hash(struct qstr *, struct dentry *); - #define user_path_walk(name,nd) __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd) - #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd) -+#define user_path_walk_it(name,nd,it) __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it) -+#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it) - - extern void iput(struct inode *); - extern void force_delete(struct inode *); -@@ -1455,6 +1481,8 @@ extern struct file_operations generic_ro - - extern int vfs_readlink(struct dentry *, char *, int, const char *); - extern int vfs_follow_link(struct nameidata *, const char *); -+extern int vfs_follow_link_it(struct nameidata *, const char *, -+ struct lookup_intent *it); - extern int page_readlink(struct dentry *, char *, int); - extern int page_follow_link(struct dentry *, struct nameidata *); - extern struct inode_operations page_symlink_inode_operations; ---- linux-2.4.20-l18/kernel/ksyms.c~vfs_intent-2.4.20-vanilla Wed May 28 01:39:18 2003 -+++ linux-2.4.20-l18-phil/kernel/ksyms.c Wed May 28 
01:39:18 2003 -@@ -269,6 +269,7 @@ EXPORT_SYMBOL(read_cache_page); - EXPORT_SYMBOL(set_page_dirty); - EXPORT_SYMBOL(vfs_readlink); - EXPORT_SYMBOL(vfs_follow_link); -+EXPORT_SYMBOL(vfs_follow_link_it); - EXPORT_SYMBOL(page_readlink); - EXPORT_SYMBOL(page_follow_link); - EXPORT_SYMBOL(page_symlink_inode_operations); - -_ diff --git a/lustre/kernel_patches/pc/dev_read_only.pc b/lustre/kernel_patches/pc/dev_read_only.pc deleted file mode 100644 index 4760ad1..0000000 --- a/lustre/kernel_patches/pc/dev_read_only.pc +++ /dev/null @@ -1,3 +0,0 @@ -drivers/block/blkpg.c -drivers/block/loop.c -drivers/ide/ide-disk.c diff --git a/lustre/kernel_patches/pc/dev_read_only_2.4.20-rh.pc b/lustre/kernel_patches/pc/dev_read_only_2.4.20-rh.pc deleted file mode 100644 index 4760ad1..0000000 --- a/lustre/kernel_patches/pc/dev_read_only_2.4.20-rh.pc +++ /dev/null @@ -1,3 +0,0 @@ -drivers/block/blkpg.c -drivers/block/loop.c -drivers/ide/ide-disk.c diff --git a/lustre/kernel_patches/pc/dev_read_only_2.4.20.pc b/lustre/kernel_patches/pc/dev_read_only_2.4.20.pc deleted file mode 100644 index 4760ad1..0000000 --- a/lustre/kernel_patches/pc/dev_read_only_2.4.20.pc +++ /dev/null @@ -1,3 +0,0 @@ -drivers/block/blkpg.c -drivers/block/loop.c -drivers/ide/ide-disk.c diff --git a/lustre/kernel_patches/pc/dev_read_only_hp_2.4.20.pc b/lustre/kernel_patches/pc/dev_read_only_hp_2.4.20.pc deleted file mode 100644 index 4760ad1..0000000 --- a/lustre/kernel_patches/pc/dev_read_only_hp_2.4.20.pc +++ /dev/null @@ -1,3 +0,0 @@ -drivers/block/blkpg.c -drivers/block/loop.c -drivers/ide/ide-disk.c diff --git a/lustre/kernel_patches/pc/dsp.pc b/lustre/kernel_patches/pc/dsp.pc deleted file mode 100644 index fdbf418..0000000 --- a/lustre/kernel_patches/pc/dsp.pc +++ /dev/null @@ -1,6 +0,0 @@ -kernel/bootimg.c -kernel/bootimg_pic.c -include/asm-i386/apic.h -include/linux/crash.h -arch/i386/kernel/crash.c -arch/i386/kernel/nmi.c diff --git a/lustre/kernel_patches/pc/export-truncate-2.5.63.pc 
b/lustre/kernel_patches/pc/export-truncate-2.5.63.pc deleted file mode 100644 index 3f61c00..0000000 --- a/lustre/kernel_patches/pc/export-truncate-2.5.63.pc +++ /dev/null @@ -1,2 +0,0 @@ -include/linux/mm.h -mm/truncate.c diff --git a/lustre/kernel_patches/pc/export-truncate.pc b/lustre/kernel_patches/pc/export-truncate.pc deleted file mode 100644 index bd58c82..0000000 --- a/lustre/kernel_patches/pc/export-truncate.pc +++ /dev/null @@ -1,2 +0,0 @@ -include/linux/mm.h -mm/filemap.c diff --git a/lustre/kernel_patches/pc/exports.pc b/lustre/kernel_patches/pc/exports.pc deleted file mode 100644 index 6472a11..0000000 --- a/lustre/kernel_patches/pc/exports.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/ext3/Makefile -fs/ext3/super.c -include/linux/fs.h -kernel/ksyms.c diff --git a/lustre/kernel_patches/pc/exports_2.4.20-rh-hp.pc b/lustre/kernel_patches/pc/exports_2.4.20-rh-hp.pc deleted file mode 100644 index 6472a11..0000000 --- a/lustre/kernel_patches/pc/exports_2.4.20-rh-hp.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/ext3/Makefile -fs/ext3/super.c -include/linux/fs.h -kernel/ksyms.c diff --git a/lustre/kernel_patches/pc/exports_2.4.20.pc b/lustre/kernel_patches/pc/exports_2.4.20.pc deleted file mode 100644 index 6472a11..0000000 --- a/lustre/kernel_patches/pc/exports_2.4.20.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/ext3/Makefile -fs/ext3/super.c -include/linux/fs.h -kernel/ksyms.c diff --git a/lustre/kernel_patches/pc/exports_hp_2.4.20.pc b/lustre/kernel_patches/pc/exports_hp_2.4.20.pc deleted file mode 100644 index 6472a11..0000000 --- a/lustre/kernel_patches/pc/exports_hp_2.4.20.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/ext3/Makefile -fs/ext3/super.c -include/linux/fs.h -kernel/ksyms.c diff --git a/lustre/kernel_patches/pc/ext-2.4-patch-1-chaos.pc b/lustre/kernel_patches/pc/ext-2.4-patch-1-chaos.pc deleted file mode 100644 index 634b944..0000000 --- a/lustre/kernel_patches/pc/ext-2.4-patch-1-chaos.pc +++ /dev/null @@ -1,11 +0,0 @@ -fs/ext3/Makefile -fs/ext3/dir.c -fs/ext3/file.c 
-fs/ext3/hash.c -fs/ext3/namei.c -fs/ext3/super.c -include/linux/ext3_fs.h -include/linux/ext3_fs_sb.h -include/linux/ext3_jbd.h -include/linux/rbtree.h -lib/rbtree.c diff --git a/lustre/kernel_patches/pc/ext-2.4-patch-1.pc b/lustre/kernel_patches/pc/ext-2.4-patch-1.pc deleted file mode 100644 index 634b944..0000000 --- a/lustre/kernel_patches/pc/ext-2.4-patch-1.pc +++ /dev/null @@ -1,11 +0,0 @@ -fs/ext3/Makefile -fs/ext3/dir.c -fs/ext3/file.c -fs/ext3/hash.c -fs/ext3/namei.c -fs/ext3/super.c -include/linux/ext3_fs.h -include/linux/ext3_fs_sb.h -include/linux/ext3_jbd.h -include/linux/rbtree.h -lib/rbtree.c diff --git a/lustre/kernel_patches/pc/ext-2.4-patch-2.pc b/lustre/kernel_patches/pc/ext-2.4-patch-2.pc deleted file mode 100644 index 9b16759..0000000 --- a/lustre/kernel_patches/pc/ext-2.4-patch-2.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/namei.c diff --git a/lustre/kernel_patches/pc/ext-2.4-patch-3.pc b/lustre/kernel_patches/pc/ext-2.4-patch-3.pc deleted file mode 100644 index 65d4845..0000000 --- a/lustre/kernel_patches/pc/ext-2.4-patch-3.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/dir.c -fs/ext3/namei.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/ext-2.4-patch-4.pc b/lustre/kernel_patches/pc/ext-2.4-patch-4.pc deleted file mode 100644 index 9b16759..0000000 --- a/lustre/kernel_patches/pc/ext-2.4-patch-4.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/namei.c diff --git a/lustre/kernel_patches/pc/ext3-2.4-ino_t.pc b/lustre/kernel_patches/pc/ext3-2.4-ino_t.pc deleted file mode 100644 index 4cef979..0000000 --- a/lustre/kernel_patches/pc/ext3-2.4-ino_t.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/ialloc.c -fs/ext3/namei.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/ext3-2.4.18-fixes.pc b/lustre/kernel_patches/pc/ext3-2.4.18-fixes.pc deleted file mode 100644 index 0822c5e..0000000 --- a/lustre/kernel_patches/pc/ext3-2.4.18-fixes.pc +++ /dev/null @@ -1,7 +0,0 @@ -fs/ext3/balloc.c -fs/ext3/file.c -fs/ext3/fsync.c -fs/ext3/ialloc.c 
-fs/ext3/inode.c -fs/ext3/namei.c -fs/ext3/super.c diff --git a/lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro.pc b/lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro.pc deleted file mode 100644 index cd21583..0000000 --- a/lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro.pc +++ /dev/null @@ -1,10 +0,0 @@ -fs/ext3/balloc.c -fs/ext3/dir.c -fs/ext3/ialloc.c -fs/ext3/inode.c -fs/ext3/ioctl.c -fs/ext3/namei.c -fs/ext3/super.c -fs/ext3/symlink.c -include/linux/ext3_fs.h -include/linux/ext3_jbd.h diff --git a/lustre/kernel_patches/pc/ext3-2.4.20-fixes.pc b/lustre/kernel_patches/pc/ext3-2.4.20-fixes.pc deleted file mode 100644 index 441ced8..0000000 --- a/lustre/kernel_patches/pc/ext3-2.4.20-fixes.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/balloc.c diff --git a/lustre/kernel_patches/pc/ext3-2.5-noread.pc b/lustre/kernel_patches/pc/ext3-2.5-noread.pc deleted file mode 100644 index 9c3cea8..0000000 --- a/lustre/kernel_patches/pc/ext3-2.5-noread.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/ialloc.c -fs/ext3/inode.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/ext3-2.5.63.pc b/lustre/kernel_patches/pc/ext3-2.5.63.pc deleted file mode 100644 index b1e5de5..0000000 --- a/lustre/kernel_patches/pc/ext3-2.5.63.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/ext3/xattr.c -fs/ext3/inode.c -fs/ext3/super.c -fs/ext3/xattr.h diff --git a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc deleted file mode 100644 index 5770132..0000000 --- a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/super.c -include/linux/ext3_fs.h -include/linux/ext3_fs_sb.h diff --git a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc deleted file mode 100644 index 5770132..0000000 --- a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.20.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/super.c -include/linux/ext3_fs.h -include/linux/ext3_fs_sb.h diff 
--git a/lustre/kernel_patches/pc/ext3-largefile.pc b/lustre/kernel_patches/pc/ext3-largefile.pc deleted file mode 100644 index 76d683f..0000000 --- a/lustre/kernel_patches/pc/ext3-largefile.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/inode.c diff --git a/lustre/kernel_patches/pc/ext3-noread-2.4.20.pc b/lustre/kernel_patches/pc/ext3-noread-2.4.20.pc deleted file mode 100644 index 9c3cea8..0000000 --- a/lustre/kernel_patches/pc/ext3-noread-2.4.20.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/ialloc.c -fs/ext3/inode.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/ext3-orphan_lock.pc b/lustre/kernel_patches/pc/ext3-orphan_lock.pc deleted file mode 100644 index 98aebb0..0000000 --- a/lustre/kernel_patches/pc/ext3-orphan_lock.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/namei.c -fs/ext3/super.c -include/linux/ext3_fs_sb.h diff --git a/lustre/kernel_patches/pc/ext3-san-2.4.20.pc b/lustre/kernel_patches/pc/ext3-san-2.4.20.pc deleted file mode 100644 index 9ed5141..0000000 --- a/lustre/kernel_patches/pc/ext3-san-2.4.20.pc +++ /dev/null @@ -1,2 +0,0 @@ -fs/ext3/inode.c -fs/ext3/ext3-exports.c diff --git a/lustre/kernel_patches/pc/ext3-truncate_blocks-chaos.patch.pc b/lustre/kernel_patches/pc/ext3-truncate_blocks-chaos.patch.pc deleted file mode 100644 index 76d683f..0000000 --- a/lustre/kernel_patches/pc/ext3-truncate_blocks-chaos.patch.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/inode.c diff --git a/lustre/kernel_patches/pc/ext3-truncate_blocks.pc b/lustre/kernel_patches/pc/ext3-truncate_blocks.pc deleted file mode 100644 index 76d683f..0000000 --- a/lustre/kernel_patches/pc/ext3-truncate_blocks.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/inode.c diff --git a/lustre/kernel_patches/pc/ext3-unmount_sync.pc b/lustre/kernel_patches/pc/ext3-unmount_sync.pc deleted file mode 100644 index 08795de..0000000 --- a/lustre/kernel_patches/pc/ext3-unmount_sync.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/super.c diff --git a/lustre/kernel_patches/pc/ext3-use-after-free.pc 
b/lustre/kernel_patches/pc/ext3-use-after-free.pc deleted file mode 100644 index daf8787..0000000 --- a/lustre/kernel_patches/pc/ext3-use-after-free.pc +++ /dev/null @@ -1 +0,0 @@ -./fs/ext3/namei.c diff --git a/lustre/kernel_patches/pc/ext3_orphan_lock-2.4.20-rh.pc b/lustre/kernel_patches/pc/ext3_orphan_lock-2.4.20-rh.pc deleted file mode 100644 index 98aebb0..0000000 --- a/lustre/kernel_patches/pc/ext3_orphan_lock-2.4.20-rh.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/namei.c -fs/ext3/super.c -include/linux/ext3_fs_sb.h diff --git a/lustre/kernel_patches/pc/extN-2.4.18-ino_sb_fixup.pc b/lustre/kernel_patches/pc/extN-2.4.18-ino_sb_fixup.pc deleted file mode 100644 index 7191405..0000000 --- a/lustre/kernel_patches/pc/extN-2.4.18-ino_sb_fixup.pc +++ /dev/null @@ -1 +0,0 @@ -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/extN-delete_thread.pc b/lustre/kernel_patches/pc/extN-delete_thread.pc deleted file mode 100644 index bc81732..0000000 --- a/lustre/kernel_patches/pc/extN-delete_thread.pc +++ /dev/null @@ -1,3 +0,0 @@ -include/linux/ext3_fs.h -include/linux/ext3_fs_sb.h -fs/ext3/super.c diff --git a/lustre/kernel_patches/pc/extN-iget-debug.pc b/lustre/kernel_patches/pc/extN-iget-debug.pc deleted file mode 100644 index e9fe01e..0000000 --- a/lustre/kernel_patches/pc/extN-iget-debug.pc +++ /dev/null @@ -1,2 +0,0 @@ -fs/ext3/namei.c -fs/ext3/inode.c diff --git a/lustre/kernel_patches/pc/extN-misc-fixup.pc b/lustre/kernel_patches/pc/extN-misc-fixup.pc deleted file mode 100644 index 08795de..0000000 --- a/lustre/kernel_patches/pc/extN-misc-fixup.pc +++ /dev/null @@ -1 +0,0 @@ -fs/ext3/super.c diff --git a/lustre/kernel_patches/pc/extN-noread.pc b/lustre/kernel_patches/pc/extN-noread.pc deleted file mode 100644 index 9c3cea8..0000000 --- a/lustre/kernel_patches/pc/extN-noread.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/ext3/ialloc.c -fs/ext3/inode.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/extN-san.pc b/lustre/kernel_patches/pc/extN-san.pc 
deleted file mode 100644 index 231df0e..0000000 --- a/lustre/kernel_patches/pc/extN-san.pc +++ /dev/null @@ -1,2 +0,0 @@ -fs/ext3/inode.c -fs/ext3/super.c diff --git a/lustre/kernel_patches/pc/extN-wantedi.pc b/lustre/kernel_patches/pc/extN-wantedi.pc deleted file mode 100644 index 31901ee..0000000 --- a/lustre/kernel_patches/pc/extN-wantedi.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/ext3/namei.c -fs/ext3/ialloc.c -fs/ext3/ioctl.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/htree-ext3-2.4.18.pc b/lustre/kernel_patches/pc/htree-ext3-2.4.18.pc deleted file mode 100644 index 6499778..0000000 --- a/lustre/kernel_patches/pc/htree-ext3-2.4.18.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/ext3/super.c -fs/ext3/namei.c -include/linux/ext3_fs.h -include/linux/ext3_jbd.h diff --git a/lustre/kernel_patches/pc/invalidate_show-2.4.20-rh.pc b/lustre/kernel_patches/pc/invalidate_show-2.4.20-rh.pc deleted file mode 100644 index 1d4ed77..0000000 --- a/lustre/kernel_patches/pc/invalidate_show-2.4.20-rh.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/inode.c -fs/super.c -include/linux/fs.h -fs/smbfs/inode.c diff --git a/lustre/kernel_patches/pc/invalidate_show.pc b/lustre/kernel_patches/pc/invalidate_show.pc deleted file mode 100644 index 1d4ed77..0000000 --- a/lustre/kernel_patches/pc/invalidate_show.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/inode.c -fs/super.c -include/linux/fs.h -fs/smbfs/inode.c diff --git a/lustre/kernel_patches/pc/iod-rmap-exports-2.4.20.pc b/lustre/kernel_patches/pc/iod-rmap-exports-2.4.20.pc deleted file mode 100644 index 07288b0..0000000 --- a/lustre/kernel_patches/pc/iod-rmap-exports-2.4.20.pc +++ /dev/null @@ -1,5 +0,0 @@ -fs/inode.c -fs/Makefile -mm/vmscan.c -mm/Makefile -mm/page_alloc.c diff --git a/lustre/kernel_patches/pc/iod-rmap-exports.pc b/lustre/kernel_patches/pc/iod-rmap-exports.pc deleted file mode 100644 index 07288b0..0000000 --- a/lustre/kernel_patches/pc/iod-rmap-exports.pc +++ /dev/null @@ -1,5 +0,0 @@ -fs/inode.c -fs/Makefile -mm/vmscan.c -mm/Makefile 
-mm/page_alloc.c diff --git a/lustre/kernel_patches/pc/iod-stock-24-exports.pc b/lustre/kernel_patches/pc/iod-stock-24-exports.pc deleted file mode 100644 index e4eceee..0000000 --- a/lustre/kernel_patches/pc/iod-stock-24-exports.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/inode.c -fs/Makefile -mm/page_alloc.c diff --git a/lustre/kernel_patches/pc/iod-stock-24-exports_hp.pc b/lustre/kernel_patches/pc/iod-stock-24-exports_hp.pc deleted file mode 100644 index e4eceee..0000000 --- a/lustre/kernel_patches/pc/iod-stock-24-exports_hp.pc +++ /dev/null @@ -1,3 +0,0 @@ -fs/inode.c -fs/Makefile -mm/page_alloc.c diff --git a/lustre/kernel_patches/pc/iopen-2.4.18.pc b/lustre/kernel_patches/pc/iopen-2.4.18.pc deleted file mode 100644 index b40b1f3..0000000 --- a/lustre/kernel_patches/pc/iopen-2.4.18.pc +++ /dev/null @@ -1,8 +0,0 @@ -Documentation/filesystems/ext2.txt -fs/ext3/Makefile -fs/ext3/inode.c -fs/ext3/iopen.c -fs/ext3/iopen.h -fs/ext3/namei.c -fs/ext3/super.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/iopen-2.4.20.pc b/lustre/kernel_patches/pc/iopen-2.4.20.pc deleted file mode 100644 index b40b1f3..0000000 --- a/lustre/kernel_patches/pc/iopen-2.4.20.pc +++ /dev/null @@ -1,8 +0,0 @@ -Documentation/filesystems/ext2.txt -fs/ext3/Makefile -fs/ext3/inode.c -fs/ext3/iopen.c -fs/ext3/iopen.h -fs/ext3/namei.c -fs/ext3/super.c -include/linux/ext3_fs.h diff --git a/lustre/kernel_patches/pc/jbd-transno-cb.pc b/lustre/kernel_patches/pc/jbd-transno-cb.pc deleted file mode 100644 index cde73d8..0000000 --- a/lustre/kernel_patches/pc/jbd-transno-cb.pc +++ /dev/null @@ -1,4 +0,0 @@ -fs/jbd/commit.c -fs/jbd/journal.c -fs/jbd/transaction.c -include/linux/jbd.h diff --git a/lustre/kernel_patches/pc/kmem_cache_validate.pc b/lustre/kernel_patches/pc/kmem_cache_validate.pc deleted file mode 100644 index a0a6297..0000000 --- a/lustre/kernel_patches/pc/kmem_cache_validate.pc +++ /dev/null @@ -1,5 +0,0 @@ -arch/i386/mm/init.c -arch/ia64/mm/init.c -include/linux/slab.h 
-kernel/ksyms.c -mm/slab.c diff --git a/lustre/kernel_patches/pc/kmem_cache_validate_2.4.20-rh.pc b/lustre/kernel_patches/pc/kmem_cache_validate_2.4.20-rh.pc deleted file mode 100644 index a0a6297..0000000 --- a/lustre/kernel_patches/pc/kmem_cache_validate_2.4.20-rh.pc +++ /dev/null @@ -1,5 +0,0 @@ -arch/i386/mm/init.c -arch/ia64/mm/init.c -include/linux/slab.h -kernel/ksyms.c -mm/slab.c diff --git a/lustre/kernel_patches/pc/kmem_cache_validate_2.4.20.pc b/lustre/kernel_patches/pc/kmem_cache_validate_2.4.20.pc deleted file mode 100644 index bdba884..0000000 --- a/lustre/kernel_patches/pc/kmem_cache_validate_2.4.20.pc +++ /dev/null @@ -1,5 +0,0 @@ -arch/ia64/mm/init.c -include/linux/slab.h -kernel/ksyms.c -mm/slab.c -arch/i386/mm/init.c diff --git a/lustre/kernel_patches/pc/kmem_cache_validate_hp.pc b/lustre/kernel_patches/pc/kmem_cache_validate_hp.pc deleted file mode 100644 index bdba884..0000000 --- a/lustre/kernel_patches/pc/kmem_cache_validate_hp.pc +++ /dev/null @@ -1,5 +0,0 @@ -arch/ia64/mm/init.c -include/linux/slab.h -kernel/ksyms.c -mm/slab.c -arch/i386/mm/init.c diff --git a/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc b/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc deleted file mode 100644 index b647d5a..0000000 --- a/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26.pc +++ /dev/null @@ -1,10 +0,0 @@ -fs/ext3/ialloc.c -fs/ext3/inode.c -fs/ext3/namei.c -fs/ext3/super.c -fs/ext3/xattr.c -include/linux/ext3_fs.h -include/linux/ext3_jbd.h -include/linux/ext3_xattr.h -include/linux/xattr.h -fs/ext3/Makefile diff --git a/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-chaos.pc b/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-chaos.pc deleted file mode 100644 index dbf35cb..0000000 --- a/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-chaos.pc +++ /dev/null @@ -1,62 +0,0 @@ -Documentation/Configure.help -arch/alpha/defconfig -arch/alpha/kernel/entry.S -arch/arm/defconfig -arch/arm/kernel/calls.S -arch/i386/defconfig -arch/ia64/defconfig 
-arch/m68k/defconfig -arch/mips/defconfig -arch/mips64/defconfig -arch/ppc/defconfig -arch/ppc64/kernel/misc.S -arch/s390/defconfig -arch/s390/kernel/entry.S -arch/s390x/defconfig -arch/s390x/kernel/entry.S -arch/s390x/kernel/wrapper32.S -arch/sparc/defconfig -arch/sparc/kernel/systbls.S -arch/sparc64/defconfig -arch/sparc64/kernel/systbls.S -fs/Config.in -fs/Makefile -fs/ext2/Makefile -fs/ext2/file.c -fs/ext2/ialloc.c -fs/ext2/inode.c -fs/ext2/namei.c -fs/ext2/super.c -fs/ext2/symlink.c -fs/ext2/xattr.c -fs/ext2/xattr_user.c -fs/ext3/Makefile -fs/ext3/file.c -fs/ext3/ialloc.c -fs/ext3/inode.c -fs/ext3/namei.c -fs/ext3/super.c -fs/ext3/symlink.c -fs/ext3/xattr.c -fs/ext3/xattr_user.c -fs/jfs/jfs_xattr.h -fs/jfs/xattr.c -fs/mbcache.c -include/asm-arm/unistd.h -include/asm-ppc64/unistd.h -include/asm-s390/unistd.h -include/asm-s390x/unistd.h -include/asm-sparc/unistd.h -include/asm-sparc64/unistd.h -include/linux/cache_def.h -include/linux/errno.h -include/linux/ext2_fs.h -include/linux/ext2_xattr.h -include/linux/ext3_fs.h -include/linux/ext3_jbd.h -include/linux/ext3_xattr.h -include/linux/fs.h -include/linux/mbcache.h -kernel/ksyms.c -mm/vmscan.c -fs/ext3/ext3-exports.c diff --git a/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-hp.pc b/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-hp.pc deleted file mode 100644 index 1e8cf75..0000000 --- a/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54-hp.pc +++ /dev/null @@ -1,62 +0,0 @@ -Documentation/Configure.help -arch/alpha/defconfig -arch/alpha/kernel/entry.S -arch/arm/defconfig -arch/arm/kernel/calls.S -arch/i386/defconfig -arch/ia64/defconfig -arch/m68k/defconfig -arch/mips/defconfig -arch/mips64/defconfig -arch/ppc/defconfig -arch/ppc64/kernel/misc.S -arch/s390/defconfig -arch/s390/kernel/entry.S -arch/s390x/defconfig -arch/s390x/kernel/entry.S -arch/s390x/kernel/wrapper32.S -arch/sparc/defconfig -arch/sparc/kernel/systbls.S -arch/sparc64/defconfig -arch/sparc64/kernel/systbls.S -fs/Config.in -fs/Makefile 
-fs/ext2/Makefile -fs/ext2/file.c -fs/ext2/ialloc.c -fs/ext2/inode.c -fs/ext2/namei.c -fs/ext2/super.c -fs/ext2/symlink.c -fs/ext2/xattr.c -fs/ext2/xattr_user.c -fs/ext3/Makefile -fs/ext3/file.c -fs/ext3/ialloc.c -fs/ext3/inode.c -fs/ext3/namei.c -fs/ext3/super.c -fs/ext3/symlink.c -fs/ext3/xattr.c -fs/ext3/xattr_user.c -fs/ext3/ext3-exports.c -fs/jfs/jfs_xattr.h -fs/jfs/xattr.c -fs/mbcache.c -include/asm-arm/unistd.h -include/asm-ppc64/unistd.h -include/asm-s390/unistd.h -include/asm-s390x/unistd.h -include/asm-sparc/unistd.h -include/asm-sparc64/unistd.h -include/linux/cache_def.h -include/linux/errno.h -include/linux/ext2_fs.h -include/linux/ext2_xattr.h -include/linux/ext3_fs.h -include/linux/ext3_jbd.h -include/linux/ext3_xattr.h -include/linux/fs.h -include/linux/mbcache.h -kernel/ksyms.c -mm/vmscan.c diff --git a/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54.pc b/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54.pc deleted file mode 100644 index 2de1b2c..0000000 --- a/lustre/kernel_patches/pc/linux-2.4.20-xattr-0.8.54.pc +++ /dev/null @@ -1,64 +0,0 @@ -Documentation/Configure.help -arch/alpha/defconfig -arch/alpha/kernel/entry.S -arch/arm/defconfig -arch/arm/kernel/calls.S -arch/i386/defconfig -arch/ia64/defconfig -arch/ia64/kernel/entry.S -arch/m68k/defconfig -arch/mips/defconfig -arch/mips64/defconfig -arch/ppc/defconfig -arch/ppc64/kernel/misc.S -arch/s390/defconfig -arch/s390/kernel/entry.S -arch/s390x/defconfig -arch/s390x/kernel/entry.S -arch/s390x/kernel/wrapper32.S -arch/sparc/defconfig -arch/sparc/kernel/systbls.S -arch/sparc64/defconfig -arch/sparc64/kernel/systbls.S -fs/Config.in -fs/Makefile -fs/ext2/Makefile -fs/ext2/file.c -fs/ext2/ialloc.c -fs/ext2/inode.c -fs/ext2/namei.c -fs/ext2/super.c -fs/ext2/symlink.c -fs/ext2/xattr.c -fs/ext2/xattr_user.c -fs/ext3/Makefile -fs/ext3/file.c -fs/ext3/ialloc.c -fs/ext3/inode.c -fs/ext3/namei.c -fs/ext3/super.c -fs/ext3/symlink.c -fs/ext3/xattr.c -fs/ext3/xattr_user.c -fs/jfs/jfs_xattr.h 
-fs/jfs/xattr.c -fs/mbcache.c -include/asm-arm/unistd.h -include/asm-ia64/unistd.h -include/asm-ppc64/unistd.h -include/asm-s390/unistd.h -include/asm-s390x/unistd.h -include/asm-sparc/unistd.h -include/asm-sparc64/unistd.h -include/linux/cache_def.h -include/linux/errno.h -include/linux/ext2_fs.h -include/linux/ext2_xattr.h -include/linux/ext3_fs.h -include/linux/ext3_jbd.h -include/linux/ext3_xattr.h -include/linux/fs.h -include/linux/mbcache.h -kernel/ksyms.c -mm/vmscan.c -fs/ext3/ext3-exports.c diff --git a/lustre/kernel_patches/pc/lustre-2.5.63.pc b/lustre/kernel_patches/pc/lustre-2.5.63.pc deleted file mode 100644 index daeea17..0000000 --- a/lustre/kernel_patches/pc/lustre-2.5.63.pc +++ /dev/null @@ -1,12 +0,0 @@ -arch/um/kernel/mem.c -fs/namei.c -fs/nfsd/vfs.c -fs/sysfs/inode.c -include/linux/dcache.h -include/linux/fs.h -include/linux/namei.h -include/linux/slab.h -kernel/ksyms.c -mm/slab.c -net/unix/af_unix.c -fs/dcache.c diff --git a/lustre/kernel_patches/pc/lustre-2.5.pc b/lustre/kernel_patches/pc/lustre-2.5.pc deleted file mode 100644 index 71434ea..0000000 --- a/lustre/kernel_patches/pc/lustre-2.5.pc +++ /dev/null @@ -1,11 +0,0 @@ -arch/um/kernel/mem.c -fs/namei.c -fs/nfsd/vfs.c -fs/sysfs/inode.c -include/linux/dcache.h -include/linux/fs.h -include/linux/namei.h -include/linux/slab.h -kernel/ksyms.c -mm/slab.c -net/unix/af_unix.c diff --git a/lustre/kernel_patches/pc/lustre_version.pc b/lustre/kernel_patches/pc/lustre_version.pc deleted file mode 100644 index 898bebd..0000000 --- a/lustre/kernel_patches/pc/lustre_version.pc +++ /dev/null @@ -1 +0,0 @@ -include/linux/lustre_version.h diff --git a/lustre/kernel_patches/pc/mcore-2.4.20-8.pc b/lustre/kernel_patches/pc/mcore-2.4.20-8.pc deleted file mode 100644 index b290f60..0000000 --- a/lustre/kernel_patches/pc/mcore-2.4.20-8.pc +++ /dev/null @@ -1,34 +0,0 @@ -Makefile -Documentation/Configure.help -arch/i386/config.in -arch/i386/vmlinux.lds -arch/i386/boot/setup.S -arch/i386/kernel/Makefile 
-arch/i386/kernel/crash.c -arch/i386/kernel/nmi.c -arch/i386/kernel/process.c -arch/i386/kernel/setup.c -arch/i386/kernel/smp.c -arch/i386/kernel/traps.c -drivers/char/misc.c -drivers/char/sysrq.c -include/asm-i386/bootimg.h -include/asm-i386/crash.h -include/linux/bootimg.h -include/linux/crash.h -include/linux/mm.h -include/linux/reboot.h -include/linux/sysctl.h -init/main.c -kernel/Makefile -kernel/bootimg.c -kernel/bootimg_pic.c -kernel/crash.c -kernel/module.c -kernel/panic.c -kernel/sysctl.c -lib/Config.in -mm/memory.c -mm/page_alloc.c -arch/i386//boot/compressed/head.S -arch/i386//kernel/head.S diff --git a/lustre/kernel_patches/pc/netconsole_sysrq.pc b/lustre/kernel_patches/pc/netconsole_sysrq.pc deleted file mode 100644 index 030fc19..0000000 --- a/lustre/kernel_patches/pc/netconsole_sysrq.pc +++ /dev/null @@ -1,2 +0,0 @@ -drivers/net/netconsole.c -drivers/net/netconsole.h diff --git a/lustre/kernel_patches/pc/tcp-zero-copy.pc b/lustre/kernel_patches/pc/tcp-zero-copy.pc deleted file mode 100644 index 02877c0..0000000 --- a/lustre/kernel_patches/pc/tcp-zero-copy.pc +++ /dev/null @@ -1,5 +0,0 @@ -include/linux/skbuff.h -include/net/tcp.h -net/netsyms.c -net/core/skbuff.c -net/ipv4/tcp.c diff --git a/lustre/kernel_patches/pc/tg3_netconsole.pc b/lustre/kernel_patches/pc/tg3_netconsole.pc deleted file mode 100644 index 6653b7b..0000000 --- a/lustre/kernel_patches/pc/tg3_netconsole.pc +++ /dev/null @@ -1 +0,0 @@ -drivers/net/tg3.c diff --git a/lustre/kernel_patches/pc/uml-patch-2.4.20-4.pc b/lustre/kernel_patches/pc/uml-patch-2.4.20-4.pc deleted file mode 100644 index 887e3fa..0000000 --- a/lustre/kernel_patches/pc/uml-patch-2.4.20-4.pc +++ /dev/null @@ -1,394 +0,0 @@ -CREDITS -Documentation/Configure.help -MAINTAINERS -Makefile -arch/um/Makefile -arch/um/Makefile-i386 -arch/um/Makefile-ia64 -arch/um/Makefile-os-Linux -arch/um/Makefile-ppc -arch/um/Makefile-skas -arch/um/Makefile-tt -arch/um/common.ld.in -arch/um/config.in -arch/um/config.release 
-arch/um/config_block.in -arch/um/config_char.in -arch/um/config_net.in -arch/um/config_scsi.in -arch/um/defconfig -arch/um/drivers/Makefile -arch/um/drivers/chan_kern.c -arch/um/drivers/chan_user.c -arch/um/drivers/daemon.h -arch/um/drivers/daemon_kern.c -arch/um/drivers/daemon_user.c -arch/um/drivers/fd.c -arch/um/drivers/harddog_kern.c -arch/um/drivers/harddog_user.c -arch/um/drivers/hostaudio_kern.c -arch/um/drivers/hostaudio_user.c -arch/um/drivers/line.c -arch/um/drivers/mcast.h -arch/um/drivers/mcast_kern.c -arch/um/drivers/mcast_user.c -arch/um/drivers/mconsole_kern.c -arch/um/drivers/mconsole_user.c -arch/um/drivers/mmapper_kern.c -arch/um/drivers/net_kern.c -arch/um/drivers/net_user.c -arch/um/drivers/null.c -arch/um/drivers/pcap_kern.c -arch/um/drivers/pcap_user.c -arch/um/drivers/pcap_user.h -arch/um/drivers/port.h -arch/um/drivers/port_kern.c -arch/um/drivers/port_user.c -arch/um/drivers/pty.c -arch/um/drivers/slip.h -arch/um/drivers/slip_kern.c -arch/um/drivers/slip_proto.h -arch/um/drivers/slip_user.c -arch/um/drivers/slirp.h -arch/um/drivers/slirp_kern.c -arch/um/drivers/slirp_user.c -arch/um/drivers/ssl.c -arch/um/drivers/ssl.h -arch/um/drivers/stdio_console.c -arch/um/drivers/stdio_console.h -arch/um/drivers/tty.c -arch/um/drivers/ubd_kern.c -arch/um/drivers/ubd_user.c -arch/um/drivers/xterm.c -arch/um/drivers/xterm.h -arch/um/drivers/xterm_kern.c -arch/um/dyn_link.ld.in -arch/um/fs/Makefile -arch/um/fs/hostfs/Makefile -arch/um/fs/hostfs/hostfs.h -arch/um/fs/hostfs/hostfs_kern.c -arch/um/fs/hostfs/hostfs_user.c -arch/um/fs/hppfs/Makefile -arch/um/fs/hppfs/hppfs_kern.c -arch/um/include/2_5compat.h -arch/um/include/Makefile -arch/um/include/chan_kern.h -arch/um/include/chan_user.h -arch/um/include/choose-mode.h -arch/um/include/frame.h -arch/um/include/frame_kern.h -arch/um/include/frame_user.h -arch/um/include/helper.h -arch/um/include/hostaudio.h -arch/um/include/init.h -arch/um/include/initrd.h -arch/um/include/irq_user.h -arch/um/include/kern.h 
-arch/um/include/kern_util.h -arch/um/include/line.h -arch/um/include/mconsole.h -arch/um/include/mconsole_kern.h -arch/um/include/mem.h -arch/um/include/mem_user.h -arch/um/include/mode.h -arch/um/include/mode_kern.h -arch/um/include/net_kern.h -arch/um/include/net_user.h -arch/um/include/os.h -arch/um/include/process.h -arch/um/include/ptrace_user.h -arch/um/include/sigcontext.h -arch/um/include/sigio.h -arch/um/include/signal_kern.h -arch/um/include/signal_user.h -arch/um/include/skas_ptrace.h -arch/um/include/syscall_user.h -arch/um/include/sysdep-i386/checksum.h -arch/um/include/sysdep-i386/frame.h -arch/um/include/sysdep-i386/frame_kern.h -arch/um/include/sysdep-i386/frame_user.h -arch/um/include/sysdep-i386/ptrace.h -arch/um/include/sysdep-i386/ptrace_user.h -arch/um/include/sysdep-i386/sigcontext.h -arch/um/include/sysdep-i386/syscalls.h -arch/um/include/sysdep-ia64/ptrace.h -arch/um/include/sysdep-ia64/sigcontext.h -arch/um/include/sysdep-ia64/syscalls.h -arch/um/include/sysdep-ppc/ptrace.h -arch/um/include/sysdep-ppc/sigcontext.h -arch/um/include/sysdep-ppc/syscalls.h -arch/um/include/sysrq.h -arch/um/include/tempfile.h -arch/um/include/time_user.h -arch/um/include/tlb.h -arch/um/include/ubd_user.h -arch/um/include/um_mmu.h -arch/um/include/um_uaccess.h -arch/um/include/umid.h -arch/um/include/uml_uaccess.h -arch/um/include/umn.h -arch/um/include/user.h -arch/um/include/user_util.h -arch/um/kernel/Makefile -arch/um/kernel/checksum.c -arch/um/kernel/config.c.in -arch/um/kernel/exec_kern.c -arch/um/kernel/exitcode.c -arch/um/kernel/frame.c -arch/um/kernel/frame_kern.c -arch/um/kernel/gmon_syms.c -arch/um/kernel/gprof_syms.c -arch/um/kernel/helper.c -arch/um/kernel/init_task.c -arch/um/kernel/initrd_kern.c -arch/um/kernel/initrd_user.c -arch/um/kernel/irq.c -arch/um/kernel/irq_user.c -arch/um/kernel/ksyms.c -arch/um/kernel/mem.c -arch/um/kernel/mem_user.c -arch/um/kernel/mprot.h -arch/um/kernel/process.c -arch/um/kernel/process_kern.c 
-arch/um/kernel/ptrace.c -arch/um/kernel/reboot.c -arch/um/kernel/resource.c -arch/um/kernel/sigio_kern.c -arch/um/kernel/sigio_user.c -arch/um/kernel/signal_kern.c -arch/um/kernel/signal_user.c -arch/um/kernel/skas/Makefile -arch/um/kernel/skas/exec_kern.c -arch/um/kernel/skas/exec_user.c -arch/um/kernel/skas/include/mmu.h -arch/um/kernel/skas/include/mode.h -arch/um/kernel/skas/include/mode_kern.h -arch/um/kernel/skas/include/proc_mm.h -arch/um/kernel/skas/include/ptrace-skas.h -arch/um/kernel/skas/include/skas.h -arch/um/kernel/skas/include/uaccess.h -arch/um/kernel/skas/mem.c -arch/um/kernel/skas/mem_user.c -arch/um/kernel/skas/mmu.c -arch/um/kernel/skas/process.c -arch/um/kernel/skas/process_kern.c -arch/um/kernel/skas/sys-i386/Makefile -arch/um/kernel/skas/sys-i386/sigcontext.c -arch/um/kernel/skas/syscall_kern.c -arch/um/kernel/skas/syscall_user.c -arch/um/kernel/skas/time.c -arch/um/kernel/skas/tlb.c -arch/um/kernel/skas/trap_user.c -arch/um/kernel/skas/util/Makefile -arch/um/kernel/skas/util/mk_ptregs.c -arch/um/kernel/smp.c -arch/um/kernel/sys_call_table.c -arch/um/kernel/syscall_kern.c -arch/um/kernel/syscall_user.c -arch/um/kernel/sysrq.c -arch/um/kernel/tempfile.c -arch/um/kernel/time.c -arch/um/kernel/time_kern.c -arch/um/kernel/tlb.c -arch/um/kernel/trap_kern.c -arch/um/kernel/trap_user.c -arch/um/kernel/tt/Makefile -arch/um/kernel/tt/exec_kern.c -arch/um/kernel/tt/exec_user.c -arch/um/kernel/tt/gdb.c -arch/um/kernel/tt/gdb_kern.c -arch/um/kernel/tt/include/debug.h -arch/um/kernel/tt/include/mmu.h -arch/um/kernel/tt/include/mode.h -arch/um/kernel/tt/include/mode_kern.h -arch/um/kernel/tt/include/ptrace-tt.h -arch/um/kernel/tt/include/tt.h -arch/um/kernel/tt/include/uaccess.h -arch/um/kernel/tt/ksyms.c -arch/um/kernel/tt/mem.c -arch/um/kernel/tt/mem_user.c -arch/um/kernel/tt/process_kern.c -arch/um/kernel/tt/ptproxy/Makefile -arch/um/kernel/tt/ptproxy/proxy.c -arch/um/kernel/tt/ptproxy/ptproxy.h -arch/um/kernel/tt/ptproxy/ptrace.c 
-arch/um/kernel/tt/ptproxy/sysdep.c -arch/um/kernel/tt/ptproxy/sysdep.h -arch/um/kernel/tt/ptproxy/wait.c -arch/um/kernel/tt/ptproxy/wait.h -arch/um/kernel/tt/sys-i386/Makefile -arch/um/kernel/tt/sys-i386/sigcontext.c -arch/um/kernel/tt/syscall_kern.c -arch/um/kernel/tt/syscall_user.c -arch/um/kernel/tt/time.c -arch/um/kernel/tt/tlb.c -arch/um/kernel/tt/tracer.c -arch/um/kernel/tt/trap_user.c -arch/um/kernel/tt/uaccess_user.c -arch/um/kernel/tt/unmap.c -arch/um/kernel/tty_log.c -arch/um/kernel/uaccess_user.c -arch/um/kernel/um_arch.c -arch/um/kernel/umid.c -arch/um/kernel/user_syms.c -arch/um/kernel/user_util.c -arch/um/link.ld.in -arch/um/main.c -arch/um/os-Linux/Makefile -arch/um/os-Linux/drivers/Makefile -arch/um/os-Linux/drivers/etap.h -arch/um/os-Linux/drivers/ethertap_kern.c -arch/um/os-Linux/drivers/ethertap_user.c -arch/um/os-Linux/drivers/tuntap.h -arch/um/os-Linux/drivers/tuntap_kern.c -arch/um/os-Linux/drivers/tuntap_user.c -arch/um/os-Linux/file.c -arch/um/os-Linux/include/file.h -arch/um/os-Linux/process.c -arch/um/os-Linux/tty.c -arch/um/sys-i386/Makefile -arch/um/sys-i386/bugs.c -arch/um/sys-i386/checksum.S -arch/um/sys-i386/fault.c -arch/um/sys-i386/ksyms.c -arch/um/sys-i386/ldt.c -arch/um/sys-i386/ptrace.c -arch/um/sys-i386/ptrace_user.c -arch/um/sys-i386/sigcontext.c -arch/um/sys-i386/syscalls.c -arch/um/sys-i386/sysrq.c -arch/um/sys-i386/util/Makefile -arch/um/sys-i386/util/mk_sc.c -arch/um/sys-i386/util/mk_thread_kern.c -arch/um/sys-i386/util/mk_thread_user.c -arch/um/sys-ia64/Makefile -arch/um/sys-ppc/Makefile -arch/um/sys-ppc/misc.S -arch/um/sys-ppc/miscthings.c -arch/um/sys-ppc/ptrace.c -arch/um/sys-ppc/ptrace_user.c -arch/um/sys-ppc/sigcontext.c -arch/um/sys-ppc/sysrq.c -arch/um/util/Makefile -arch/um/util/mk_constants_kern.c -arch/um/util/mk_constants_user.c -arch/um/util/mk_task_kern.c -arch/um/util/mk_task_user.c -drivers/char/Makefile -drivers/char/tty_io.c -drivers/net/setup.c -include/asm-i386/hardirq.h -include/asm-um/a.out.h 
-include/asm-um/arch-signal-i386.h -include/asm-um/archparam-i386.h -include/asm-um/archparam-ppc.h -include/asm-um/atomic.h -include/asm-um/bitops.h -include/asm-um/boot.h -include/asm-um/bugs.h -include/asm-um/byteorder.h -include/asm-um/cache.h -include/asm-um/checksum.h -include/asm-um/cobalt.h -include/asm-um/current.h -include/asm-um/delay.h -include/asm-um/desc.h -include/asm-um/div64.h -include/asm-um/dma.h -include/asm-um/elf.h -include/asm-um/errno.h -include/asm-um/fcntl.h -include/asm-um/fixmap.h -include/asm-um/floppy.h -include/asm-um/hardirq.h -include/asm-um/hdreg.h -include/asm-um/highmem.h -include/asm-um/hw_irq.h -include/asm-um/ide.h -include/asm-um/init.h -include/asm-um/io.h -include/asm-um/ioctl.h -include/asm-um/ioctls.h -include/asm-um/ipc.h -include/asm-um/ipcbuf.h -include/asm-um/irq.h -include/asm-um/keyboard.h -include/asm-um/kmap_types.h -include/asm-um/linux_logo.h -include/asm-um/locks.h -include/asm-um/mca_dma.h -include/asm-um/mman.h -include/asm-um/mmu.h -include/asm-um/mmu_context.h -include/asm-um/module.h -include/asm-um/msgbuf.h -include/asm-um/mtrr.h -include/asm-um/namei.h -include/asm-um/page.h -include/asm-um/page_offset.h -include/asm-um/param.h -include/asm-um/pci.h -include/asm-um/pgalloc.h -include/asm-um/pgtable.h -include/asm-um/poll.h -include/asm-um/posix_types.h -include/asm-um/processor-generic.h -include/asm-um/processor-i386.h -include/asm-um/processor-ppc.h -include/asm-um/ptrace-generic.h -include/asm-um/ptrace-i386.h -include/asm-um/resource.h -include/asm-um/rwlock.h -include/asm-um/rwsem.h -include/asm-um/scatterlist.h -include/asm-um/segment.h -include/asm-um/semaphore.h -include/asm-um/sembuf.h -include/asm-um/serial.h -include/asm-um/shmbuf.h -include/asm-um/shmparam.h -include/asm-um/sigcontext-generic.h -include/asm-um/sigcontext-i386.h -include/asm-um/sigcontext-ppc.h -include/asm-um/siginfo.h -include/asm-um/signal.h -include/asm-um/smp.h -include/asm-um/smplock.h -include/asm-um/socket.h 
-include/asm-um/sockios.h -include/asm-um/softirq.h -include/asm-um/spinlock.h -include/asm-um/stat.h -include/asm-um/statfs.h -include/asm-um/string.h -include/asm-um/system-generic.h -include/asm-um/system-i386.h -include/asm-um/system-ppc.h -include/asm-um/termbits.h -include/asm-um/termios.h -include/asm-um/timex.h -include/asm-um/tlb.h -include/asm-um/types.h -include/asm-um/uaccess.h -include/asm-um/ucontext.h -include/asm-um/unaligned.h -include/asm-um/unistd.h -include/asm-um/user.h -include/asm-um/vga.h -include/asm-um/xor.h -include/linux/blk.h -include/linux/fs.h -include/linux/hostfs_fs_i.h -include/linux/hppfs_fs_i.h -include/linux/kernel.h -include/linux/kernel_stat.h -include/linux/mm.h -include/linux/proc_mm.h -include/linux/tty.h -init/do_mounts.c -kernel/panic.c -mm/Makefile -mm/mmap.c -mm/mprotect.c -mm/proc_mm.c -mm/slab.c diff --git a/lustre/kernel_patches/pc/uml_check_get_page.pc b/lustre/kernel_patches/pc/uml_check_get_page.pc deleted file mode 100644 index 0e90ce5..0000000 --- a/lustre/kernel_patches/pc/uml_check_get_page.pc +++ /dev/null @@ -1 +0,0 @@ -arch/um/kernel/mem.c diff --git a/lustre/kernel_patches/pc/uml_compile_fixes.pc b/lustre/kernel_patches/pc/uml_compile_fixes.pc deleted file mode 100644 index cd28cbd..0000000 --- a/lustre/kernel_patches/pc/uml_compile_fixes.pc +++ /dev/null @@ -1 +0,0 @@ -include/asm-um/pgtable.h diff --git a/lustre/kernel_patches/pc/uml_no_panic.pc b/lustre/kernel_patches/pc/uml_no_panic.pc deleted file mode 100644 index 0e90ce5..0000000 --- a/lustre/kernel_patches/pc/uml_no_panic.pc +++ /dev/null @@ -1 +0,0 @@ -arch/um/kernel/mem.c diff --git a/lustre/kernel_patches/pc/vfs_intent-2.4.18-18.pc b/lustre/kernel_patches/pc/vfs_intent-2.4.18-18.pc deleted file mode 100644 index 8801aa7..0000000 --- a/lustre/kernel_patches/pc/vfs_intent-2.4.18-18.pc +++ /dev/null @@ -1,10 +0,0 @@ -fs/dcache.c -fs/namei.c -fs/nfsd/vfs.c -fs/open.c -fs/proc/base.c -fs/stat.c -fs/exec.c -include/linux/dcache.h -include/linux/fs.h 
-kernel/ksyms.c diff --git a/lustre/kernel_patches/pc/vfs_intent-2.4.20-rh.pc b/lustre/kernel_patches/pc/vfs_intent-2.4.20-rh.pc deleted file mode 100644 index fbe6ff1..0000000 --- a/lustre/kernel_patches/pc/vfs_intent-2.4.20-rh.pc +++ /dev/null @@ -1,10 +0,0 @@ -fs/dcache.c -fs/namei.c -fs/nfsd/vfs.c -fs/open.c -fs/stat.c -include/linux/dcache.h -include/linux/fs.h -kernel/ksyms.c -fs/exec.c -fs/proc/base.c diff --git a/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc b/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc deleted file mode 100644 index f8a99ea..0000000 --- a/lustre/kernel_patches/pc/vfs_intent-2.4.20-vanilla.pc +++ /dev/null @@ -1,10 +0,0 @@ -fs/exec.c -fs/dcache.c -fs/namei.c -fs/nfsd/vfs.c -fs/open.c -fs/stat.c -fs/proc/base.c -include/linux/dcache.h -include/linux/fs.h -kernel/ksyms.c diff --git a/lustre/kernel_patches/prepare_tree.sh b/lustre/kernel_patches/prepare_tree.sh deleted file mode 100755 index 7d688db..0000000 --- a/lustre/kernel_patches/prepare_tree.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash - -die() { - echo -e $* >&2 - echo aborting.. >&2 - exit 1 -} - -canon() { - cd $1 - CANON=$PWD - cd - -} - -canon $(dirname $0) -MYDIR=$CANON - -while [ ${#*} -gt 1 ]; do - case "$1" in - -t) - shift; - TREE=$1 - ;; - -s) - shift; - SERIES=$1 - ;; - *) - die "unknown argument $1" - break; - ;; - esac - shift; -done - -[ -z "$TREE" -o -z "$SERIES" ] && die "I need a tree and series:\n\t$0 -t kernel_dir -s series_name" -[ ! -d $TREE ] && die "kernel tree '$TREE' isn't a directory" -SERIES=$(basename $SERIES) -[ ! 
-f $MYDIR/series/$SERIES ] && die "no series file '$SERIES'" - -canon $TREE -TREE=$CANON - -# patch scripts wants a relative path from the linux tree to -# its patch pile :( - -MY=$(echo $MYDIR | sed -e 's_^/__') -TR=$(echo $TREE | sed -e 's_^/__') - -while true ; do - M=$(echo $MY | cut -d/ -f 1) - T=$(echo $TR | cut -d/ -f 1) - - if [ $M != $T ]; then - break; - fi - - MY=$(echo $MY | cut -d/ -f 2-) - TR=$(echo $TR | cut -d/ -f 2-) -done - -[ $MY == $MYDIR ] && die "bad! $MY == $MYDIR" - -REVERSE=$(revpath $TR)${MY} -ABSINO=$(stat $MYDIR | awk '($3 == "Inode:") {print $4}') -REVINO=`(cd $TREE ; stat $REVERSE | awk '($3 == "Inode:") {print $4}')` - -[ $ABSINO != $REVINO ] && die "inodes differ, my reverse path is bad?" - -echo export PATCHSCRIPTS_LIBDIR=$REVERSE - -cd $TREE -ln -sf $REVERSE/series/$SERIES series - -PATH_ELEMENTS=$(echo $PATH | sed -e 's/:/ /g') - -NEW_PATH=$MYDIR/scripts - -for p in $PATH_ELEMENTS; do - if echo $p | grep kernel_patches/scripts > /dev/null 2>&1 ; then - continue; - fi - NEW_PATH="$NEW_PATH:$p" -done - -echo export PATH=$NEW_PATH - -echo "'$TREE' successfully setup" >&2 diff --git a/lustre/kernel_patches/scripts/added-by-patch b/lustre/kernel_patches/scripts/added-by-patch deleted file mode 100755 index e9ccef6..0000000 --- a/lustre/kernel_patches/scripts/added-by-patch +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -# Extract names of new files from a patch, print them out - -PATCHFILE=$1 -case "$PATCHFILE" in -*.gz) CMD="gzip -d < $PATCHFILE";; -*) CMD="cat $PATCHFILE";; -esac - -TMP=$(mktemp /tmp/abp.XXXXXX) - -eval $CMD | egrep '^--- .*1969|^--- .*1970' > $TMP -sed -e 's@[^/]*/\([^ ]*\).*@\1@' < $TMP | sed -e 's@^linux/@@' | sort -rm -f $TMP diff --git a/lustre/kernel_patches/scripts/apatch b/lustre/kernel_patches/scripts/apatch deleted file mode 100755 index be1c68e..0000000 --- a/lustre/kernel_patches/scripts/apatch +++ /dev/null @@ -1,97 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . 
/usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} -do_apply() -{ - FILES=$(cat $P/pc/$PATCH_NAME.pc) - for file in $FILES - do - copy_file_to_bup $file $PATCH_NAME - done - - silent=-s - if [ $opt_force != 0 ] - then - silent= - fi - - if patch -p1 $silent -i "$1" || [ $opt_force != 0 ] - then - true - else - echo SOMETHING WENT WRONG - exit 1 - fi -} - -add_to_db() -{ - basename "$1" >> "$DB" -} - -usage() -{ - echo "Usage: apatch patchname" - exit 1 -} - -opt_force=0 -PATCH_NAMES="" - -for i in $* -do - case "$i" in - -f) - opt_force=1;; - *) - PATCH_NAMES="$PATCH_NAMES $i" - esac -done - -if [ x"$PATCH_NAMES" == x ] -then - usage -fi - -apatch() -{ - PATCH_NAME=$(stripit $1) - - need_file_there $P/pc/$PATCH_NAME.pc - - if is_applied "$PATCH_NAME" - then - echo "$PATCH_NAME" is already applied - exit 1 - fi - - if [ $opt_force != 0 ] - then - echo FORCING PATCH - fi - - if [ $opt_force != 0 ] || can_apply $P/patches/"$PATCH_NAME".patch - then - check_pc_match $P/patches/"$PATCH_NAME".patch $P/pc/"$PATCH_NAME".pc - do_apply $P/patches/"$PATCH_NAME".patch - add_to_db "$PATCH_NAME" - echo applied $PATCH_NAME - echo - else - echo "$PATCH_NAME" does not apply - exit 1 - fi -} - -for i in $PATCH_NAMES -do - if ! apatch $i - then - exit 1 - fi -done - diff --git a/lustre/kernel_patches/scripts/cat-series b/lustre/kernel_patches/scripts/cat-series deleted file mode 100755 index c38b1a8..0000000 --- a/lustre/kernel_patches/scripts/cat-series +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh - -. patchfns 2>/dev/null || -. /usr/lib/patch-scripts/patchfns 2>/dev/null || -. $PATCHSCRIPTS_LIBDIR/patchfns 2>/dev/null || -{ - echo "Impossible to find my library 'patchfns'." 
- echo "Check your install, or go to the right directory" - exit 1 -} - -if [ $# -eq 0 ] -then - cat_series -else - __cat_series $1 -fi diff --git a/lustre/kernel_patches/scripts/combine-applied b/lustre/kernel_patches/scripts/combine-applied deleted file mode 100755 index 60ab7e9..0000000 --- a/lustre/kernel_patches/scripts/combine-applied +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/sh - -# -# Make superpatch from currently applied patches using combinediff. -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: combine-applied output-file" - exit 1 -} - -if [ $# -ne 1 ] -then - usage -fi - -need_file_there applied-patches -CURRENT=$(mktemp /tmp/cmbd-XXXXXXXX) -APPLY_FILE=$(mktemp /tmp/cmbd-XXXXXXXX) -for FILE in `cat applied-patches` -do - if [ -f $P/pc/$FILE.pc ] - then - cat $P/pc/$FILE.pc >> $CURRENT - elif [ -f $P/pc/$FILE ] - then - cat $P/pc/$FILE >> $CURRENT - fi -done -cat $CURRENT | sort -u > $APPLY_FILE -echo > $1 -for FILE in `cat $APPLY_FILE` -do - diff -uNp $FILE~orig $FILE >> $1 -done -rm -rf $APPLY_FILE -rm -rf $CURRENT - diff --git a/lustre/kernel_patches/scripts/combine-series b/lustre/kernel_patches/scripts/combine-series deleted file mode 100755 index d00ba36..0000000 --- a/lustre/kernel_patches/scripts/combine-series +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/sh - -# -# Make superpatch from current series using combinediff. -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." 
- echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: combine-series output-file" - exit 1 -} - -if [ $# -ne 1 ] -then - usage -fi - -need_file_there series -CURRENT=$(mktemp /tmp/cmbd-XXXXXXXX) -for FILE in $(cat series) -do - NEXT=$(mktemp /tmp/cmbd-XXXXXXXX) - if [ -f $P/patches/$FILE ] - then - combinediff $CURRENT $P/patches/$FILE > $NEXT - elif [ -f $P/patches/$FILE.patch ] - then - combinediff $CURRENT $P/patches/$FILE.patch > $NEXT - elif [ -f $FILE ] - then - combinediff $CURRENT $FILE > $NEXT - fi - rm $CURRENT - CURRENT=$NEXT -done - -mv $NEXT "$1" diff --git a/lustre/kernel_patches/scripts/cvs-take-patch b/lustre/kernel_patches/scripts/cvs-take-patch deleted file mode 100755 index c6a6a2a..0000000 --- a/lustre/kernel_patches/scripts/cvs-take-patch +++ /dev/null @@ -1,78 +0,0 @@ -#!/bin/sh - -doit() -{ - echo $* - $* -} - -usage() -{ - echo "Usage: cvs-take-patch patch_file_name" - exit 1 -} - -# -# Find the highest level directory in $1 which does not -# contain the directory $2. Return it in $MISSING -# -highest_missing() -{ - START_DIR="$1" - NAME="$2" - MISSING="" - WHERE=$(dirname "$START_DIR") - PREV_WHERE=$START_DIR - while [ x"$WHERE" != x"$PREV_WHERE" ] - do - WHERE="$PREV_WHERE" - if [ ! -d "$WHERE"/"$NAME" ] - then - MISSING="$WHERE" - fi - PREV_WHERE=$(dirname "$WHERE") - done - echo highest_missing returns $MISSING -} - -# -# Add all new directries to CVS, top-down -# $1: name of a directory -# $2: name of the CVS directory -# -add_cvs_dirs() -{ - MISSING=foo - while [ "$MISSING" != "" ] - do - highest_missing $1 $2 - if [ x"$MISSING" != "x" ] - then - if [ ! 
-d "$MISSING"/"$2" ] - then - doit cvs add $MISSING - fi - fi - done -} - -PATCHFILE=$1 - -REMOVEDFILES=$(removed-by-patch $PATCHFILE) -if [ "$REMOVEDFILES" != "" ] -then - doit cvs remove $REMOVEDFILES -fi - -NEWFILES=$(added-by-patch $PATCHFILE) -for i in $NEWFILES -do - DIRNAME=$(dirname $i) - echo "Looking at $DIRNAME" - add_cvs_dirs $DIRNAME CVS -done - -if [ "$NEWFILES" != "" ] -then - doit cvs add $NEWFILES -fi diff --git a/lustre/kernel_patches/scripts/export_patch b/lustre/kernel_patches/scripts/export_patch deleted file mode 100755 index d378417..0000000 --- a/lustre/kernel_patches/scripts/export_patch +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "export_patch: export the patches listed in ./series" 1>&2 - echo "usage: export_patch destination-directory [prefix] " 1>&2 - exit 1 -} - -DIR="$1" -PREFIX="$2""_" - -if [ "$DIR" = "" ] -then - usage -fi - -if [ -e "$DIR" -a ! -d "$DIR" ] -then - echo "$DIR exists already, but is not a directory." 1>&2 - exit 1 -fi - -if [ ! -r ./series ] -then - echo "./series is not readable." 1>&2 - exit 1 -fi - -mkdir -p "$DIR" || exit 1 - -count=1 -for x in `cat ./series` -do - fname=`echo "$count" "$PREFIX" "$x" |\ - awk '{ if ( $2 != "_" ) - printf("p%05d_%s%s\n", $1, $2, $3); - else - printf("p%05d_%s\n", $1, $3); - }'` - if [ ! -r $P/patches/"$x" ] - then - echo "$P/patches/"$x" is not readable. skipping." 
1>&2 - continue; - fi - cp -f $P/patches/"$x" "$DIR"/"$fname" || continue; - count=`expr $count + 1` -done - diff --git a/lustre/kernel_patches/scripts/extract_description b/lustre/kernel_patches/scripts/extract_description deleted file mode 100755 index 6fa0e68..0000000 --- a/lustre/kernel_patches/scripts/extract_description +++ /dev/null @@ -1,87 +0,0 @@ -#!/bin/sh - -insert_line() -{ - PATTERN="$1" - LINE="$2" - FILE="$3" - awk ' BEGIN { found=0; } - /'"$PATTERN"'/ { - print; - if (!found) - printf("%s\n", "'$LINE'"); - found=1; - next; - } - { print; } - ' < "$FILE" -} - -# extract the description from the top of a patch -# filter stdin -# collapse adjacent blank lines to a single blank line -# remove any lines that look like diffstat output -# stop output on encountering a line beginning with '---' (beginning of patch) - - TMPFILE=`mktemp /tmp/xdtmp.XXXXXX` || exit 1 - formail -kfcb -X 'From:' -X 'Subject:' |\ - awk ' - BEGIN { found_end=0; lastone="x"; } - /^ .* [|] +[0-9]+ [+-]+$/ { - #/* we found something like diffstat output... 
*/ - if (found_end == 1) { - /* we are past end of diffstat, let it pass */ - print; - } - next; - } - /^ [1-9][0-9]* files changed/ { - #/* end of diffstat output, stop filtering diffstat */ - found_end=1; - next; - } - /^--- / { exit; } - { - #/* collapse adjacent blank lines to 1 blank line */ - if ( $0 == "" && lastone == "" ) - next; - else - print; - lastone=$0; - } - ' | awk '{ if ($0 == "" && FNR == 1) next; print; }' > "$TMPFILE" - - descs=`head -10 $TMPFILE | grep -c '^[ ]*DESC[ ]*$'` - if [ "$descs" = "0" ] - then - # DESC is not 1st non blank line in the file - echo "DESC" - descs=0 - fi - edescs=`grep -c '^EDESC$' "$TMPFILE"` - subjects=`grep -c '^[ ]*Subject[:]' "$TMPFILE"` - froms=`grep -c '^[ ]*From[:]' "$TMPFILE"` - if [ "$edescs" = "0" ] - then - if [ "$subjects" != "0" ] - then - insert_line '^Subject[:]' 'EDESC' "$TMPFILE" - else - if [ "$froms" != "0" ] - then - insert_line '^From[:]' 'EDESC' "$TMPFILE" - else - if [ "$descs" = "0" ] - then - # blank DESC line... - echo '(undescribed patch)' - echo EDESC - cat "$TMPFILE" - else - insert_line '^DESC$' "EDESC" "$TMPFILE" - fi - fi - fi - else - cat $TMPFILE - fi diff --git a/lustre/kernel_patches/scripts/forkpatch b/lustre/kernel_patches/scripts/forkpatch deleted file mode 100755 index cef297c..0000000 --- a/lustre/kernel_patches/scripts/forkpatch +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/sh - -# -# Fork the next patch in the series -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: forkpatch <newname>" - exit 1 -} - -if [ $# -ne 1 ] -then - usage -fi - -NEW=$1 -BASE=`stripit $NEW` -SERIES=series - -if [ ! 
-e $SERIES ] -then - echo 'File "series" not found' - exit 1 -fi - -if [ -f $P/$BASE.patch ] ; then - echo "Patch $NEW already exists as a file" - exit 1 -fi - -if grep $BASE $SERIES >& /dev/null ; then - echo "Patch $NEW already exists in series" - exit 1 -fi - -TMPSERIES=$(mktemp /tmp/series-XXXXXXXX) -top=$(toppatch) -if [ x"$top" == x ] -then - todo=$(head -1 $SERIES) -else - last_in_series=$(stripit $(tail -1 $SERIES)) - if [ $last_in_series == $top ] - then - echo "Series fully applied. Ends at $top" - exit 0 - fi - todo=$(grep -C1 "^$top\.patch" $SERIES | tail -1) - if [ x$todo = x ] - then - todo=$(head -1 $SERIES) - fi -fi - -basetodo=`stripit $todo` - -sed "s/$todo/$BASE.patch/" < $SERIES > $TMPSERIES -cat $TMPSERIES > $SERIES -rm -f $TMPSERIES -cp -f $P/patches/$todo $P/patches/$BASE.patch -cp -f $P/pc/$basetodo.pc $P/pc/$BASE.pc -if [ -f $P/txt/$basetodo.txt ]; then - cp -f $P/txt/$basetodo.txt $P/txt/$BASE.txt -else - echo "Warning no documentation for $BASE" -fi - -echo "Cloned $todo to $BASE" diff --git a/lustre/kernel_patches/scripts/fpatch b/lustre/kernel_patches/scripts/fpatch deleted file mode 100755 index 0cafa65..0000000 --- a/lustre/kernel_patches/scripts/fpatch +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/sh - -# -# Add a file to a patch. -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." 
- echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: fpatch patchname filename" - echo " fpatch filename" - exit 1 -} - -if [ $# == 1 ] -then - PATCH_NAME=$(top_patch) - FILENAME=$1 -elif [ $# == 2 ] -then - PATCH_NAME=$(stripit $1) - FILENAME=$2 -else - usage -fi - - -if is_applied_last $PATCH_NAME -then - true -else - if is_applied $PATCH_NAME - then - echo $PATCH_NAME is not the last-applied patch - exit 1 - else - echo $PATCH_NAME >> $DB - fi -fi - -if file_in_patch $FILENAME $PATCH_NAME -then - echo File $FILENAME is already in patch $PATCH_NAME - exit 1 -fi - -install_file_in_patch $FILENAME $PATCH_NAME - diff --git a/lustre/kernel_patches/scripts/import_patch b/lustre/kernel_patches/scripts/import_patch deleted file mode 100755 index f818f19..0000000 --- a/lustre/kernel_patches/scripts/import_patch +++ /dev/null @@ -1,102 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "usage: import_patch [ -p prefix-pattern ] patchfile [...]" 1>&2 - exit 1 -} - -XPATTERN="" -if [ "$1" = "-p" ] -then - XPATTERN="$2" - shift; - shift; -fi - -if [ "$1" = "" ] -then - usage -fi - -if [ ! -e applied-patches ] -then - touch applied-patches -fi - -mkdir -p patches || exit 1 -mkdir -p txt || exit 1 -mkdir -p pc || exit 1 - -if [ ! -e ./series ] -then - touch ./series - if [ "$?" != "0" ] - then - echo "Cannot create ./series" 1>&2 - exit 1 - fi -fi - -if [ ! -w ./series ] -then - echo "./series is not writable." 1>&2 - exit 1 -fi - -PATTERN='s/^'"$XPATTERN"'//' -for x in $* -do - if [ ! -r "$x" ] - then - echo "$x does not exist, skipping." 
1>&2 - continue - fi - patchname=`basename $x .bz2` - patchname=`basename $patchname .gz` - patchname=`basename $patchname .Z` - patchname=`basename $patchname .patch` - if is_applied $patchname - then - echo $patchname is currently applied - exit 1 - fi - if [ "$XPATTERN" != "" ] - then - patchname=`echo $patchname | sed -e "$PATTERN"` - fi - pname=$P/patches/"$patchname".patch - if [ -r "$pname" ] - then - echo "$pname exists already, skipping." 1>&2 - continue - fi - case "$x" in - *.bz2) - bunzip2 < "$x" > "$pname" - ;; - *.gz) - gunzip < "$x" > "$pname" - ;; - *.Z) zcat < "$z" > "$pname" - ;; - *) - cat "$x" > "$pname" || continue - ;; - esac - echo "$patchname".patch >> series - pcpatch "$pname" - extract_description < "$pname" >$P/txt/"$patchname".txt - grep '^[(]undescribed patch[)]$' < $P/txt/"$patchname".txt > /dev/null - if [ "$?" = "0" ] - then - echo "Warning: $patchname has no description." 1>&2 - fi -done - diff --git a/lustre/kernel_patches/scripts/inpatch b/lustre/kernel_patches/scripts/inpatch deleted file mode 100755 index edb2c20..0000000 --- a/lustre/kernel_patches/scripts/inpatch +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: inpatch" - exit 1 -} - -if [ $# != 0 ] -then - usage -fi - -if [ -e $DB ] -then - TOP_PATCH=$(top_patch) - if [ x$TOP_PATCH != x ] - then - cat $P/pc/$TOP_PATCH.pc - fi -fi diff --git a/lustre/kernel_patches/scripts/join-patch b/lustre/kernel_patches/scripts/join-patch deleted file mode 100755 index 065ea73..0000000 --- a/lustre/kernel_patches/scripts/join-patch +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh - -usage() -{ - echo "Usage: join-patch patchname" - exit 1 -} - -if [ $# -ne 1 ] -then - usage -fi - -PATCHNAME=$(stripit $1) - -if ! 
can_apply $PATCHNAME -then - echo Patch $PATCHNAME does not apply - exit 1 -fi - -pcpatch $PATCHNAME -for i in $(cat $P/pc/$PATCHNAME.pc) -do - fpatch $i -done - -patch -p1 -i "$P/patches/$PATCHNAME.patch" -f diff --git a/lustre/kernel_patches/scripts/linus-patch b/lustre/kernel_patches/scripts/linus-patch deleted file mode 100755 index 290b9cf..0000000 --- a/lustre/kernel_patches/scripts/linus-patch +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -# -# Grab a patch frmo kernel.org, install it. -# -# Usage: linus-patch http://www.kernel.org/pub/linux/kernel/people/dwmw2/bk-2.5/cset-1.786.152.7-to-1.798.txt.gz -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -poppatch 999 || die poppatch -wget $1 || die wget -FILE=$(basename $1) -gzip -d < $FILE > $P/patches/linus.patch -pcpatch linus || die pcpatch -( - echo DESC - echo $FILE - echo EDESC - echo - echo $FILE -) > $P/txt/linus.txt -rm $FILE diff --git a/lustre/kernel_patches/scripts/mpatch b/lustre/kernel_patches/scripts/mpatch deleted file mode 100755 index 16d4eb7..0000000 --- a/lustre/kernel_patches/scripts/mpatch +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: mpatch patchname [output_dir]" - exit 1 -} - -doit() -{ - echo $* 1>&2 - $* || { - echo oops - exit 1 - } -} - -epoch() -{ -# doit touch -t 7001011000.00 $1 - doit touch -t 7001010000.00 $1 -} - -dirfor() -{ - dir=$(dirname $1) - if [ ! 
-d $dir ] - then - doit mkdir -p $dir - RMDIRS="$RMDIRS $dir" - fi -} - -if [ $# == 0 ] -then - usage -fi - -PATCH_NAME=$(stripit $1) -OUTPUT_DIR=$2 - -FILES=$(cat $P/pc/$PATCH_NAME.pc) -OUT=$P/patches/$PATCH_NAME.patch -TMPOUT=$(mktemp /tmp/patch-$PATCH_NAME-XXXXXX) -TXT=$P/txt/$PATCH_NAME.txt -OLDDIR=$(basename $(/bin/pwd)) -NEWDIR=$OLDDIR-$LOGNAME - -if is_applied_last $PATCH_NAME -then - true -else - echo $PATCH_NAME is not the last-applied patch - exit 1 -fi - -doit rm -f $OUT -echo "Placing patch in " $OUT - -if [ -e $TXT -a -s $TXT ] -then - echo >> $OUT - body $TXT >> $OUT - echo >> $OUT - echo >> $OUT -else - echo "**** No patch description for $PATCH_NAME ****" -fi - -rm -f $TMPOUT - -for file in $FILES -do - OLD_FILE="$file"~"$PATCH_NAME" - if [ ! -e $OLD_FILE ] - then - OLD_FILE=/dev/null - fi - NEW_FILE=$file - XDIFF_OPTS="" - if [ ! -e $NEW_FILE ] - then - NEW_FILE=/dev/null - XDIFF_OPTS="-L $file" - fi - - echo diff -puN $XDIFF_OPTS $DIFF_OPTS $OLD_FILE $NEW_FILE - diff -puN $XDIFF_OPTS $DIFF_OPTS $OLD_FILE $NEW_FILE | p0-2-p1 $OLDDIR $NEWDIR >> $TMPOUT -done -diffstat -p1 $TMPOUT >> $OUT 2>/dev/null -echo >> $OUT -cat $TMPOUT >> $OUT -echo >> $OUT -echo "_" >> $OUT -rm -f $TMPOUT diff --git a/lustre/kernel_patches/scripts/new-kernel b/lustre/kernel_patches/scripts/new-kernel deleted file mode 100755 index 2b065a6..0000000 --- a/lustre/kernel_patches/scripts/new-kernel +++ /dev/null @@ -1,82 +0,0 @@ -#!/bin/sh - -usage() -{ - echo "Usage: new-kernel linux-2.4.2-pre2 linux-2.4.3-pre3 linux-2.4.3 patch.gz cvs-dir" - exit 1 -} - -wantdir() -{ - if [ x$1 = x ] - then - usage - fi - if [ ! -d $1 ] - then - echo "directory $1 does not exist" - usage - fi -} - -wantfile() -{ - if [ x$1 = x ] - then - usage - fi - if [ ! 
-f $1 ] - then - echo "file $1 does not exist" - usage - fi -} - -doit() -{ - echo $* 1>&2 - $* || { - echo oops - exit 1 - } -} - - -CURRENT_KERNEL=$1 -NEXT_KERNEL=$2 -BASE_KERNEL=$3 -PATCH_FILE=$4 -CVS_DIR=$5 - -TEMP_PATCH=$(mktemp /tmp/patch-XXXXXX) -MY_DIFF="$CURRENT_KERNEL"--"$NEXT_KERNEL" - -wantdir $CURRENT_KERNEL -wantdir $BASE_KERNEL -wantdir $CVS_DIR -wantfile $PATCH_FILE - -doit rm -rf $NEXT_KERNEL -doit cp -a $BASE_KERNEL $NEXT_KERNEL -doit rm -f $TEMP_PATCH -doit gunzip < $PATCH_FILE > $TEMP_PATCH -cd $NEXT_KERNEL -doit patch -p1 --dry-run -i $TEMP_PATCH -doit patch -p1 -s -i $TEMP_PATCH -echo cd .. -cd .. - -echo diff -uNrp $CURRENT_KERNEL $NEXT_KERNEL -diff -uNrp $CURRENT_KERNEL $NEXT_KERNEL > $MY_DIFF - -echo cd $CVS_DIR -cd $CVS_DIR -doit patch -p1 --dry-run -s -i ../$MY_DIFF -doit patch -p1 -s -i ../$MY_DIFF -cvs-take-patch ../$MY_DIFF -cvs commit -m "'doing $NEXT_KERNEL'" -cvs update -ko -d -P - -TAG=$(echo $NEXT_KERNEL | sed -e 's@\.@_@g') -cvs tag $TAG -rm -f $TEMP_PATCH diff --git a/lustre/kernel_patches/scripts/p0-2-p1 b/lustre/kernel_patches/scripts/p0-2-p1 deleted file mode 100755 index 266c698..0000000 --- a/lustre/kernel_patches/scripts/p0-2-p1 +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -# -# Usage: p0-2-p1 olddir newdir -# -OLDDIR=$1 -NEWDIR=$2 - -sed -e "s/^--- \([^\/].*\)/--- $OLDDIR\/\1/" | -sed -e "s/^+++ \([^\/].*\)/+++ $NEWDIR\/\1/" - diff --git a/lustre/kernel_patches/scripts/p_diff b/lustre/kernel_patches/scripts/p_diff deleted file mode 100755 index 1ad3e09..0000000 --- a/lustre/kernel_patches/scripts/p_diff +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/sh - -# -# Bring up a patched file in diff. We show the diffs -# in the topmost patch, unless it was specified -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." 
- echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: pdiff [patchname] filename" - echo " pdiff [patchname] -" - exit 1 -} - -if [ $# == 1 ] -then - PATCH_NAME=$(top_patch) - FILENAME=$1 -elif [ $# == 2 ] -then - PATCH_NAME=$(stripit $1) - FILENAME=$2 -else - usage -fi - -if ! is_applied $PATCH_NAME -then - echo $PATCH_NAME is not applied - exit 1 -fi - -doit() -{ - filename=$1 - unpatched_file=$filename"~"$PATCH_NAME - need_file_there $filename - if [ -e $unpatched_file ] - then - diff -u $unpatched_file $filename - else - echo pdiff: $filename appears to not be in $PATCH_NAME - fi -} - -if [ x"$FILENAME" = "x-" ] -then - FILENAME=$(cat $P/pc/$PATCH_NAME.pc) -fi - -for i in $FILENAME -do - doit $i -done diff --git a/lustre/kernel_patches/scripts/patchdesc b/lustre/kernel_patches/scripts/patchdesc deleted file mode 100755 index 9a886fd..0000000 --- a/lustre/kernel_patches/scripts/patchdesc +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -desc1() -{ - PATCH=$(stripit $1) - TXT=$P/txt/$PATCH.txt - echo $PATCH.patch - desc < $TXT - echo -} - -for i in $* -do - desc1 $i -done diff --git a/lustre/kernel_patches/scripts/patchfns b/lustre/kernel_patches/scripts/patchfns deleted file mode 100644 index 78e494b..0000000 --- a/lustre/kernel_patches/scripts/patchfns +++ /dev/null @@ -1,256 +0,0 @@ -DB=applied-patches - -# -# Work out where the user's pc/, patch/ and txt/ directories live. -# -# If the user specified PATCHSCRIPTS in environment then use that (it's -# probably a relative path) -# -# If there is a directory ./patch-scripts then use that -# -# Otherwise use "." -# - -if [ x$PATCHSCRIPTS_LIBDIR != x ] -then - P=$PATCHSCRIPTS_LIBDIR -elif [ -d ./patch-scripts ] -then - P=./patch-scripts -elif [ -d ./patches ] -then - P=. 
-else - echo "could not locate your pc/ and patches/ directories" - exit 1 -fi - -top_patch() -{ - tail -1 $DB -} - -die() -{ - echo error: $* - exit 1 -} - -is_numeric() -{ - if echo $1 | egrep '^[0-9]*$' > /dev/null - then - return 0 - fi - return 1 -} - -is_applied_last() -{ - name="$(stripit $1)" - top_patch >$DB.1 - if grep "^$name$" "$DB.1" > /dev/null 2>&1 - then - rm $DB.1 - return 0 - else - rm $DB.1 - return 1 - fi -} - -is_applied() -{ - name=$(stripit "$1") - if grep "^$name$" "$DB" > /dev/null 2>&1 - then - return 0 - else - return 1 - fi -} -check_pc_match() -{ - if [ -f /usr/bin/lsdiff ]; then - tmpfile=$(mktemp /tmp/p_XXXXXX) || exit 1 - lsdiff --strip=1 $1 > $tmpfile - diff $2 $tmpfile > /dev/null - if [ $? != 0 ]; then - echo " $1 do not match with $2 " - echo " $2 will be changed to match $2" - cat $tmpfile > $P/pc/$PATCH_NAME.pc - fi - rm -rf $tmpfile - fi -} -can_apply() -{ - if patch -p1 --dry-run -i "$1" -f - then - return 0 - else - return 1 - fi -} - -can_remove() -{ - if patch -R -p1 --dry-run -i $P/patches/"$1".patch -f - then - return 0 - else - return 1 - fi -} - -remove_from_db() -{ - tmpfile=$(mktemp /tmp/p_XXXXXX) - name="$1" - sed -e "/^$name$/d" < "$DB" > $tmpfile - mv $tmpfile "$DB" -} - -stripit() -{ - ret=$(basename $1) - ret=$(echo $ret | sed -e 's/\.patch$//') - ret=$(echo $ret | sed -e 's/\.pc$//') - ret=$(echo $ret | sed -e 's/\.txt$//') - echo $ret -} - -top_is_current() -{ - patch_name=$(top_patch) - if [ x$patch_name == x ] - then - return 1 - else - patch_file=$P/patches/"$patch_name".patch - files=$(cat $P/pc/$patch_name.pc) - for file in $files - do - if [ $file -nt $patch_file ] - then - echo $file newer than $patch_file - return 0 - fi - done - fi - return 1 -} - -need_top_current() -{ - if top_is_current - then - echo "Error: Top patch is not up-to-date" - exit 1 - fi -} - -warn_top_current() -{ - if top_is_current - then - echo "Warning: Top patch is not up-to-date" - fi -} - -file_in_patch() -{ - file=$1 - 
patch=$2 - - if [ -e $P/pc/$patch.pc ] - then - if grep "^"$file"$" $P/pc/$patch.pc > /dev/null - then - return 0 - fi - fi - return 1 -} - -# copy_file_to_bup filename patchname -copy_file_to_bup() -{ - file=$1 - patch=$2 - bup="$file"~"$patch" - orig="$file"~"orig" - src_dir=`pwd` - - if [ -e $bup ] - then - echo "Cannot install file $file in patch $patch: backup $bup exists" - exit 1 - fi - if [ -e $file ] - then - cp -p $file "$file"~"$patch" - else - echo "file $file appears to be newly added" - fi - if [ ! -L "$orig" ]; then - ln -s "$src_dir/$bup" $orig - fi -} - -install_file_in_patch() -{ - file=$1 - patch=$2 - - copy_file_to_bup $file $patch - echo $file >> $P/pc/$patch.pc -# touch $P/txt/$patch.txt -} - -need_file_there() -{ - if [ ! -e $1 ] - then - echo "File $1 does not exist" - exit 1 - fi -} - -desc() -{ - state=0 - while read x - do - if [ x"$x" = xDESC ] - then - state=1 - elif [ x"$x" = xEDESC ] - then - state=0 - elif [ $state = 1 ] - then - echo " $x" - fi - done -} - -body() -{ - file=$1 - - did_stuff=0 - while read x - do - if [ x"$x" = xEDESC ] - then - cat - did_stuff=1 - fi - done < $file - - if [ $did_stuff = 0 ] - then - cat $file - fi -} diff --git a/lustre/kernel_patches/scripts/pcpatch b/lustre/kernel_patches/scripts/pcpatch deleted file mode 100755 index fa53385..0000000 --- a/lustre/kernel_patches/scripts/pcpatch +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "pcpatch: recreate the pc file from patches/{patchname}.patch" - exit 1 -} - -doit() -{ - echo $* 1>&2 - $* || { - echo oops - exit 1 - } -} - -if [ $# != 1 -o "$1" = "help" ] -then - usage -fi -PATCH=$1 -PATCH_NAME=$(stripit $PATCH) -PC=$P/pc/$PATCH_NAME.pc - -if [ ! 
-e $P/patches/$PATCH_NAME.patch ] -then - echo "$P/patches/$PATCH_NAME.patch does not exist" - exit 1 -fi - -if is_applied "$PATCH" -then - echo $PATCH is applied! - exit 1 -fi - -touched-by-patch $P/patches/$PATCH_NAME.patch > $PC -echo Recreated $PC diff --git a/lustre/kernel_patches/scripts/poppatch b/lustre/kernel_patches/scripts/poppatch deleted file mode 100755 index 70055d6..0000000 --- a/lustre/kernel_patches/scripts/poppatch +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: poppatch [npatches]" - exit 1 -} - -doit() -{ - echo $* 1>&2 - $* || { - echo oops - exit 1 - } -} - -if [ $# -gt 1 ] -then - usage -fi - -NR=1 -STOP_AT="" -if [ $# -eq 1 ] -then - if is_numeric $1 - then - NR=$1 - else - NR=1000 - STOP_AT=$(stripit $1) - fi -fi - -pop_one() -{ - TOP_PATCH=$(top_patch) - if [ x$TOP_PATCH == x ] - then - echo "no patches applied" - exit 0 - else - popped_patch="$(top_patch)" - if ! rpatch $(top_patch) - then - echo still at $(top_patch) - exit 1 - fi - echo - fi -} - -for i in $(seq 1 $NR) -do - pop_one - if [ x$STOP_AT != "x" ] - then - if [ $STOP_AT == $(toppatch) ] - then - sum-series applied-patch - exit 0 - fi - fi -done -sum-series applied-patch diff --git a/lustre/kernel_patches/scripts/prep-patch b/lustre/kernel_patches/scripts/prep-patch deleted file mode 100755 index 1d60ea9..0000000 --- a/lustre/kernel_patches/scripts/prep-patch +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." 
- echo "Check your install, or go to the right directory" - exit 1 -} - -if [ $# -ne 1 ] -then - echo "Usage prep-patch patchname" - exit 1 -fi - -PATCHNAME=$(stripit $1) - -xcb -s 2 < $P/patches/$PATCHNAME.patch -head -2 $P/txt/$PATCHNAME.txt | tail -1 | tr -d '\n' | xcb -s 1 diff --git a/lustre/kernel_patches/scripts/pstatus b/lustre/kernel_patches/scripts/pstatus deleted file mode 100755 index f735d8d..0000000 --- a/lustre/kernel_patches/scripts/pstatus +++ /dev/null @@ -1,156 +0,0 @@ -#!/bin/sh - -# print out patch status. Usage: pstatus [ patchfile ... ] -# -# Stephen Cameron <steve.cameron@hp.com> -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -if [ ! -f ./series ] -then - echo "./series does not exist." 1>&2 - exit 1 -fi - -if [ ! -d ./patches ] -then - echo "Directory ./patches does not exist." 1>&2 - exit 1 -fi - - -PATCHLIST="$*" -if [ "$PATCHLIST" = "" ] -then - series_optimize=yes - PATCHLIST=`cat series | sed -e 's/[.]patch[ ]*$//'` - SORTSERIES=`mktemp /tmp/ser.XXXXXX` || exit 1 - SORTPATCHES=`mktemp /tmp/pat.XXXXXX` || exit 1 - sed -e 's/^[ ]//' -e 's/[.]patch[ ]*$//' < series | \ - sort > $SORTSERIES - exists="`echo $P/patches/*.patch 2>/dev/null`" - if [ "$exists" != "$P/patches/*.patch" ] - then - ls -1 $P/patches/*.patch | sed -e 's/^.*\/patches\///' \ - -e 's/[.]patch[ ]*$//' | sort > $SORTPATCHES - PATCHLIST="$PATCHLIST"" `comm -1 -3 $SORTSERIES $SORTPATCHES`" - fi - rm -f $SORTPATCHES $SORTSERIES -else - series_optimize=no -fi - -NSERIES=`wc -l series | awk '{ print $1; }'` -series=1 -for PATCH_NAME in $PATCHLIST -do - PATCH_NAME=$(stripit $PATCH_NAME) - # see if this patch even exists - if [ ! -f $P/patches/"$PATCH_NAME".patch ] - then - echo "$PATCH_NAME does not exist." 
- continue - fi - # see if this patch is applied - applied="-" - if [ -f applied-patches ] - then - grep '^'"$PATCH_NAME"'$' applied-patches > /dev/null - if [ "$?" = "0" ] - then - applied="a" - fi - fi - - # figure the status of this patch, that is, - # if it needs changelog, pcpatch, refpatch - - stat="" - if [ ! -f $P/txt/"$PATCH_NAME".txt ] - then - stat="changelog " - fi - if [ ! -f $P/pc/"$PATCH_NAME".pc ] - then - stat="$stat""pcpatch " - elif [ "$applied" != '-' ] - then - rpatch=n - - # for each file this patch touches - for y in `cat $P/pc/"$PATCH_NAME".pc` - do - # is the patch adding the file? - if [ ! -e "$y"'~'"$PATCH_NAME" -a -f "$y" ] - then - # file is newer than the patch? - if [ "$y" -nt $P/patches/"$PATCH_NAME".patch ] - then - rpatch=y - stat="$stat""refpatch " - break - fi - else - # modified file is newer than the patch? - if [ "$y"'~'"$PATCH_NAME" -nt \ - $P/patches/"$PATCH_NAME".patch ] - then - rpatch=y - stat="$stat""refpatch " - break - fi - if [ "`toppatch`" = "$PATCH_NAME" -a \ - "$y" -nt $P/patches/"$PATCH_NAME".patch ] - then - # toppatch, so check if the file - # is newer than the patch? - rpatch=y - stat="$stat""refpatch " - break - fi - fi - done - fi - # check if they changed the changelog recently - if [ "$rpatch" = "n" -a -f $P/txt/"$PATCH_NAME".txt \ - -a $P/txt/"$PATCH_NAME".txt -nt \ - $P/patches/"$PATCH_NAME".patch ] - then - rpatch=y - stat="$stat""refpatch " - fi - if [ "$stat" != "" ] - then - stat="Needs ""$stat" - fi - - if [ "$series_optimize" != "yes" ] - then - # have to find the series number the hard way. - series=`grep -n '^'"$PATCH_NAME"'\.patch$' series |\ - awk -F: '{ printf "%d", $1}' ` - if [ "$series" = "" ] - then - series="?" - fi - fi - - echo "$series":"$applied":"$PATCH_NAME $stat" - - if [ "$series_optimize" = "yes" ] - then - if [ "$series" != "?" ] - then - series=`expr $series + 1` - if [ $series -gt $NSERIES ] - then - series="?" 
- fi - fi - fi -done diff --git a/lustre/kernel_patches/scripts/ptkdiff b/lustre/kernel_patches/scripts/ptkdiff deleted file mode 100755 index 97c9982..0000000 --- a/lustre/kernel_patches/scripts/ptkdiff +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/sh - -# -# Bring up a patched file in tkdiff. We show the diffs -# in the topmost patch, unless it was specified -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: ptkdiff filename ..." - echo " ptkdiff -" - exit 1 -} - -PATCH_NAME=$(top_patch) - -doit() -{ - filename=$1 - unpatched_file=$filename"~"$PATCH_NAME - need_file_there $filename - if [ -e $unpatched_file ] - then - tkdiff $unpatched_file $filename - else - echo ptkdiff: $filename appears to not be in $PATCH_NAME - fi -} - -if [ x"$1" = "x-" ] -then - FILENAME=$(cat $P/pc/$PATCH_NAME.pc) -else - FILENAME="$*" -fi - -for i in $FILENAME -do - doit $i & -done diff --git a/lustre/kernel_patches/scripts/pushpatch b/lustre/kernel_patches/scripts/pushpatch deleted file mode 100755 index 6702e63..0000000 --- a/lustre/kernel_patches/scripts/pushpatch +++ /dev/null @@ -1,86 +0,0 @@ -#!/bin/sh - -# -# Add next patch in series -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: pushpatch [npatches]" - exit 1 -} - -opt_force=0 - -for i in $* -do - case "$i" in - -f) - opt_force=1;; - *) - if [ -n "$NR" -o -n "$STOP_AT" ] - then - usage - fi - if is_numeric $i - then - NR=$i - else - NR=1000 - STOP_AT=$(stripit $i) - fi;; - esac -done - -[ $opt_force = 1 ] && force="-f" - -SERIES=series - -if [ ! 
-e $SERIES ] -then - echo 'File "series" not found' - exit 1 -fi - -push_one() -{ - top=$(toppatch) - if [ x"$top" == x ] - then - todo=$(head -1 $SERIES) - else - last_in_series=$(stripit $(tail -1 $SERIES)) - if [ $last_in_series == $top ] - then - echo "Series fully applied. Ends at $top" - exit 0 - fi - todo=$(grep -C1 "^$top\.patch" $SERIES | tail -1) - if [ x$todo = x ] - then - todo=$(head -1 $SERIES) - fi - fi - - apatch $force $todo -} - -for i in $(seq 1 $NR) -do - push_one - if [ x$STOP_AT != "x" ] - then - if [ $STOP_AT == $(toppatch) ] - then - sum-series applied-patch - exit 0 - fi - fi -done -sum-series applied-patch diff --git a/lustre/kernel_patches/scripts/refpatch b/lustre/kernel_patches/scripts/refpatch deleted file mode 100755 index 3195a57..0000000 --- a/lustre/kernel_patches/scripts/refpatch +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: refpatch" - exit 1 -} - -doit() -{ - echo $* 1>&2 - $* || { - echo oops - exit 1 - } -} - -if [ $# != 0 ] -then - usage -fi - -TOP_PATCH=$(top_patch) -mpatch $* $(top_patch) -sum-series applied-patch -echo "Refreshed $TOP_PATCH" diff --git a/lustre/kernel_patches/scripts/removed-by-patch b/lustre/kernel_patches/scripts/removed-by-patch deleted file mode 100755 index ff12970..0000000 --- a/lustre/kernel_patches/scripts/removed-by-patch +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh -# Extract names of new files from a patch, print them out - -PATCHFILE=$1 -case "$PATCHFILE" in -*.gz) CMD="gzip -d < $PATCHFILE";; -*) CMD="cat $PATCHFILE";; -esac - -TMP=$(mktemp /tmp/rbp-XXXXXX) - -eval $CMD | egrep '^\+\+\+.*1970|\+\+\+.*1969' > $TMP -sed -e 's@[^/]*/\([^ ]*\).*@\1@' < $TMP | sed -e 's@^linux/@@' | sort -rm -f $TMP diff --git a/lustre/kernel_patches/scripts/rename-patch 
b/lustre/kernel_patches/scripts/rename-patch deleted file mode 100755 index 8334f1e..0000000 --- a/lustre/kernel_patches/scripts/rename-patch +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} -OLD=$(stripit $1) -NEW=$(stripit $2) - -mv $P/pc/$OLD.pc $P/pc/$NEW.pc -mv $P/patches/$OLD.patch $P/patches/$NEW.patch -mv $P/txt/$OLD.txt $P/txt/$NEW.txt - -cvs remove $P/pc/$OLD.pc -cvs remove $P/patches/$OLD.patch -cvs remove $P/txt/$OLD.txt - -cvs add $P/pc/$NEW.pc -cvs add $P/patches/$NEW.patch -cvs add $P/txt/$NEW.txt diff --git a/lustre/kernel_patches/scripts/rolled-up-patch b/lustre/kernel_patches/scripts/rolled-up-patch deleted file mode 100755 index 52676dc..0000000 --- a/lustre/kernel_patches/scripts/rolled-up-patch +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: rolled-up-patch" - exit 1 -} - -if [ $# != 0 ] -then - usage -fi - -RUP=$(mktemp /tmp/rup-XXXXXX) -rm -f $RUP - -for i in $(cat applied-patches) -do - patch_name=$(stripit $i) - cat $P/pc/$patch_name.pc -done | sort | uniq > $RUP - -kdiff $(cat $RUP) -rm -f $RUP diff --git a/lustre/kernel_patches/scripts/rpatch b/lustre/kernel_patches/scripts/rpatch deleted file mode 100755 index 5a8da38..0000000 --- a/lustre/kernel_patches/scripts/rpatch +++ /dev/null @@ -1,90 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." 
- echo "Check your install, or go to the right directory" - exit 1 -} - -# do_remove() -# { -# if patch -R -p1 -s -i $P/patches/"$1".patch -# then -# true -# else -# echo SOMETHING WENT WRONG -# exit 1 -# fi -# } - -do_remove() -{ - FILES=$(cat $P/pc/$1.pc) - for file in $FILES ; do - base_dir=`pwd` - if [ -L "$file"~"orig" ]; then - if [ `readlink "$file"~"orig"` = "$base_dir/""$file"~"$1" ]; then - rm -rf "$file"~"orig" - fi - fi - if [ -f "$file"~"$1" ]; then - mv -f "$file"~"$1" "$file" - else - rm -f "$file" - fi - done - true -} - -kill_old_ones() -{ - FILES=$(cat $P/pc/$1.pc) - for file in $FILES - do - rm -f "$file"~"$1" - done -} - -usage() -{ - echo "Usage: rpatch patchname" - exit 1 -} - -if [ $# == 0 ] -then - usage -fi - -PATCH_NAME=$(stripit $1) - -warn_top_current -if is_applied "$PATCH_NAME" -then -# if can_remove "$PATCH_NAME" -# then - if [ ! -f $P/pc/$PATCH_NAME.pc ]; then - exit 1 - fi - do_remove "$PATCH_NAME" - kill_old_ones "$PATCH_NAME" - remove_from_db "$PATCH_NAME" -# else -# echo "$PATCH_NAME" does not remove cleanly -# exit 1 -# fi -else - echo "$PATCH_NAME" is not applied - exit 1 -fi - -top=$(top_patch) -if [ x"$top" == x ] -then - msg="no patches applied" -else - msg="now at $top" -fi - -echo Removed $PATCH_NAME, $msg - diff --git a/lustre/kernel_patches/scripts/split-patch b/lustre/kernel_patches/scripts/split-patch deleted file mode 100755 index 08ce431..0000000 --- a/lustre/kernel_patches/scripts/split-patch +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/perl -w -$out = ""; -while (<>) { - next if (/^Only/); - next if (/^Binary/); - if (/^diff/ || /^Index/) { - if ($out) { - close OUT; - } - (@out) = split(' ', $_); - shift(@out) if (/^diff/); - $out = pop(@out); - $out =~ s:/*usr/:/:; - $out =~ s:/*src/:/:; - $out =~ s:^/*linux[^/]*::; - $out =~ s:\(w\)::; - next if ($out eq ""); - $out = "/var/tmp/patches/$out"; - $dir = $out; - $dir =~ s:/[^/]*$::; - print STDERR "$out\n"; - system("mkdir -p $dir"); - open(OUT, ">$out") || 
die("cannot open $out"); - } - if ($out) { - print OUT $_; - } -} - diff --git a/lustre/kernel_patches/scripts/sum-series b/lustre/kernel_patches/scripts/sum-series deleted file mode 100755 index 5b628fb..0000000 --- a/lustre/kernel_patches/scripts/sum-series +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh - -# -# Make superpatch from current series using combinediff. -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: sum-series output-file" - exit 1 -} - -if [ $# -ne 1 ] -then - usage -fi - -need_file_there applied-patches -CURRENT=$(mktemp /tmp/cmbd-XXXXXXXX) -for FILE in $(cat applied-patches) -do -# echo "Adding patch $FILE...." - if [ -f $P/patches/$FILE ] - then - cat $P/patches/$FILE >> $CURRENT - elif [ -f $P/patches/$FILE.patch ] - then - cat $P/patches/$FILE.patch >> $CURRENT - elif [ -f $FILE ] - then - cat $FILE >> $CURRENT - fi -done - -mv $CURRENT "$1" diff --git a/lustre/kernel_patches/scripts/tag-series b/lustre/kernel_patches/scripts/tag-series deleted file mode 100755 index 17f3dfe..0000000 --- a/lustre/kernel_patches/scripts/tag-series +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh - -# tag-series tagname series-file-name -# -# Does a `cvs tag tagname' of all the .pc, .txt and .patch files mentioned -# in series-file-name. Also tags series-file-name. -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." 
- echo "Check your install, or go to the right directory" - exit 1 -} - -# tag_one tag patchname -# -tag_one() -{ - PN=$(stripit $2) - if [ -r $P/txt/$PN.txt ] - then - cvs tag $1 $P/pc/$PN.pc $P/patches/$PN.patch $P/txt/$PN.txt - else - cvs tag $1 $P/pc/$PN.pc $P/patches/$PN.patch - fi -} - -if [ $# -ne 2 ] -then - echo Usage: tag-series tagname series-file-name - exit 1 -fi - -TAG=$1 -SERIES=$2 - -for p in $(cat $SERIES) -do - tag_one $TAG $p -done -cvs tag $TAG $SERIES diff --git a/lustre/kernel_patches/scripts/toppatch b/lustre/kernel_patches/scripts/toppatch deleted file mode 100755 index 6df239d..0000000 --- a/lustre/kernel_patches/scripts/toppatch +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/sh - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: toppatch" - exit 1 -} - -if [ $# != 0 ] -then - usage -fi - -if [ -e $DB ] -then - TOP_PATCH=$(top_patch) - if [ x$TOP_PATCH != x ] - then - echo $TOP_PATCH - fi -fi diff --git a/lustre/kernel_patches/scripts/touched-by-patch b/lustre/kernel_patches/scripts/touched-by-patch deleted file mode 100755 index df5b387..0000000 --- a/lustre/kernel_patches/scripts/touched-by-patch +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/sh -# Extract names of new files from a patch, print them out - -PATCHFILE=$1 -case "$PATCHFILE" in -*.gz) CMD="gzip -d < $PATCHFILE";; -*) CMD="cat $PATCHFILE";; -esac - -TMP=$(mktemp /tmp/tbp-XXXXXX) || exit 1 -TMP2=$(mktemp /tmp/tbp2-XXXXXX) || exit 1 - -eval $CMD | egrep '^\+\+\+ |^\-\-\- ' > $TMP - -cat $TMP | sed -e 's@[^/]*/\([^ ]*\).*@\1@' \ - | grep -v '^dev\/null$' \ - | sort \ - | uniq \ - > $TMP2 - -rm -f $TMP -grep < $TMP2 '^[+][+][+]' > /dev/null -if [ "$?" = "0" ] -then - echo "WARNING: $PATCHFILE appears to be -p0 form rather than -p1." 1>&2 - echo " Use "\'"p0-2-p1 . . 
< $PATCHFILE"\'" to fix" 1>&2 - awk '{ print $2 }' < $TMP2 -else - cat $TMP2 -fi | grep -v '~' - -rm -f $TMP2 diff --git a/lustre/kernel_patches/scripts/trypatch b/lustre/kernel_patches/scripts/trypatch deleted file mode 100755 index 2e3cd15..0000000 --- a/lustre/kernel_patches/scripts/trypatch +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh - -# -# Fork the next patch in the series -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: trypatch <newname>" - exit 1 -} - -if [ $# -ne 1 ] -then - usage -fi - -NEW=$1 -BASE=`stripit $NEW` -SERIES=series - -if [ ! -e $SERIES ] -then - echo 'File "series" not found' - exit 1 -fi - -if grep $BASE $SERIES >& /dev/null ; then - echo "Patch $NEW already exists in series" - exit 1 -fi - -if [ ! -f $P/patches/$BASE.patch ] ; then - echo "Patch $NEW doesn't exist as a file" - exit 1 -fi - -$TMPSERIES=$(mktemp /tmp/series-XXXXXXXX) -top=$(toppatch) -if [ x"$top" == x ] -then - todo=$(head -1 $SERIES) -else - last_in_series=$(stripit $(tail -1 $SERIES)) - if [ $last_in_series == $top ] - then - echo "Series fully applied. 
Ends at $top" - exit 0 - fi - todo=$(grep -C1 "^$top\.patch" $SERIES | tail -1) - if [ x$todo = x ] - then - todo=$(head -1 $SERIES) - fi -fi - -if patch -p1 -i $P/patches/$BASE.patch ; then - patch -R -p1 -i $P/patches/$BASE.patch - - $basetodo=$(basename $todo) - sed "s/$todo/$BASE/" < $SERIES > $TMPSERIES - mv -f $TMPSERIES $SERIES - echo "Replaced $todo with $BASE" -else - echo "Failed to replace $todo with $BASE" -fi diff --git a/lustre/kernel_patches/scripts/unitdiff.py b/lustre/kernel_patches/scripts/unitdiff.py deleted file mode 100755 index d19d5e7..0000000 --- a/lustre/kernel_patches/scripts/unitdiff.py +++ /dev/null @@ -1,223 +0,0 @@ -#!/usr/bin/python - -import sys -import re -import string - -#TODO -# clean up rest/file -# clean up +6 and like (assumptions). should be turned into 'find' -# make regession tests for all cases (Only in, etc) - -try: - filename = sys.argv[1] -except: - print 'requires a file name' - sys.exit(1) - -filefd = open(filename) -file = filefd.read() -filefd.close() - -rest = file -pat = "(^(?:diff .*\n)?--- .*\n\+\+\+ .*)?\n@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? 
@@|^(Only in .*)" -startpat = re.compile(pat, re.M) - -pos = 0 -oldpos = 0 -filelen = len(rest) -oldrest = "" -while(1): - rexp = startpat.search(rest) - if not rexp: - break - - if rexp.group(6): - print rexp.group(6) - rest = rest[rexp.end(6)+1:] - continue - - header = rexp.group(1) - orgfile_start = string.atoi(rexp.group(2)) - if rexp.group(3): - orgfile_len = string.atoi(rexp.group(3)) - else: - orgfile_len = -1 - newfile_start = string.atoi(rexp.group(4)) - if rexp.group(5): - newfile_len = string.atoi(rexp.group(5)) - else: - newfile_len = -1 - rest = rest[rexp.start(2):] - rest = rest[string.find(rest, "\n")+1:] - - rexp2 = startpat.search(rest) - if rexp2: - if rexp2.start(6) != -1: - oldrest = rest[rexp2.start(6)-1:] - rest = rest[:rexp2.start(6)] - elif rexp2.start(1) == -1: - oldrest = rest[rexp2.start(2)-5:] - rest = rest[:rexp2.start(2)-4] - else: - oldrest = rest[rexp2.start(1)-1:] - rest = rest[:rexp2.start(1)] - else: - oldrest = rest - -# pos = filelen - len(oldrest) -# if pos - oldpos > 100: -# sys.stderr.write(`pos`+'/'+`filelen`+'\n') -# oldpos = pos - - first = 1 - oldminuses = 0 - oldplusses = 0 - oldoffset = 0 - while(1): - #erstat early line stuff med lookbehind paa {1,2}-dims - #nedenfor RAA - linepat = "^([^-+\n]*)\n?(((^[-+].*\n)|^(.*\n){1,2}(?=^[-+].*\n))+)(.*)\n?" - compat = re.compile(linepat, re.M) - rexp = compat.search(rest) - if not rexp: - break - - prematch = rexp.group(1) - match = rexp.group(2) - muddle = len(match) - -# print rest -# print 'prematch ', rexp.start(1), rexp.end(1), prematch -# print 'match ---------' -# print match -# print 'match --------' - - # dump unwanted early lines... 
- if match[0] != "+" and match[0] != "-": - while(1): - next = string.find(match, '\n') - if next == -1: - break - if match[next+1] == "+" or match[next+1] == "-": - prematch = match[:next] - match = match[next+1:] - break - match = match[next+1:] - - -# print 'prematch ', rexp.start(1), rexp.end(1), len(prematch) -# print '('+prematch+')' -# if prematch == ' ': -# print 'space' - muddle = muddle - len(match) - - lines = string.count(match, "\n") - compat = re.compile("^-", re.M) - minuses = len(compat.findall(match)) - compat = re.compile("^\+", re.M) - plusses = len(compat.findall(match)) - orgsize = minuses + 2 + (lines - minuses - plusses) - newsize = plusses + 2 + (lines - minuses - plusses) - - noeol = "^(\\\ No newline at end of file)$" - compnoeol = re.compile(noeol, re.M) - if compnoeol.search(match) or compnoeol.search(rexp.group(6)): - orgsize = orgsize - 1 - newsize = newsize - 1 - - coherent = 0 - if lines - plusses == 0: - coherent = 1 - elif lines - minuses == 0: - coherent = 1 - - # RAA FIXME - if not len(prematch):#or len(prematch) == 1 and prematch == ' ': - orgsize = orgsize -1 - newsize = newsize -1 - if rexp.start(6) == rexp.end(6): - orgsize = orgsize -1 - newsize = newsize -1 - -# print "lines in match: ", lines -# print "number of minuses: ", minuses -# print "number of plusses: ", plusses - - matchpos = rexp.start(2) + muddle - offset = string.count(rest[:matchpos], "\n") - -# print 'offset/oldoffset: ', offset,oldoffset -# print 'oldplusses/oldminuses: ', oldplusses, oldminuses -# print 'orgfile_start/newfile_start: ', orgfile_start, newfile_start - - orgstart = orgfile_start + offset + oldoffset - oldplusses - newstart = newfile_start + offset - oldminuses + oldoffset - - # RAA: Bwadr. 
Fix antagelse om prematch paa en anden - # maade - orgstartmod = 0 - newstartmod = 0 - if orgfile_start == 1 and not len(prematch): - orgstartmod = 1 - if newfile_start == 1 and not len(prematch): - newstartmod = 1 - if orgfile_start == 0 and orgfile_len == 0: - orgstartmod = 1 - # RAA Hack! - plusses = plusses + 1 - minuses = minuses +1 - if newfile_start == 0 and newfile_len == 0: - newstartmod = 1 - # RAA Hack! - plusses = plusses + 1 - minuses = minuses +1 - - if header and first: - print header - first = 0 - - # should the start(1) == 0 be orgstart == 1? RAA - if orgstart == 1 and newstart == 1 and plusses == 0 and coherent: - print "@@ -"+`orgstart`+","+`orgsize`+" +"+`newstart`+" @@" - print match[:string.rfind(match, "\n")] - print rexp.group(6) - elif rexp.start(6) == rexp.end(6) and plusses == 0 and coherent: - if orgstartmod: - orgstart = orgstart + 1 - if newstartmod: - newstart = newstart + 1 - print "@@ -"+`orgstart-1`+","+`orgsize`+" +"+`newstart-1`+" @@" - print prematch - print match[:string.rfind(match, "\n")] - elif orgstart == 1 and orgstart == 1 and minuses == 0 and coherent: - print "@@ -"+`orgstart`+" +"+`newstart`+","+`newsize`+" @@" - print match[:string.rfind(match, "\n")] - print rexp.group(6) - elif rexp.start(6) == rexp.end(6) and minuses == 0 and coherent: - if orgstartmod: - orgstart = orgstart + 1 - if newstartmod: - newstart = newstart + 1 - print "@@ -"+`orgstart-1`+" +"+`newstart-1`+","+`newsize`+" @@" - print prematch - print match[:string.rfind(match, "\n")] - else: - if orgstartmod: - orgstart = orgstart + 1 - if newstartmod: - newstart = newstart + 1 - print "@@ -"+`orgstart-1`+","+`orgsize`+" +"+`newstart-1`+","+`newsize`+" @@" - if len(prematch): - print prematch - print match[:string.rfind(match, "\n")] - if rexp.start(6) != rexp.end(6): - print rexp.group(6) - - rest = rest[rexp.end(6):] - oldminuses = minuses + oldminuses - oldplusses = plusses + oldplusses - oldoffset = oldoffset + offset + lines #include match()-lines - 
- - rest = oldrest diff --git a/lustre/kernel_patches/scripts/unused-patches b/lustre/kernel_patches/scripts/unused-patches deleted file mode 100755 index 2f3a70a..0000000 --- a/lustre/kernel_patches/scripts/unused-patches +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/sh - -# -# List unused patches -# - -. patchfns >/dev/null || . /usr/lib/patch-scripts/patchfns >/dev/null || { \ - echo "Impossible to find my library 'patchfns'." - echo "Check your install, or go to the right directory" - exit 1 -} - -usage() -{ - echo "Usage: unused-patches" - exit 1 -} - -if [ $# -ne 0 ] -then - usage -fi - -for FILE in $(ls $P/patches) -do - BASE=`stripit $FILE` -# echo checking $BASE in $P/patches - if grep $FILE $P/series/* >& /dev/null ; then - true -# echo $FILE found in $P/series - else - if [ $BASE != CVS ]; then - echo patches/$FILE - echo txt/$BASE.txt - echo pc/$BASE.pc - fi - fi -done - diff --git a/lustre/kernel_patches/series/chaos b/lustre/kernel_patches/series/chaos deleted file mode 100644 index 00ae7fd..0000000 --- a/lustre/kernel_patches/series/chaos +++ /dev/null @@ -1,20 +0,0 @@ -dev_read_only.patch -exports.patch -kmem_cache_validate.patch -lustre_version.patch -vfs_intent-2.4.18-18.patch -invalidate_show.patch -iod-rmap-exports.patch -export-truncate.patch -htree-ext3-2.4.18.patch -linux-2.4.18ea-0.8.26.patch -ext3-2.4-ino_t.patch -ext3-2.4.18-ino_sb_macro.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.18.patch -extN-misc-fixup.patch -extN-noread.patch -extN-wantedi.patch -extN-san.patch -extN-2.4.18-ino_sb_fixup.patch -iopen-2.4.18.patch diff --git a/lustre/kernel_patches/series/hp-pnnl-2.4.20 b/lustre/kernel_patches/series/hp-pnnl-2.4.20 deleted file mode 100644 index b951209..0000000 --- a/lustre/kernel_patches/series/hp-pnnl-2.4.20 +++ /dev/null @@ -1,25 +0,0 @@ -dev_read_only_hp_2.4.20.patch -exports_2.4.20-rh-hp.patch -kmem_cache_validate_hp.patch -lustre_version.patch -vfs_intent-2.4.20-vanilla.patch -invalidate_show.patch -export-truncate.patch 
-iod-stock-24-exports_hp.patch -ext-2.4-patch-1.patch -ext-2.4-patch-2.patch -ext-2.4-patch-3.patch -ext-2.4-patch-4.patch -linux-2.4.20-xattr-0.8.54-hp.patch -ext3-2.4.20-fixes.patch -ext3-2.4-ino_t.patch -ext3-largefile.patch -ext3-truncate_blocks.patch -ext3-use-after-free.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.20.patch -ext3-noread-2.4.20.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -iopen-2.4.20.patch -tcp-zero-copy.patch diff --git a/lustre/kernel_patches/series/rh-2.4.18-18 b/lustre/kernel_patches/series/rh-2.4.18-18 deleted file mode 100644 index df7f536..0000000 --- a/lustre/kernel_patches/series/rh-2.4.18-18 +++ /dev/null @@ -1,24 +0,0 @@ -dev_read_only.patch -exports.patch -kmem_cache_validate.patch -lustre_version.patch -uml_check_get_page.patch -uml_no_panic.patch -vfs_intent-2.4.18-18.patch -uml_compile_fixes.patch -invalidate_show.patch -export-truncate.patch -iod-rmap-exports.patch -htree-ext3-2.4.18.patch -linux-2.4.18ea-0.8.26.patch -ext3-2.4-ino_t.patch -ext3-2.4.18-ino_sb_macro.patch -ext3-orphan_lock.patch -ext3-delete_thread-2.4.18.patch -extN-misc-fixup.patch -extN-noread.patch -extN-wantedi.patch -extN-san.patch -extN-2.4.18-ino_sb_fixup.patch -iopen-2.4.18.patch -tcp-zero-copy.patch diff --git a/lustre/kernel_patches/series/rh-2.4.20 b/lustre/kernel_patches/series/rh-2.4.20 deleted file mode 100644 index a97c37c..0000000 --- a/lustre/kernel_patches/series/rh-2.4.20 +++ /dev/null @@ -1,23 +0,0 @@ -mcore-2.4.20-8.patch -dsp.patch -dev_read_only_2.4.20-rh.patch -exports_2.4.20-rh-hp.patch -kmem_cache_validate_2.4.20-rh.patch -lustre_version.patch -vfs_intent-2.4.20-rh.patch -invalidate_show-2.4.20-rh.patch -iod-rmap-exports-2.4.20.patch -export-truncate.patch -ext-2.4-patch-1-chaos.patch -ext-2.4-patch-2.patch -ext-2.4-patch-3.patch -ext-2.4-patch-4.patch -linux-2.4.20-xattr-0.8.54-chaos.patch -ext3-2.4.20-fixes.patch -ext3_orphan_lock-2.4.20-rh.patch -ext3-delete_thread-2.4.20.patch -ext3-noread-2.4.20.patch 
-extN-wantedi.patch -ext3-san-2.4.20.patch -iopen-2.4.20.patch -tcp-zero-copy.patch diff --git a/lustre/kernel_patches/series/vanilla-2.4.20 b/lustre/kernel_patches/series/vanilla-2.4.20 deleted file mode 100644 index e56cac6c..0000000 --- a/lustre/kernel_patches/series/vanilla-2.4.20 +++ /dev/null @@ -1,29 +0,0 @@ -uml-patch-2.4.20-4.patch -dev_read_only_2.4.20.patch -exports_2.4.20.patch -kmem_cache_validate_2.4.20.patch -lustre_version.patch -vfs_intent-2.4.20-vanilla.patch -invalidate_show.patch -export-truncate.patch -iod-stock-24-exports.patch -uml_check_get_page.patch -uml_no_panic.patch -ext-2.4-patch-1.patch -ext-2.4-patch-2.patch -ext-2.4-patch-3.patch -ext-2.4-patch-4.patch -linux-2.4.20-xattr-0.8.54.patch -ext3-2.4.20-fixes.patch -ext3-2.4-ino_t.patch -ext3-largefile.patch -ext3-truncate_blocks.patch -ext3-unmount_sync.patch -ext3-use-after-free.patch -ext3-orphan_lock.patch -ext3-noread-2.4.20.patch -ext3-delete_thread-2.4.20.patch -extN-wantedi.patch -ext3-san-2.4.20.patch -iopen-2.4.20.patch -tcp-zero-copy.patch diff --git a/lustre/kernel_patches/series/vanilla-2.5 b/lustre/kernel_patches/series/vanilla-2.5 deleted file mode 100644 index b77c77b..0000000 --- a/lustre/kernel_patches/series/vanilla-2.5 +++ /dev/null @@ -1,2 +0,0 @@ -lustre_version.patch -lustre-2.5.63.patch diff --git a/lustre/kernel_patches/txt/dev_read_only.txt b/lustre/kernel_patches/txt/dev_read_only.txt deleted file mode 100644 index 010cdb7..0000000 --- a/lustre/kernel_patches/txt/dev_read_only.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -(undescribed patch) -EDESC diff --git a/lustre/kernel_patches/txt/exports.txt b/lustre/kernel_patches/txt/exports.txt deleted file mode 100644 index 00b991e..0000000 --- a/lustre/kernel_patches/txt/exports.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -Required kernel function exports for Lustre. 
-EDESC diff --git a/lustre/kernel_patches/txt/exports_hp.txt b/lustre/kernel_patches/txt/exports_hp.txt deleted file mode 100644 index 00b991e..0000000 --- a/lustre/kernel_patches/txt/exports_hp.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -Required kernel function exports for Lustre. -EDESC diff --git a/lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt b/lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt deleted file mode 100644 index b890cbd..0000000 --- a/lustre/kernel_patches/txt/ext3-2.4.20-fixes.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -Fix for block allocation errors if block bitmap or inode block list is corrupt. -EDESC diff --git a/lustre/kernel_patches/txt/invalidate_show.txt b/lustre/kernel_patches/txt/invalidate_show.txt deleted file mode 100644 index 88f093a..0000000 --- a/lustre/kernel_patches/txt/invalidate_show.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -Prints which inodes are busy at filesystem unmount time. -EDESC diff --git a/lustre/kernel_patches/txt/kmem_cache_validate.txt b/lustre/kernel_patches/txt/kmem_cache_validate.txt deleted file mode 100644 index 010cdb7..0000000 --- a/lustre/kernel_patches/txt/kmem_cache_validate.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -(undescribed patch) -EDESC diff --git a/lustre/kernel_patches/txt/lustre_version.txt b/lustre/kernel_patches/txt/lustre_version.txt deleted file mode 100644 index 010cdb7..0000000 --- a/lustre/kernel_patches/txt/lustre_version.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -(undescribed patch) -EDESC diff --git a/lustre/kernel_patches/txt/uml_check_get_page.txt b/lustre/kernel_patches/txt/uml_check_get_page.txt deleted file mode 100644 index 010cdb7..0000000 --- a/lustre/kernel_patches/txt/uml_check_get_page.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -(undescribed patch) -EDESC diff --git a/lustre/kernel_patches/txt/uml_compile_fixes.txt b/lustre/kernel_patches/txt/uml_compile_fixes.txt deleted file mode 100644 index 010cdb7..0000000 --- a/lustre/kernel_patches/txt/uml_compile_fixes.txt +++ /dev/null @@ -1,3 
+0,0 @@ -DESC -(undescribed patch) -EDESC diff --git a/lustre/kernel_patches/txt/uml_no_panic.txt b/lustre/kernel_patches/txt/uml_no_panic.txt deleted file mode 100644 index 010cdb7..0000000 --- a/lustre/kernel_patches/txt/uml_no_panic.txt +++ /dev/null @@ -1,3 +0,0 @@ -DESC -(undescribed patch) -EDESC diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch deleted file mode 100644 index 2ef001d..0000000 --- a/lustre/kernel_patches/which_patch +++ /dev/null @@ -1,13 +0,0 @@ -SERIES MEMNONIC COMMENT - -hp-pnnl-2.4.20 linux-2.4.20-hp4_pnnl1 same as vanilla but no uml -vanilla-2.4.20 linux-2.4.20 patch includes uml -chaos-2.4.20 linux-chaos-2.4.20 same as rh-2.4.20-8 -rh-2.4.20 linux-rh-2.4.20-8 same as chaos-2.4.20 -rh-2.4.18-18 linux-rh-2.4.18-18 same as chaos but includes uml -chaos linux-chaos-2.4.18 same as rh-2.4.18-18 but no uml - -REVIEW: - -vanilla-2.5 linux-2.5.63 -hp-pnnl linux-2.4.19-hp2_pnnl6 diff --git a/lustre/ldlm/.cvsignore b/lustre/ldlm/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/ldlm/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/ldlm/Makefile.am b/lustre/ldlm/Makefile.am deleted file mode 100644 index b4e4cea..0000000 --- a/lustre/ldlm/Makefile.am +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -DEFS= - -LDLMSOURCES= l_lock.c ldlm_lock.c ldlm_resource.c ldlm_lib.c \ -ldlm_extent.c ldlm_request.c ldlm_lockd.c ldlm_internal.h - -if LIBLUSTRE -lib_LIBRARIES = libldlm.a -libldlm_a_SOURCES = $(LDLMSOURCES) -else -MODULE = ldlm -modulefs_DATA = ldlm.o -EXTRA_PROGRAMS = ldlm - -ldlm_SOURCES = $(LDLMSOURCES) -endif - -include $(top_srcdir)/Rules diff --git a/lustre/ldlm/l_lock.c b/lustre/ldlm/l_lock.c deleted file mode 100644 index c439eed..0000000 --- a/lustre/ldlm/l_lock.c +++ /dev/null @@ -1,116 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_LDLM -#ifdef __KERNEL__ -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/unistd.h> -#include <linux/version.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/smp_lock.h> -#else -#include <liblustre.h> -#endif - -#include <linux/lustre_dlm.h> -#include <linux/obd_class.h> -#include <linux/lustre_lib.h> - -/* invariants: - - only the owner of the lock changes l_owner/l_depth - - if a non-owner changes or checks the variables a spin lock is taken -*/ - -void l_lock_init(struct lustre_lock *lock) -{ - sema_init(&lock->l_sem, 1); - spin_lock_init(&lock->l_spin); -} - -void l_lock(struct lustre_lock *lock) -{ - int owner = 0; - - spin_lock(&lock->l_spin); - if (lock->l_owner == current) - owner = 1; - spin_unlock(&lock->l_spin); - - /* This is safe to increment outside the spinlock because we - * can only have 1 CPU running on the current task - * (i.e. l_owner == current), regardless of the number of CPUs. 
- */ - if (owner) { - ++lock->l_depth; - } else { - down(&lock->l_sem); - spin_lock(&lock->l_spin); - lock->l_owner = current; - lock->l_depth = 0; - spin_unlock(&lock->l_spin); - } -} - -void l_unlock(struct lustre_lock *lock) -{ - LASSERT(lock->l_owner == current); - LASSERT(lock->l_depth >= 0); - - spin_lock(&lock->l_spin); - if (--lock->l_depth < 0) { - lock->l_owner = NULL; - spin_unlock(&lock->l_spin); - up(&lock->l_sem); - return; - } - spin_unlock(&lock->l_spin); -} - -int l_has_lock(struct lustre_lock *lock) -{ - int depth = -1, owner = 0; - - spin_lock(&lock->l_spin); - if (lock->l_owner == current) { - depth = lock->l_depth; - owner = 1; - } - spin_unlock(&lock->l_spin); - - if (depth >= 0) - CDEBUG(D_INFO, "lock_depth: %d\n", depth); - return owner; -} diff --git a/lustre/ldlm/ldlm_extent.c b/lustre/ldlm/ldlm_extent.c deleted file mode 100644 index f6a9f5e..0000000 --- a/lustre/ldlm/ldlm_extent.c +++ /dev/null @@ -1,122 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LDLM -#ifndef __KERNEL__ -# include <liblustre.h> -#endif - -#include <linux/lustre_dlm.h> -#include <linux/obd_support.h> -#include <linux/lustre_lib.h> - -/* This function will be called to judge if one extent overlaps with another */ -int ldlm_extent_compat(struct ldlm_lock *a, struct ldlm_lock *b) -{ - if ((a->l_extent.start <= b->l_extent.end) && - (a->l_extent.end >= b->l_extent.start)) - RETURN(0); - - RETURN(1); -} - -/* The purpose of this function is to return: - * - the maximum extent - * - containing the requested extent - * - and not overlapping existing conflicting extents outside the requested one - * - * An alternative policy is to not shrink the new extent when conflicts exist. - * - * To reconstruct our formulas, take a deep breath. */ -static void policy_internal(struct list_head *queue, struct ldlm_extent *req_ex, - struct ldlm_extent *new_ex, ldlm_mode_t mode) -{ - struct list_head *tmp; - - list_for_each(tmp, queue) { - struct ldlm_lock *lock; - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - - /* if lock doesn't overlap new_ex, skip it. 
*/ - if (lock->l_extent.end < new_ex->start || - lock->l_extent.start > new_ex->end) - continue; - - /* Locks are compatible, overlap doesn't matter */ - if (lockmode_compat(lock->l_req_mode, mode)) - continue; - - if (lock->l_extent.start < req_ex->start) { - if (lock->l_extent.end == ~0) { - new_ex->start = req_ex->start; - new_ex->end = req_ex->end; - return; - } - new_ex->start = MIN(lock->l_extent.end + 1, - req_ex->start); - } - - if (lock->l_extent.end > req_ex->end) { - if (lock->l_extent.start == 0) { - new_ex->start = req_ex->start; - new_ex->end = req_ex->end; - return; - } - new_ex->end = MAX(lock->l_extent.start - 1, - req_ex->end); - } - } -} - -/* apply the internal policy by walking all the lists */ -int ldlm_extent_policy(struct ldlm_namespace *ns, struct ldlm_lock **lockp, - void *req_cookie, ldlm_mode_t mode, int flags, - void *data) -{ - struct ldlm_lock *lock = *lockp; - struct ldlm_resource *res = lock->l_resource; - struct ldlm_extent *req_ex = req_cookie; - struct ldlm_extent new_ex; - new_ex.start = 0; - new_ex.end = ~0; - - if (!res) - LBUG(); - - l_lock(&ns->ns_lock); - policy_internal(&res->lr_granted, req_ex, &new_ex, mode); - policy_internal(&res->lr_converting, req_ex, &new_ex, mode); - policy_internal(&res->lr_waiting, req_ex, &new_ex, mode); - l_unlock(&ns->ns_lock); - - memcpy(&lock->l_extent, &new_ex, sizeof(new_ex)); - - LDLM_DEBUG(lock, "requested extent ["LPU64"->"LPU64"], new extent [" - LPU64"->"LPU64"]", - req_ex->start, req_ex->end, new_ex.start, new_ex.end); - - if (new_ex.end != req_ex->end || new_ex.start != req_ex->start) - return ELDLM_LOCK_CHANGED; - else - return 0; -} diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h deleted file mode 100644 index b8bfdac..0000000 --- a/lustre/ldlm/ldlm_internal.h +++ /dev/null @@ -1 +0,0 @@ -int ldlm_cancel_lru(struct ldlm_namespace *ns); diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c deleted file mode 100644 index 9b53b54..0000000 --- 
a/lustre/ldlm/ldlm_lib.c +++ /dev/null @@ -1,885 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_LDLM - -#ifdef __KERNEL__ -# include <linux/module.h> -#else -# include <liblustre.h> -#endif -#include <linux/obd_ost.h> -#include <linux/lustre_dlm.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_net.h> - -int client_import_connect(struct lustre_handle *dlm_handle, - struct obd_device *obd, - struct obd_uuid *cluuid) -{ - struct client_obd *cli = &obd->u.cli; - struct obd_import *imp = cli->cl_import; - struct obd_export *exp; - struct ptlrpc_request *request; - /* XXX maybe this is a good time to create a connect struct? */ - int rc, size[] = {sizeof(imp->imp_target_uuid), - sizeof(obd->obd_uuid), - sizeof(*dlm_handle)}; - char *tmp[] = {imp->imp_target_uuid.uuid, - obd->obd_uuid.uuid, - (char *)dlm_handle}; - int rq_opc = (obd->obd_type->typ_ops->o_brw) ? 
OST_CONNECT :MDS_CONNECT; - int msg_flags; - - ENTRY; - down(&cli->cl_sem); - rc = class_connect(dlm_handle, obd, cluuid); - if (rc) - GOTO(out_sem, rc); - - cli->cl_conn_count++; - if (cli->cl_conn_count > 1) - GOTO(out_sem, rc); - - if (obd->obd_namespace != NULL) - CERROR("already have namespace!\n"); - obd->obd_namespace = ldlm_namespace_new(obd->obd_name, - LDLM_NAMESPACE_CLIENT); - if (obd->obd_namespace == NULL) - GOTO(out_disco, rc = -ENOMEM); - - request = ptlrpc_prep_req(imp, rq_opc, 3, size, tmp); - if (!request) - GOTO(out_ldlm, rc = -ENOMEM); - - request->rq_level = LUSTRE_CONN_NEW; - request->rq_replen = lustre_msg_size(0, NULL); - - imp->imp_dlm_handle = *dlm_handle; - - imp->imp_level = LUSTRE_CONN_CON; - rc = ptlrpc_queue_wait(request); - if (rc) { - class_disconnect(dlm_handle, 0); - GOTO(out_req, rc); - } - - exp = class_conn2export(dlm_handle); - exp->exp_connection = ptlrpc_connection_addref(request->rq_connection); - class_export_put(exp); - - msg_flags = lustre_msg_get_op_flags(request->rq_repmsg); - if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) { - imp->imp_replayable = 1; - CDEBUG(D_HA, "connected to replayable target: %s\n", - imp->imp_target_uuid.uuid); - ptlrpc_pinger_add_import(imp); - } - imp->imp_level = LUSTRE_CONN_FULL; - imp->imp_remote_handle = request->rq_repmsg->handle; - CDEBUG(D_HA, "local import: %p, remote handle: "LPX64"\n", imp, - imp->imp_remote_handle.cookie); - - EXIT; -out_req: - ptlrpc_req_finished(request); - if (rc) { -out_ldlm: - ldlm_namespace_free(obd->obd_namespace); - obd->obd_namespace = NULL; -out_disco: - cli->cl_conn_count--; - class_disconnect(dlm_handle, 0); - } -out_sem: - up(&cli->cl_sem); - return rc; -} - -int client_import_disconnect(struct lustre_handle *dlm_handle, int failover) -{ - struct obd_device *obd = class_conn2obd(dlm_handle); - struct client_obd *cli = &obd->u.cli; - struct obd_import *imp = cli->cl_import; - struct ptlrpc_request *request = NULL; - int rc = 0, err, 
rq_opc; - ENTRY; - - if (!obd) { - CERROR("invalid connection for disconnect: cookie "LPX64"\n", - dlm_handle ? dlm_handle->cookie : -1UL); - RETURN(-EINVAL); - } - - rq_opc = obd->obd_type->typ_ops->o_brw ? OST_DISCONNECT:MDS_DISCONNECT; - down(&cli->cl_sem); - if (!cli->cl_conn_count) { - CERROR("disconnecting disconnected device (%s)\n", - obd->obd_name); - GOTO(out_sem, rc = -EINVAL); - } - - cli->cl_conn_count--; - if (cli->cl_conn_count) - GOTO(out_no_disconnect, rc = 0); - - if (obd->obd_namespace != NULL) { - /* obd_no_recov == local only */ - ldlm_cli_cancel_unused(obd->obd_namespace, NULL, - obd->obd_no_recov, NULL); - ldlm_namespace_free(obd->obd_namespace); - obd->obd_namespace = NULL; - } - - /* Yeah, obd_no_recov also (mainly) means "forced shutdown". */ - if (obd->obd_no_recov) { - ptlrpc_set_import_active(imp, 0); - } else { - request = ptlrpc_prep_req(imp, rq_opc, 0, NULL, NULL); - if (!request) - GOTO(out_req, rc = -ENOMEM); - - request->rq_replen = lustre_msg_size(0, NULL); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out_req, rc); - } - if (imp->imp_replayable) - ptlrpc_pinger_del_import(imp); - - EXIT; - out_req: - if (request) - ptlrpc_req_finished(request); - out_no_disconnect: - err = class_disconnect(dlm_handle, 0); - if (!rc && err) - rc = err; - out_sem: - up(&cli->cl_sem); - RETURN(rc); -} - -/* -------------------------------------------------------------------------- - * from old lib/target.c - * -------------------------------------------------------------------------- */ - -int target_handle_reconnect(struct lustre_handle *conn, struct obd_export *exp, - struct obd_uuid *cluuid) -{ - if (exp->exp_connection) { - struct lustre_handle *hdl; - hdl = &exp->exp_ldlm_data.led_import->imp_remote_handle; - /* Might be a re-connect after a partition. 
*/ - if (!memcmp(&conn->cookie, &hdl->cookie, sizeof conn->cookie)) { - CERROR("%s reconnecting\n", cluuid->uuid); - conn->cookie = exp->exp_handle.h_cookie; - RETURN(EALREADY); - } else { - CERROR("%s reconnecting from %s, " - "handle mismatch (ours "LPX64", theirs " - LPX64")\n", cluuid->uuid, - exp->exp_connection->c_remote_uuid.uuid, - hdl->cookie, conn->cookie); - /* XXX disconnect them here? */ - memset(conn, 0, sizeof *conn); - /* This is a little scary, but right now we build this - * file separately into each server module, so I won't - * go _immediately_ to hell. - */ - RETURN(-EALREADY); - } - } - - conn->cookie = exp->exp_handle.h_cookie; - CDEBUG(D_INFO, "existing export for UUID '%s' at %p\n", - cluuid->uuid, exp); - CDEBUG(D_IOCTL,"connect: cookie "LPX64"\n", conn->cookie); - RETURN(0); -} - -int target_handle_connect(struct ptlrpc_request *req, svc_handler_t handler) -{ - struct obd_device *target; - struct obd_export *export = NULL; - struct obd_import *dlmimp; - struct lustre_handle conn; - struct obd_uuid tgtuuid; - struct obd_uuid cluuid; - struct obd_uuid remote_uuid; - struct list_head *p; - char *str, *tmp; - int rc, i, abort_recovery; - ENTRY; - - LASSERT_REQSWAB (req, 0); - str = lustre_msg_string (req->rq_reqmsg, 0, sizeof (tgtuuid.uuid) - 1); - if (str == NULL) { - CERROR("bad target UUID for connect\n"); - GOTO(out, rc = -EINVAL); - } - obd_str2uuid (&tgtuuid, str); - - LASSERT_REQSWAB (req, 1); - str = lustre_msg_string (req->rq_reqmsg, 1, sizeof (cluuid.uuid) - 1); - if (str == NULL) { - CERROR("bad client UUID for connect\n"); - GOTO(out, rc = -EINVAL); - } - obd_str2uuid (&cluuid, str); - - i = class_uuid2dev(&tgtuuid); - if (i == -1) { - CERROR("UUID '%s' not found for connect\n", tgtuuid.uuid); - GOTO(out, rc = -ENODEV); - } - - target = &obd_dev[i]; - if (!target || target->obd_stopping || !target->obd_set_up) { - CERROR("UUID '%s' is not available for connect\n", str); - GOTO(out, rc = -ENODEV); - } - - /* XXX extract a nettype 
and format accordingly */ - snprintf(remote_uuid.uuid, sizeof remote_uuid, - "NET_"LPX64"_UUID", req->rq_peer.peer_nid); - - spin_lock_bh(&target->obd_processing_task_lock); - abort_recovery = target->obd_abort_recovery; - spin_unlock_bh(&target->obd_processing_task_lock); - if (abort_recovery) - target_abort_recovery(target); - - tmp = lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn); - if (tmp == NULL) - GOTO(out, rc = -EPROTO); - - memcpy(&conn, tmp, sizeof conn); - - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - GOTO(out, rc); - - /* lctl gets a backstage, all-access pass. */ - if (obd_uuid_equals(&cluuid, &lctl_fake_uuid)) - goto dont_check_exports; - - spin_lock(&target->obd_dev_lock); - list_for_each(p, &target->obd_exports) { - export = list_entry(p, struct obd_export, exp_obd_chain); - if (obd_uuid_equals(&cluuid, &export->exp_client_uuid)) { - spin_unlock(&target->obd_dev_lock); - LASSERT(export->exp_obd == target); - - rc = target_handle_reconnect(&conn, export, &cluuid); - break; - } - export = NULL; - } - /* If we found an export, we already unlocked. */ - if (!export) - spin_unlock(&target->obd_dev_lock); - - /* Tell the client if we're in recovery. */ - /* If this is the first client, start the recovery timer */ - if (target->obd_recovering) { - lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECOVERING); - target_start_recovery_timer(target, handler); - } - - /* Tell the client if we support replayable requests */ - if (target->obd_replayable) - lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_REPLAYABLE); - - if (export == NULL) { - if (target->obd_recovering) { - CERROR("denying connection for new client %s: " - "in recovery\n", cluuid.uuid); - rc = -EBUSY; - } else { - dont_check_exports: - rc = obd_connect(&conn, target, &cluuid); - } - } - - /* If all else goes well, this is our RPC return code. 
*/ - req->rq_status = 0; - - if (rc && rc != EALREADY) - GOTO(out, rc); - - req->rq_repmsg->handle = conn; - - /* If the client and the server are the same node, we will already - * have an export that really points to the client's DLM export, - * because we have a shared handles table. - * - * XXX this will go away when shaver stops sending the "connect" handle - * in the real "remote handle" field of the request --phik 24 Apr 2003 - */ - if (req->rq_export != NULL) - class_export_put(req->rq_export); - - /* ownership of this export ref transfers to the request */ - export = req->rq_export = class_conn2export(&conn); - LASSERT(export != NULL); - - if (req->rq_connection != NULL) - ptlrpc_put_connection(req->rq_connection); - if (export->exp_connection != NULL) - ptlrpc_put_connection(export->exp_connection); - export->exp_connection = ptlrpc_get_connection(&req->rq_peer, - &remote_uuid); - req->rq_connection = ptlrpc_connection_addref(export->exp_connection); - - if (rc == EALREADY) { - /* We indicate the reconnection in a flag, not an error code. 
*/ - lustre_msg_add_op_flags(req->rq_repmsg, MSG_CONNECT_RECONNECT); - GOTO(out, rc = 0); - } - - memcpy(&conn, lustre_msg_buf(req->rq_reqmsg, 2, sizeof conn), - sizeof conn); - - if (export->exp_ldlm_data.led_import != NULL) - class_destroy_import(export->exp_ldlm_data.led_import); - dlmimp = export->exp_ldlm_data.led_import = class_new_import(); - dlmimp->imp_connection = ptlrpc_connection_addref(req->rq_connection); - dlmimp->imp_client = &export->exp_obd->obd_ldlm_client; - dlmimp->imp_remote_handle = conn; - dlmimp->imp_obd = target; - dlmimp->imp_dlm_fake = 1; - dlmimp->imp_level = LUSTRE_CONN_FULL; - class_import_put(dlmimp); -out: - if (rc) - req->rq_status = rc; - RETURN(rc); -} - -int target_handle_disconnect(struct ptlrpc_request *req) -{ - struct lustre_handle *conn = &req->rq_reqmsg->handle; - struct obd_import *dlmimp; - int rc; - ENTRY; - - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - req->rq_status = obd_disconnect(conn, 0); - - dlmimp = req->rq_export->exp_ldlm_data.led_import; - class_destroy_import(dlmimp); - - class_export_put(req->rq_export); - req->rq_export = NULL; - RETURN(0); -} - -/* - * Recovery functions - */ - -void target_cancel_recovery_timer(struct obd_device *obd) -{ - del_timer(&obd->obd_recovery_timer); -} - -static void abort_delayed_replies(struct obd_device *obd) -{ - struct ptlrpc_request *req; - struct list_head *tmp, *n; - list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - DEBUG_REQ(D_ERROR, req, "aborted:"); - req->rq_status = -ENOTCONN; - req->rq_type = PTL_RPC_MSG_ERR; - ptlrpc_reply(req); - list_del(&req->rq_list); - OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); - } -} - -static void abort_recovery_queue(struct obd_device *obd) -{ - struct ptlrpc_request *req; - struct list_head *tmp, *n; - int rc; - - list_for_each_safe(tmp, n, &obd->obd_recovery_queue) { - req = 
list_entry(tmp, struct ptlrpc_request, rq_list); - DEBUG_REQ(D_ERROR, req, "aborted:"); - req->rq_status = -ENOTCONN; - req->rq_type = PTL_RPC_MSG_ERR; - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc == 0) { - ptlrpc_reply(req); - } else { - DEBUG_REQ(D_ERROR, req, - "packing failed for abort-reply; skipping"); - } - list_del(&req->rq_list); - class_export_put(req->rq_export); - OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); - } -} - -void target_abort_recovery(void *data) -{ - struct obd_device *obd = data; - - CERROR("disconnecting clients and aborting recovery\n"); - spin_lock_bh(&obd->obd_processing_task_lock); - if (!obd->obd_recovering) { - spin_unlock_bh(&obd->obd_processing_task_lock); - EXIT; - return; - } - - obd->obd_recovering = obd->obd_abort_recovery = 0; - obd->obd_recoverable_clients = 0; - wake_up(&obd->obd_next_transno_waitq); - target_cancel_recovery_timer(obd); - spin_unlock_bh(&obd->obd_processing_task_lock); - class_disconnect_exports(obd, 0); - abort_delayed_replies(obd); - abort_recovery_queue(obd); - ptlrpc_run_recovery_over_upcall(obd); -} - -static void target_recovery_expired(unsigned long castmeharder) -{ - struct obd_device *obd = (struct obd_device *)castmeharder; - CERROR("recovery timed out, aborting\n"); - spin_lock_bh(&obd->obd_processing_task_lock); - obd->obd_abort_recovery = 1; - wake_up(&obd->obd_next_transno_waitq); - spin_unlock_bh(&obd->obd_processing_task_lock); -} - -static void reset_recovery_timer(struct obd_device *obd) -{ - int recovering; - spin_lock(&obd->obd_dev_lock); - recovering = obd->obd_recovering; - spin_unlock(&obd->obd_dev_lock); - - if (!recovering) - return; - CDEBUG(D_ERROR, "timer will expire in %ld seconds\n", - OBD_RECOVERY_TIMEOUT / HZ); - mod_timer(&obd->obd_recovery_timer, jiffies + OBD_RECOVERY_TIMEOUT); -} - - -/* Only start it the first time called */ -void target_start_recovery_timer(struct obd_device *obd, svc_handler_t handler) -{ 
- spin_lock_bh(&obd->obd_processing_task_lock); - if (obd->obd_recovery_handler) { - spin_unlock_bh(&obd->obd_processing_task_lock); - return; - } - CERROR("%s: starting recovery timer\n", obd->obd_name); - obd->obd_recovery_handler = handler; - obd->obd_recovery_timer.function = target_recovery_expired; - obd->obd_recovery_timer.data = (unsigned long)obd; - init_timer(&obd->obd_recovery_timer); - spin_unlock_bh(&obd->obd_processing_task_lock); - - reset_recovery_timer(obd); -} - -static int check_for_next_transno(struct obd_device *obd) -{ - struct ptlrpc_request *req; - int wake_up; - - req = list_entry(obd->obd_recovery_queue.next, - struct ptlrpc_request, rq_list); - LASSERT(req->rq_reqmsg->transno >= obd->obd_next_recovery_transno); - - wake_up = req->rq_reqmsg->transno == obd->obd_next_recovery_transno || - (obd->obd_recovering) == 0; - CDEBUG(D_HA, "check_for_next_transno: "LPD64" vs "LPD64", %d == %d\n", - req->rq_reqmsg->transno, obd->obd_next_recovery_transno, - obd->obd_recovering, wake_up); - return wake_up; -} - -static void process_recovery_queue(struct obd_device *obd) -{ - struct ptlrpc_request *req; - int abort_recovery = 0; - struct l_wait_info lwi = { 0 }; - ENTRY; - - for (;;) { - spin_lock_bh(&obd->obd_processing_task_lock); - LASSERT(obd->obd_processing_task == current->pid); - req = list_entry(obd->obd_recovery_queue.next, - struct ptlrpc_request, rq_list); - - if (req->rq_reqmsg->transno != obd->obd_next_recovery_transno) { - spin_unlock_bh(&obd->obd_processing_task_lock); - CDEBUG(D_HA, "Waiting for transno "LPD64" (1st is " - LPD64")\n", - obd->obd_next_recovery_transno, - req->rq_reqmsg->transno); - l_wait_event(obd->obd_next_transno_waitq, - check_for_next_transno(obd), &lwi); - spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; - spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) { - target_abort_recovery(obd); - return; - } - continue; - } - list_del_init(&req->rq_list); - 
spin_unlock_bh(&obd->obd_processing_task_lock); - - DEBUG_REQ(D_ERROR, req, "processing: "); - (void)obd->obd_recovery_handler(req); - reset_recovery_timer(obd); -#warning FIXME: mds_fsync_super(mds->mds_sb); - class_export_put(req->rq_export); - OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); - spin_lock_bh(&obd->obd_processing_task_lock); - obd->obd_next_recovery_transno++; - if (list_empty(&obd->obd_recovery_queue)) { - obd->obd_processing_task = 0; - spin_unlock_bh(&obd->obd_processing_task_lock); - break; - } - spin_unlock_bh(&obd->obd_processing_task_lock); - } - EXIT; -} - -int target_queue_recovery_request(struct ptlrpc_request *req, - struct obd_device *obd) -{ - struct list_head *tmp; - int inserted = 0; - __u64 transno = req->rq_reqmsg->transno; - struct ptlrpc_request *saved_req; - struct lustre_msg *reqmsg; - - /* CAVEAT EMPTOR: The incoming request message has been swabbed - * (i.e. buflens etc are in my own byte order), but type-dependent - * buffers (eg mds_body, ost_body etc) have NOT been swabbed. */ - - if (!transno) { - INIT_LIST_HEAD(&req->rq_list); - DEBUG_REQ(D_HA, req, "not queueing"); - return 1; - } - - /* XXX If I were a real man, these LBUGs would be sane cleanups. */ - /* XXX just like the request-dup code in queue_final_reply */ - OBD_ALLOC(saved_req, sizeof *saved_req); - if (!saved_req) - LBUG(); - OBD_ALLOC(reqmsg, req->rq_reqlen); - if (!reqmsg) - LBUG(); - - spin_lock_bh(&obd->obd_processing_task_lock); - - /* If we're processing the queue, we want don't want to queue this - * message. - * - * Also, if this request has a transno less than the one we're waiting - * for, we should process it now. It could (and currently always will) - * be an open request for a descriptor that was opened some time ago. - */ - if (obd->obd_processing_task == current->pid || - transno < obd->obd_next_recovery_transno) { - /* Processing the queue right now, don't re-add. 
*/ - LASSERT(list_empty(&req->rq_list)); - spin_unlock_bh(&obd->obd_processing_task_lock); - OBD_FREE(reqmsg, req->rq_reqlen); - OBD_FREE(saved_req, sizeof *saved_req); - return 1; - } - - memcpy(saved_req, req, sizeof *req); - memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); - req = saved_req; - req->rq_reqmsg = reqmsg; - class_export_get(req->rq_export); - INIT_LIST_HEAD(&req->rq_list); - - /* XXX O(n^2) */ - list_for_each(tmp, &obd->obd_recovery_queue) { - struct ptlrpc_request *reqiter = - list_entry(tmp, struct ptlrpc_request, rq_list); - - if (reqiter->rq_reqmsg->transno > transno) { - list_add_tail(&req->rq_list, &reqiter->rq_list); - inserted = 1; - break; - } - } - - if (!inserted) { - list_add_tail(&req->rq_list, &obd->obd_recovery_queue); - } - - if (obd->obd_processing_task != 0) { - /* Someone else is processing this queue, we'll leave it to - * them. - */ - if (transno == obd->obd_next_recovery_transno) - wake_up(&obd->obd_next_transno_waitq); - spin_unlock_bh(&obd->obd_processing_task_lock); - return 0; - } - - /* Nobody is processing, and we know there's (at least) one to process - * now, so we'll do the honours. - */ - obd->obd_processing_task = current->pid; - spin_unlock_bh(&obd->obd_processing_task_lock); - - process_recovery_queue(obd); - return 0; -} - -struct obd_device * target_req2obd(struct ptlrpc_request *req) -{ - return req->rq_export->exp_obd; -} - -int target_queue_final_reply(struct ptlrpc_request *req, int rc) -{ - struct obd_device *obd = target_req2obd(req); - struct ptlrpc_request *saved_req; - struct lustre_msg *reqmsg; - int recovery_done = 0; - - if (rc) { - /* Just like ptlrpc_error, but without the sending. 
*/ - lustre_pack_msg(0, NULL, NULL, &req->rq_replen, - &req->rq_repmsg); - req->rq_type = PTL_RPC_MSG_ERR; - } - - LASSERT(list_empty(&req->rq_list)); - /* XXX just like the request-dup code in queue_recovery_request */ - OBD_ALLOC(saved_req, sizeof *saved_req); - if (!saved_req) - LBUG(); - OBD_ALLOC(reqmsg, req->rq_reqlen); - if (!reqmsg) - LBUG(); - memcpy(saved_req, req, sizeof *saved_req); - memcpy(reqmsg, req->rq_reqmsg, req->rq_reqlen); - req = saved_req; - req->rq_reqmsg = reqmsg; - list_add(&req->rq_list, &obd->obd_delayed_reply_queue); - - spin_lock_bh(&obd->obd_processing_task_lock); - --obd->obd_recoverable_clients; - recovery_done = (obd->obd_recoverable_clients == 0); - spin_unlock_bh(&obd->obd_processing_task_lock); - - if (recovery_done) { - struct list_head *tmp, *n; - ldlm_reprocess_all_ns(req->rq_export->exp_obd->obd_namespace); - CDEBUG(D_ERROR, - "%s: all clients recovered, sending delayed replies\n", - obd->obd_name); - obd->obd_recovering = 0; - list_for_each_safe(tmp, n, &obd->obd_delayed_reply_queue) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - DEBUG_REQ(D_ERROR, req, "delayed:"); - ptlrpc_reply(req); - list_del(&req->rq_list); - OBD_FREE(req->rq_reqmsg, req->rq_reqlen); - OBD_FREE(req, sizeof *req); - } - target_cancel_recovery_timer(obd); - } else { - CERROR("%s: %d recoverable clients remain\n", - obd->obd_name, obd->obd_recoverable_clients); - } - - return 1; -} - -static void ptlrpc_abort_reply (struct ptlrpc_request *req) -{ - /* On return, we must be sure that the ACK callback has either - * happened or will not happen. Note that the SENT callback will - * happen come what may since we successfully posted the PUT. */ - int rc; - struct l_wait_info lwi; - unsigned long flags; - - again: - /* serialise with ACK callback */ - spin_lock_irqsave (&req->rq_lock, flags); - if (!req->rq_want_ack) { - spin_unlock_irqrestore (&req->rq_lock, flags); - /* The ACK callback has happened already. 
Although the - * SENT callback might still be outstanding (yes really) we - * don't care; this is just like normal completion. */ - return; - } - spin_unlock_irqrestore (&req->rq_lock, flags); - - /* Have a bash at unlinking the MD. This will fail until the SENT - * callback has happened since the MD is busy from the PUT. If the - * ACK still hasn't arrived after then, a successful unlink will - * ensure the ACK callback never happens. */ - rc = PtlMDUnlink (req->rq_reply_md_h); - switch (rc) { - default: - LBUG (); - case PTL_OK: - /* SENT callback happened; ACK callback preempted */ - LASSERT (req->rq_want_ack); - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_want_ack = 0; - spin_unlock_irqrestore (&req->rq_lock, flags); - return; - case PTL_INV_MD: - return; - case PTL_MD_INUSE: - /* Still sending or ACK callback in progress: wait until - * either callback has completed and try again. - * Actually we can't wait for the SENT callback because - * there's no state the SENT callback can touch that will - * allow it to communicate with us! So we just wait here - * for a short time, effectively polling for the SENT - * callback by calling PtlMDUnlink() again, to see if it - * has finished. Note that if the ACK does arrive, its - * callback wakes us in short order. --eeb */ - lwi = LWI_TIMEOUT (HZ/4, NULL, NULL); - rc = l_wait_event(req->rq_wait_for_rep, !req->rq_want_ack, - &lwi); - CDEBUG (D_HA, "Retrying req %p: %d\n", req, rc); - /* NB go back and test rq_want_ack with locking, to ensure - * if ACK callback happened, it has completed stopped - * referencing this req. */ - goto again; - } -} - -void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) -{ - int i; - int netrc; - unsigned long flags; - struct ptlrpc_req_ack_lock *ack_lock; - struct l_wait_info lwi = { 0 }; - wait_queue_t commit_wait; - struct obd_device *obd = - req->rq_export ? 
req->rq_export->exp_obd : NULL; - struct obd_export *exp = - (req->rq_export && req->rq_ack_locks[0].mode) ? - req->rq_export : NULL; - - if (exp) { - exp->exp_outstanding_reply = req; - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_want_ack = 1; - spin_unlock_irqrestore (&req->rq_lock, flags); - } - - if (!OBD_FAIL_CHECK(fail_id | OBD_FAIL_ONCE)) { - if (rc) { - DEBUG_REQ(D_ERROR, req, "processing error (%d)", rc); - netrc = ptlrpc_error(req); - } else { - DEBUG_REQ(D_NET, req, "sending reply"); - netrc = ptlrpc_reply(req); - } - } else { - obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED; - DEBUG_REQ(D_ERROR, req, "dropping reply"); - if (!exp && req->rq_repmsg) { - OBD_FREE(req->rq_repmsg, req->rq_replen); - req->rq_repmsg = NULL; - } - init_waitqueue_head(&req->rq_wait_for_rep); - netrc = 0; - } - - /* a failed send simulates the callbacks */ - LASSERT(netrc == 0 || req->rq_want_ack == 0); - if (exp == NULL) { - LASSERT(req->rq_want_ack == 0); - return; - } - LASSERT(obd != NULL); - - init_waitqueue_entry(&commit_wait, current); - add_wait_queue(&obd->obd_commit_waitq, &commit_wait); - rc = l_wait_event(req->rq_wait_for_rep, - !req->rq_want_ack || req->rq_resent || - req->rq_transno <= obd->obd_last_committed, &lwi); - remove_wait_queue(&obd->obd_commit_waitq, &commit_wait); - - spin_lock_irqsave (&req->rq_lock, flags); - /* If we got here because the ACK callback ran, this acts as a - * barrier to ensure the callback completed the wakeup. */ - spin_unlock_irqrestore (&req->rq_lock, flags); - - /* If we committed the transno already, then we might wake up before - * the ack arrives. We need to stop waiting for the ack before we can - * reuse this request structure. 
We are guaranteed by this point that - * this cannot abort the sending of the actual reply.*/ - ptlrpc_abort_reply(req); - - if (req->rq_resent) { - DEBUG_REQ(D_HA, req, "resent: not cancelling locks"); - return; - } - - LASSERT(rc == 0); - DEBUG_REQ(D_HA, req, "cancelling locks for %s", - req->rq_want_ack ? "commit" : "ack"); - - exp->exp_outstanding_reply = NULL; - - for (ack_lock = req->rq_ack_locks, i = 0; i < 4; i++, ack_lock++) { - if (!ack_lock->mode) - break; - ldlm_lock_decref(&ack_lock->lock, ack_lock->mode); - } -} - -int target_handle_ping(struct ptlrpc_request *req) -{ - return lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); -} diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c deleted file mode 100644 index 2dc60cf..0000000 --- a/lustre/ldlm/ldlm_lock.c +++ /dev/null @@ -1,1194 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#ifdef __KERNEL__ -# include <linux/slab.h> -# include <linux/module.h> -# include <linux/lustre_dlm.h> -# include <linux/lustre_mds.h> -#else -# include <liblustre.h> -# include <linux/kp30.h> -#endif - -#include <linux/obd_class.h> -#include "ldlm_internal.h" - -//struct lustre_lock ldlm_everything_lock; - -/* lock types */ -char *ldlm_lockname[] = { - [0] "--", - [LCK_EX] "EX", - [LCK_PW] "PW", - [LCK_PR] "PR", - [LCK_CW] "CW", - [LCK_CR] "CR", - [LCK_NL] "NL" -}; -char *ldlm_typename[] = { - [LDLM_PLAIN] "PLN", - [LDLM_EXTENT] "EXT", -}; - -#ifdef __KERNEL__ -char *ldlm_it2str(int it) -{ - switch (it) { - case IT_OPEN: - return "open"; - case IT_CREAT: - return "creat"; - case (IT_OPEN | IT_CREAT): - return "open|creat"; - case IT_READDIR: - return "readdir"; - case IT_GETATTR: - return "getattr"; - case IT_LOOKUP: - return "lookup"; - case IT_UNLINK: - return "unlink"; - default: - CERROR("Unknown intent %d\n", it); - return "UNKNOWN"; - } -} -#endif - -extern kmem_cache_t *ldlm_lock_slab; -struct lustre_lock ldlm_handle_lock; - -static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b); - -static ldlm_res_compat ldlm_res_compat_table[] = { - [LDLM_PLAIN] ldlm_plain_compat, - [LDLM_EXTENT] ldlm_extent_compat, -}; - -static ldlm_res_policy ldlm_intent_policy_func; - -static int ldlm_plain_policy(struct ldlm_namespace *ns, struct ldlm_lock **lock, - void *req_cookie, ldlm_mode_t mode, int flags, - void *data) -{ - if ((flags & LDLM_FL_HAS_INTENT) && ldlm_intent_policy_func) { - return ldlm_intent_policy_func(ns, lock, req_cookie, mode, - flags, data); - } - - return ELDLM_OK; -} - -static ldlm_res_policy ldlm_res_policy_table[] = { - [LDLM_PLAIN] ldlm_plain_policy, - [LDLM_EXTENT] ldlm_extent_policy, -}; - -void ldlm_register_intent(ldlm_res_policy arg) -{ - ldlm_intent_policy_func = arg; -} - -void ldlm_unregister_intent(void) -{ - ldlm_intent_policy_func = NULL; -} - -/* - * REFCOUNTED LOCK OBJECTS - */ - 
- -/* - * Lock refcounts, during creation: - * - one special one for allocation, dec'd only once in destroy - * - one for being a lock that's in-use - * - one for the addref associated with a new lock - */ -struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock) -{ - atomic_inc(&lock->l_refc); - return lock; -} - -void ldlm_lock_put(struct ldlm_lock *lock) -{ - struct ldlm_namespace *ns = lock->l_resource->lr_namespace; - ENTRY; - - if (atomic_dec_and_test(&lock->l_refc)) { - l_lock(&ns->ns_lock); - LDLM_DEBUG(lock, "final lock_put on destroyed lock, freeing"); - LASSERT(lock->l_destroyed); - LASSERT(list_empty(&lock->l_res_link)); - - spin_lock(&ns->ns_counter_lock); - ns->ns_locks--; - spin_unlock(&ns->ns_counter_lock); - - ldlm_resource_putref(lock->l_resource); - lock->l_resource = NULL; - - if (lock->l_parent) - LDLM_LOCK_PUT(lock->l_parent); - - OBD_SLAB_FREE(lock, ldlm_lock_slab, sizeof(*lock)); - l_unlock(&ns->ns_lock); - } - - EXIT; -} - -void ldlm_lock_remove_from_lru(struct ldlm_lock *lock) -{ - ENTRY; - l_lock(&lock->l_resource->lr_namespace->ns_lock); - if (!list_empty(&lock->l_lru)) { - list_del_init(&lock->l_lru); - lock->l_resource->lr_namespace->ns_nr_unused--; - LASSERT(lock->l_resource->lr_namespace->ns_nr_unused >= 0); - } - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - EXIT; -} - -/* This used to have a 'strict' flact, which recovery would use to mark an - * in-use lock as needing-to-die. Lest I am ever tempted to put it back, I - * shall explain why it's gone: with the new hash table scheme, once you call - * ldlm_lock_destroy, you can never drop your final references on this lock. - * Because it's not in the hash table anymore. 
-phil */ -void ldlm_lock_destroy(struct ldlm_lock *lock) -{ - ENTRY; - l_lock(&lock->l_resource->lr_namespace->ns_lock); - - if (!list_empty(&lock->l_children)) { - LDLM_ERROR(lock, "still has children (%p)!", - lock->l_children.next); - ldlm_lock_dump(D_ERROR, lock); - LBUG(); - } - if (lock->l_readers || lock->l_writers) { - LDLM_ERROR(lock, "lock still has references"); - ldlm_lock_dump(D_ERROR, lock); - LBUG(); - } - - if (!list_empty(&lock->l_res_link)) { - ldlm_lock_dump(D_ERROR, lock); - LBUG(); - } - - if (lock->l_destroyed) { - LASSERT(list_empty(&lock->l_lru)); - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - EXIT; - return; - } - lock->l_destroyed = 1; - - list_del_init(&lock->l_export_chain); - ldlm_lock_remove_from_lru(lock); - class_handle_unhash(&lock->l_handle); - -#if 0 - /* Wake anyone waiting for this lock */ - /* FIXME: I should probably add yet another flag, instead of using - * l_export to only call this on clients */ - lock->l_export = NULL; - if (lock->l_export && lock->l_completion_ast) - lock->l_completion_ast(lock, 0); -#endif - - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - LDLM_LOCK_PUT(lock); - EXIT; -} - -/* this is called by portals_handle2object with the handle lock taken */ -static void lock_handle_addref(void *lock) -{ - LDLM_LOCK_GET((struct ldlm_lock *)lock); -} - -/* - * usage: pass in a resource on which you have done ldlm_resource_get - * pass in a parent lock on which you have done a ldlm_lock_get - * after return, ldlm_*_put the resource and parent - * returns: lock with refcount 1 - */ -static struct ldlm_lock *ldlm_lock_new(struct ldlm_lock *parent, - struct ldlm_resource *resource) -{ - struct ldlm_lock *lock; - ENTRY; - - if (resource == NULL) - LBUG(); - - OBD_SLAB_ALLOC(lock, ldlm_lock_slab, SLAB_KERNEL, sizeof(*lock)); - if (lock == NULL) - RETURN(NULL); - - lock->l_resource = ldlm_resource_getref(resource); - - atomic_set(&lock->l_refc, 2); - INIT_LIST_HEAD(&lock->l_children); - 
INIT_LIST_HEAD(&lock->l_res_link); - INIT_LIST_HEAD(&lock->l_lru); - INIT_LIST_HEAD(&lock->l_export_chain); - INIT_LIST_HEAD(&lock->l_pending_chain); - init_waitqueue_head(&lock->l_waitq); - - spin_lock(&resource->lr_namespace->ns_counter_lock); - resource->lr_namespace->ns_locks++; - spin_unlock(&resource->lr_namespace->ns_counter_lock); - - if (parent != NULL) { - l_lock(&parent->l_resource->lr_namespace->ns_lock); - lock->l_parent = LDLM_LOCK_GET(parent); - list_add(&lock->l_childof, &parent->l_children); - l_unlock(&parent->l_resource->lr_namespace->ns_lock); - } - - INIT_LIST_HEAD(&lock->l_handle.h_link); - class_handle_hash(&lock->l_handle, lock_handle_addref); - - RETURN(lock); -} - -int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock, - struct ldlm_res_id new_resid) -{ - struct ldlm_resource *oldres = lock->l_resource; - ENTRY; - - l_lock(&ns->ns_lock); - if (memcmp(&new_resid, &lock->l_resource->lr_name, - sizeof(lock->l_resource->lr_name)) == 0) { - /* Nothing to do */ - l_unlock(&ns->ns_lock); - RETURN(0); - } - - LASSERT(new_resid.name[0] != 0); - - /* This function assumes that the lock isn't on any lists */ - LASSERT(list_empty(&lock->l_res_link)); - - lock->l_resource = ldlm_resource_get(ns, NULL, new_resid, - lock->l_resource->lr_type, 1); - if (lock->l_resource == NULL) { - LBUG(); - RETURN(-ENOMEM); - } - - /* ...and the flowers are still standing! */ - ldlm_resource_putref(oldres); - - l_unlock(&ns->ns_lock); - RETURN(0); -} - -/* - * HANDLES - */ - -void ldlm_lock2handle(struct ldlm_lock *lock, struct lustre_handle *lockh) -{ - lockh->cookie = lock->l_handle.h_cookie; -} - -/* if flags: atomically get the lock and set the flags. 
- * Return NULL if flag already set - */ - -struct ldlm_lock *__ldlm_handle2lock(struct lustre_handle *handle, int flags) -{ - struct ldlm_lock *lock = NULL, *retval = NULL; - ENTRY; - - LASSERT(handle); - - lock = class_handle2object(handle->cookie); - if (lock == NULL) - RETURN(NULL); - - LASSERT(lock->l_resource != NULL); - LASSERT(lock->l_resource->lr_namespace != NULL); - - l_lock(&lock->l_resource->lr_namespace->ns_lock); - - /* It's unlikely but possible that someone marked the lock as - * destroyed after we did handle2object on it */ - if (lock->l_destroyed) { - CDEBUG(D_INFO, "lock already destroyed: lock %p\n", lock); - LDLM_LOCK_PUT(lock); - GOTO(out, retval); - } - - if (flags && (lock->l_flags & flags)) { - LDLM_LOCK_PUT(lock); - GOTO(out, retval); - } - - if (flags) - lock->l_flags |= flags; - - retval = lock; - EXIT; - out: - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - return retval; -} - -struct ldlm_lock *ldlm_handle2lock_ns(struct ldlm_namespace *ns, - struct lustre_handle *handle) -{ - struct ldlm_lock *retval = NULL; - - l_lock(&ns->ns_lock); - retval = __ldlm_handle2lock(handle, 0); - l_unlock(&ns->ns_lock); - - return retval; -} - -static int ldlm_plain_compat(struct ldlm_lock *a, struct ldlm_lock *b) -{ - return lockmode_compat(a->l_req_mode, b->l_req_mode); -} - -void ldlm_lock2desc(struct ldlm_lock *lock, struct ldlm_lock_desc *desc) -{ - ldlm_res2desc(lock->l_resource, &desc->l_resource); - desc->l_req_mode = lock->l_req_mode; - desc->l_granted_mode = lock->l_granted_mode; - memcpy(&desc->l_extent, &lock->l_extent, sizeof(desc->l_extent)); - memcpy(desc->l_version, lock->l_version, sizeof(desc->l_version)); -} - -static void ldlm_add_ast_work_item(struct ldlm_lock *lock, - struct ldlm_lock *new, - void *data, int datalen) -{ - struct ldlm_ast_work *w; - ENTRY; - - l_lock(&lock->l_resource->lr_namespace->ns_lock); - if (new && (lock->l_flags & LDLM_FL_AST_SENT)) - GOTO(out, 0); - - OBD_ALLOC(w, sizeof(*w)); - if (!w) { - LBUG(); - 
GOTO(out, 0); - } - - w->w_data = data; - w->w_datalen = datalen; - if (new) { - lock->l_flags |= LDLM_FL_AST_SENT; - w->w_blocking = 1; - ldlm_lock2desc(new, &w->w_desc); - } - - w->w_lock = LDLM_LOCK_GET(lock); - list_add(&w->w_list, lock->l_resource->lr_tmp); - EXIT; - out: - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - return; -} - -void ldlm_lock_addref(struct lustre_handle *lockh, __u32 mode) -{ - struct ldlm_lock *lock; - - lock = ldlm_handle2lock(lockh); - ldlm_lock_addref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} - -/* only called for local locks */ -void ldlm_lock_addref_internal(struct ldlm_lock *lock, __u32 mode) -{ - l_lock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_lock_remove_from_lru(lock); - if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR) - lock->l_readers++; - else - lock->l_writers++; - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - LDLM_LOCK_GET(lock); - LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]); -} - -void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode) -{ - struct ldlm_namespace *ns; - ENTRY; - - LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); - ns = lock->l_resource->lr_namespace; - l_lock(&ns->ns_lock); - if (mode == LCK_NL || mode == LCK_CR || mode == LCK_PR) { - LASSERT(lock->l_readers > 0); - lock->l_readers--; - } else { - LASSERT(lock->l_writers > 0); - lock->l_writers--; - } - - if (lock->l_flags & LDLM_FL_LOCAL && - !lock->l_readers && !lock->l_writers) { - /* If this is a local lock on a server namespace and this was - * the last reference, cancel the lock. */ - CDEBUG(D_INFO, "forcing cancel of local lock\n"); - lock->l_flags |= LDLM_FL_CBPENDING; - } - - if (!lock->l_readers && !lock->l_writers && - (lock->l_flags & LDLM_FL_CBPENDING)) { - /* If we received a blocked AST and this was the last reference, - * run the callback. 
*/ - if (!ns->ns_client && lock->l_export) - CERROR("FL_CBPENDING set on non-local lock--just a " - "warning\n"); - - LDLM_DEBUG(lock, "final decref done on cbpending lock"); - l_unlock(&ns->ns_lock); - - /* FIXME: need a real 'desc' here */ - if (lock->l_blocking_ast != NULL) - lock->l_blocking_ast(lock, NULL, lock->l_data, - LDLM_CB_BLOCKING); - else - LDLM_DEBUG(lock, "No blocking AST?"); - } else if (ns->ns_client && !lock->l_readers && !lock->l_writers) { - /* If this is a client-side namespace and this was the last - * reference, put it on the LRU. */ - LASSERT(list_empty(&lock->l_lru)); - LASSERT(ns->ns_nr_unused >= 0); - list_add_tail(&lock->l_lru, &ns->ns_unused_list); - ns->ns_nr_unused++; - l_unlock(&ns->ns_lock); - ldlm_cancel_lru(ns); - } else { - l_unlock(&ns->ns_lock); - } - - LDLM_LOCK_PUT(lock); /* matches the ldlm_lock_get in addref */ - - EXIT; -} - -void ldlm_lock_decref(struct lustre_handle *lockh, __u32 mode) -{ - struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); - LASSERT(lock != NULL); - ldlm_lock_decref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} - -/* This will drop a lock reference and mark it for destruction, but will not - * necessarily cancel the lock before returning. 
*/ -void ldlm_lock_decref_and_cancel(struct lustre_handle *lockh, __u32 mode) -{ - struct ldlm_lock *lock = __ldlm_handle2lock(lockh, 0); - ENTRY; - - LASSERT(lock != NULL); - - LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]); - l_lock(&lock->l_resource->lr_namespace->ns_lock); - lock->l_flags |= LDLM_FL_CBPENDING; - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_lock_decref_internal(lock, mode); - LDLM_LOCK_PUT(lock); -} - -static int ldlm_lock_compat_list(struct ldlm_lock *lock, int send_cbs, - struct list_head *queue) -{ - struct list_head *tmp, *pos; - int rc = 1; - - list_for_each_safe(tmp, pos, queue) { - struct ldlm_lock *child; - ldlm_res_compat compat; - - child = list_entry(tmp, struct ldlm_lock, l_res_link); - if (lock == child) - continue; - - compat = ldlm_res_compat_table[child->l_resource->lr_type]; - if (compat && compat(child, lock)) { - CDEBUG(D_OTHER, "compat function succeded, next.\n"); - continue; - } - if (lockmode_compat(child->l_granted_mode, lock->l_req_mode)) { - CDEBUG(D_OTHER, "lock modes are compatible, next.\n"); - continue; - } - - rc = 0; - - if (send_cbs && child->l_blocking_ast != NULL) { - CDEBUG(D_OTHER, "lock %p incompatible; sending " - "blocking AST.\n", child); - ldlm_add_ast_work_item(child, lock, NULL, 0); - } - } - - return rc; -} - -static int ldlm_lock_compat(struct ldlm_lock *lock, int send_cbs) -{ - int rc; - ENTRY; - - l_lock(&lock->l_resource->lr_namespace->ns_lock); - rc = ldlm_lock_compat_list(lock, send_cbs, - &lock->l_resource->lr_granted); - /* FIXME: should we be sending ASTs to converting? 
*/ - if (rc) - rc = ldlm_lock_compat_list - (lock, send_cbs, &lock->l_resource->lr_converting); - - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - RETURN(rc); -} - -/* NOTE: called by - * - ldlm_lock_enqueue - * - ldlm_reprocess_queue - * - ldlm_lock_convert - */ -void ldlm_grant_lock(struct ldlm_lock *lock, void *data, int datalen) -{ - struct ldlm_resource *res = lock->l_resource; - ENTRY; - - l_lock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_resource_add_lock(res, &res->lr_granted, lock); - lock->l_granted_mode = lock->l_req_mode; - - if (lock->l_granted_mode < res->lr_most_restr) - res->lr_most_restr = lock->l_granted_mode; - - if (lock->l_completion_ast != NULL) - ldlm_add_ast_work_item(lock, NULL, data, datalen); - - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - EXIT; -} - -/* returns a referenced lock or NULL. See the flag descriptions below, in the - * comment above ldlm_lock_match */ -static struct ldlm_lock *search_queue(struct list_head *queue, ldlm_mode_t mode, - struct ldlm_extent *extent, - struct ldlm_lock *old_lock, void *data, - int flags) -{ - struct ldlm_lock *lock; - struct list_head *tmp; - - list_for_each(tmp, queue) { - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - - if (lock == old_lock) - break; - - /* llite sometimes wants to match locks that will be - * canceled when their users drop, but we allow it to match - * if it passes in CBPENDING and the lock still has users. - * this is generally only going to be used by children - * whose parents already hold a lock so forward progress - * can still happen. 
*/ - if (lock->l_flags & LDLM_FL_CBPENDING && - !(flags & LDLM_FL_CBPENDING)) - continue; - if (lock->l_flags & LDLM_FL_CBPENDING && - lock->l_readers == 0 && lock->l_writers == 0) - continue; - - if (lock->l_req_mode != mode) - continue; - - if (lock->l_resource->lr_type == LDLM_EXTENT && - (lock->l_extent.start > extent->start || - lock->l_extent.end < extent->end)) - continue; - - if (lock->l_destroyed) - continue; - - if ((flags & LDLM_FL_LOCAL_ONLY) && - !(lock->l_flags & LDLM_FL_LOCAL)) - continue; - - if ((flags & LDLM_FL_MATCH_DATA) && lock->l_data != data) { - LDLM_DEBUG(lock, "data mismatch: have %p, want %p", - lock->l_data, data); - continue; - } - - ldlm_lock_addref_internal(lock, mode); - return lock; - } - - return NULL; -} - -/* Can be called in two ways: - * - * If 'ns' is NULL, then lockh describes an existing lock that we want to look - * for a duplicate of. - * - * Otherwise, all of the fields must be filled in, to match against. - * - * If 'flags' contains LDLM_FL_LOCAL_ONLY, then only match local locks on the - * server (ie, connh is NULL) - * If 'flags' contains LDLM_FL_BLOCK_GRANTED, then only locks on the granted - * list will be considered - * If 'flags' contains LDLM_FL_CBPENDING, then locks that have been marked - * to be canceled can still be matched as long as they still have reader - * or writer refernces - * If 'flags' contains LDLM_FL_MATCH_DATA, then only match a lock if the opaque - * data is the same. 
- * - * Returns 1 if it finds an already-existing lock that is compatible; in this - * case, lockh is filled in with a addref()ed lock - */ -int ldlm_lock_match(struct ldlm_namespace *ns, int flags, - struct ldlm_res_id *res_id, __u32 type, void *cookie, - int cookielen, ldlm_mode_t mode, void *data, - struct lustre_handle *lockh) -{ - struct ldlm_resource *res; - struct ldlm_lock *lock, *old_lock = NULL; - int rc = 0; - ENTRY; - - if (ns == NULL) { - old_lock = ldlm_handle2lock(lockh); - LASSERT(old_lock); - - ns = old_lock->l_resource->lr_namespace; - res_id = &old_lock->l_resource->lr_name; - type = old_lock->l_resource->lr_type; - mode = old_lock->l_req_mode; - } - - res = ldlm_resource_get(ns, NULL, *res_id, type, 0); - if (res == NULL) { - LASSERT(old_lock == NULL); - RETURN(0); - } - - l_lock(&ns->ns_lock); - - lock = search_queue(&res->lr_granted, mode, cookie, old_lock, data, - flags); - if (lock != NULL) - GOTO(out, rc = 1); - if (flags & LDLM_FL_BLOCK_GRANTED) - GOTO(out, rc = 0); - lock = search_queue(&res->lr_converting, mode, cookie, old_lock, data, - flags); - if (lock != NULL) - GOTO(out, rc = 1); - lock = search_queue(&res->lr_waiting, mode, cookie, old_lock, data, - flags); - if (lock != NULL) - GOTO(out, rc = 1); - - EXIT; - out: - ldlm_resource_putref(res); - l_unlock(&ns->ns_lock); - - if (lock) { - ldlm_lock2handle(lock, lockh); - if (lock->l_completion_ast) - lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, - NULL); - } - if (rc) - LDLM_DEBUG(lock, "matched"); - else - LDLM_DEBUG_NOLOCK("not matched"); - - if (old_lock) - LDLM_LOCK_PUT(old_lock); - - return rc; -} - -/* Returns a referenced lock */ -struct ldlm_lock *ldlm_lock_create(struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, - struct ldlm_res_id res_id, __u32 type, - ldlm_mode_t mode, - ldlm_blocking_callback blocking, - void *data) -{ - struct ldlm_resource *res, *parent_res = NULL; - struct ldlm_lock *lock, *parent_lock = NULL; - ENTRY; - - if 
(parent_lock_handle) { - parent_lock = ldlm_handle2lock(parent_lock_handle); - if (parent_lock) - parent_res = parent_lock->l_resource; - } - - res = ldlm_resource_get(ns, parent_res, res_id, type, 1); - if (res == NULL) - RETURN(NULL); - - lock = ldlm_lock_new(parent_lock, res); - ldlm_resource_putref(res); - if (parent_lock != NULL) - LDLM_LOCK_PUT(parent_lock); - - if (lock == NULL) - RETURN(NULL); - - lock->l_req_mode = mode; - lock->l_data = data; - lock->l_blocking_ast = blocking; - - RETURN(lock); -} - -ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns, - struct ldlm_lock **lockp, - void *cookie, int cookie_len, - int *flags, - ldlm_completion_callback completion) -{ - struct ldlm_resource *res; - struct ldlm_lock *lock = *lockp; - int local; - ldlm_res_policy policy; - ENTRY; - - res = lock->l_resource; - - if (res->lr_type == LDLM_EXTENT) - memcpy(&lock->l_extent, cookie, sizeof(lock->l_extent)); - - /* policies are not executed on the client or during replay */ - local = res->lr_namespace->ns_client; - if (!local && !(*flags & LDLM_FL_REPLAY) && - (policy = ldlm_res_policy_table[res->lr_type])) { - int rc; - rc = policy(ns, lockp, cookie, lock->l_req_mode, *flags, NULL); - if (rc == ELDLM_LOCK_CHANGED) { - res = lock->l_resource; - *flags |= LDLM_FL_LOCK_CHANGED; - } else if (rc == ELDLM_LOCK_REPLACED) { - /* The lock that was returned has already been granted, - * and placed into lockp. Destroy the old one and our - * work here is done. */ - ldlm_lock_destroy(lock); - LDLM_LOCK_PUT(lock); - *flags |= LDLM_FL_LOCK_CHANGED; - RETURN(0); - } else if (rc == ELDLM_LOCK_ABORTED) { - ldlm_lock_destroy(lock); - RETURN(rc); - } - } - - l_lock(&ns->ns_lock); - if (local && lock->l_req_mode == lock->l_granted_mode) { - /* The server returned a blocked lock, but it was granted before - * we got a chance to actually enqueue it. We don't need to do - * anything else. 
*/ - *flags &= ~(LDLM_FL_BLOCK_GRANTED | - LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT); - GOTO(out, ELDLM_OK); - } - - /* This distinction between local lock trees is very important; a client - * namespace only has information about locks taken by that client, and - * thus doesn't have enough information to decide for itself if it can - * be granted (below). In this case, we do exactly what the server - * tells us to do, as dictated by the 'flags'. - * - * We do exactly the same thing during recovery, when the server is - * more or less trusting the clients not to lie. - * - * FIXME (bug 268): Detect obvious lies by checking compatibility in - * granted/converting queues. */ - ldlm_resource_unlink_lock(lock); - if (local) { - if (*flags & LDLM_FL_BLOCK_CONV) - ldlm_resource_add_lock(res, &res->lr_converting, lock); - else if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED)) - ldlm_resource_add_lock(res, &res->lr_waiting, lock); - else - ldlm_grant_lock(lock, NULL, 0); - GOTO(out, ELDLM_OK); - } else if (*flags & LDLM_FL_REPLAY) { - if (*flags & LDLM_FL_BLOCK_CONV) { - ldlm_resource_add_lock(res, &res->lr_converting, lock); - GOTO(out, ELDLM_OK); - } else if (*flags & LDLM_FL_BLOCK_WAIT) { - ldlm_resource_add_lock(res, &res->lr_waiting, lock); - GOTO(out, ELDLM_OK); - } else if (*flags & LDLM_FL_BLOCK_GRANTED) { - ldlm_grant_lock(lock, NULL, 0); - GOTO(out, ELDLM_OK); - } - /* If no flags, fall through to normal enqueue path. 
*/ - } - - /* FIXME: We may want to optimize by checking lr_most_restr */ - if (!list_empty(&res->lr_converting)) { - ldlm_resource_add_lock(res, &res->lr_waiting, lock); - *flags |= LDLM_FL_BLOCK_CONV; - GOTO(out, ELDLM_OK); - } - if (!list_empty(&res->lr_waiting)) { - ldlm_resource_add_lock(res, &res->lr_waiting, lock); - *flags |= LDLM_FL_BLOCK_WAIT; - GOTO(out, ELDLM_OK); - } - if (!ldlm_lock_compat(lock, 0)) { - ldlm_resource_add_lock(res, &res->lr_waiting, lock); - *flags |= LDLM_FL_BLOCK_GRANTED; - GOTO(out, ELDLM_OK); - } - ldlm_grant_lock(lock, NULL, 0); - EXIT; - out: - /* Don't set 'completion_ast' until here so that if the lock is granted - * immediately we don't do an unnecessary completion call. */ - lock->l_completion_ast = completion; - l_unlock(&ns->ns_lock); - return ELDLM_OK; -} - -/* Must be called with namespace taken: queue is waiting or converting. */ -static int ldlm_reprocess_queue(struct ldlm_resource *res, - struct list_head *queue) -{ - struct list_head *tmp, *pos; - ENTRY; - - list_for_each_safe(tmp, pos, queue) { - struct ldlm_lock *pending; - pending = list_entry(tmp, struct ldlm_lock, l_res_link); - - CDEBUG(D_INFO, "Reprocessing lock %p\n", pending); - - if (!ldlm_lock_compat(pending, 1)) - RETURN(1); - - list_del_init(&pending->l_res_link); - ldlm_grant_lock(pending, NULL, 0); - } - - RETURN(0); -} - -int ldlm_run_ast_work(struct list_head *rpc_list) -{ - struct list_head *tmp, *pos; - int rc, retval = 0; - ENTRY; - - list_for_each_safe(tmp, pos, rpc_list) { - struct ldlm_ast_work *w = - list_entry(tmp, struct ldlm_ast_work, w_list); - - /* It's possible to receive a completion AST before we've set - * the l_completion_ast pointer: either because the AST arrived - * before the reply, or simply because there's a small race - * window between receiving the reply and finishing the local - * enqueue. 
(bug 842) - * - * This can't happen with the blocking_ast, however, because we - * will never call the local blocking_ast until we drop our - * reader/writer reference, which we won't do until we get the - * reply and finish enqueueing. */ - if (w->w_blocking) { - LASSERT(w->w_lock->l_blocking_ast != NULL); - rc = w->w_lock->l_blocking_ast - (w->w_lock, &w->w_desc, w->w_data, - LDLM_CB_BLOCKING); - } else if (w->w_lock->l_completion_ast != NULL) { - rc = w->w_lock->l_completion_ast(w->w_lock, w->w_flags, - w->w_data); - } else { - rc = 0; - } - if (rc == -ERESTART) - retval = rc; - else if (rc) - CERROR("Failed AST - should clean & disconnect " - "client\n"); - LDLM_LOCK_PUT(w->w_lock); - list_del(&w->w_list); - OBD_FREE(w, sizeof(*w)); - } - RETURN(retval); -} - -static int reprocess_one_queue(struct ldlm_resource *res, void *closure) -{ - ldlm_reprocess_all(res); - return LDLM_ITER_CONTINUE; -} - -void ldlm_reprocess_all_ns(struct ldlm_namespace *ns) -{ - (void)ldlm_namespace_foreach_res(ns, reprocess_one_queue, NULL); -} - -void ldlm_reprocess_all(struct ldlm_resource *res) -{ - struct list_head rpc_list = LIST_HEAD_INIT(rpc_list); - int rc; - ENTRY; - - /* Local lock trees don't get reprocessed. 
*/ - if (res->lr_namespace->ns_client) { - EXIT; - return; - } - - restart: - l_lock(&res->lr_namespace->ns_lock); - res->lr_tmp = &rpc_list; - - ldlm_reprocess_queue(res, &res->lr_converting); - if (list_empty(&res->lr_converting)) - ldlm_reprocess_queue(res, &res->lr_waiting); - - res->lr_tmp = NULL; - l_unlock(&res->lr_namespace->ns_lock); - - rc = ldlm_run_ast_work(&rpc_list); - if (rc == -ERESTART) - goto restart; - EXIT; -} - -void ldlm_cancel_callback(struct ldlm_lock *lock) -{ - l_lock(&lock->l_resource->lr_namespace->ns_lock); - if (!(lock->l_flags & LDLM_FL_CANCEL)) { - lock->l_flags |= LDLM_FL_CANCEL; - if (lock->l_blocking_ast) { - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - lock->l_blocking_ast(lock, NULL, lock->l_data, - LDLM_CB_CANCELING); - return; - } else { - LDLM_DEBUG(lock, "no blocking ast"); - } - } - l_unlock(&lock->l_resource->lr_namespace->ns_lock); -} - -void ldlm_lock_cancel(struct ldlm_lock *lock) -{ - struct ldlm_resource *res; - struct ldlm_namespace *ns; - ENTRY; - - ldlm_del_waiting_lock(lock); - - res = lock->l_resource; - ns = res->lr_namespace; - - l_lock(&ns->ns_lock); - /* Please do not, no matter how tempting, remove this LBUG without - * talking to me first. 
-phik */ - if (lock->l_readers || lock->l_writers) { - LDLM_DEBUG(lock, "lock still has references"); - ldlm_lock_dump(D_OTHER, lock); - LBUG(); - } - - ldlm_cancel_callback(lock); /* XXX FIXME bug 1030 */ - - ldlm_resource_unlink_lock(lock); - ldlm_lock_destroy(lock); - l_unlock(&ns->ns_lock); - EXIT; -} - -int ldlm_lock_set_data(struct lustre_handle *lockh, void *data) -{ - struct ldlm_lock *lock = ldlm_handle2lock(lockh); - ENTRY; - - if (lock == NULL) - RETURN(-EINVAL); - - lock->l_data = data; - - LDLM_LOCK_PUT(lock); - - RETURN(0); -} - -/* This function is only called from one thread (per export); no locking around - * the list ops needed */ -void ldlm_cancel_locks_for_export(struct obd_export *exp) -{ - struct list_head *iter, *n; - - list_for_each_safe(iter, n, &exp->exp_ldlm_data.led_held_locks) { - struct ldlm_lock *lock; - struct ldlm_resource *res; - lock = list_entry(iter, struct ldlm_lock, l_export_chain); - res = ldlm_resource_getref(lock->l_resource); - LDLM_DEBUG(lock, "export %p", exp); - ldlm_lock_cancel(lock); - ldlm_reprocess_all(res); - ldlm_resource_putref(res); - } -} - -struct ldlm_resource *ldlm_lock_convert(struct ldlm_lock *lock, int new_mode, - int *flags) -{ - struct list_head rpc_list = LIST_HEAD_INIT(rpc_list); - struct ldlm_resource *res; - struct ldlm_namespace *ns; - int granted = 0; - ENTRY; - - LBUG(); - - res = lock->l_resource; - ns = res->lr_namespace; - - l_lock(&ns->ns_lock); - - lock->l_req_mode = new_mode; - ldlm_resource_unlink_lock(lock); - - /* If this is a local resource, put it on the appropriate list. */ - if (res->lr_namespace->ns_client) { - if (*flags & (LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_GRANTED)) { - ldlm_resource_add_lock(res, &res->lr_converting, lock); - } else { - /* This should never happen, because of the way the - * server handles conversions. 
*/ - LBUG(); - - res->lr_tmp = &rpc_list; - ldlm_grant_lock(lock, NULL, 0); - res->lr_tmp = NULL; - granted = 1; - /* FIXME: completion handling not with ns_lock held ! */ - if (lock->l_completion_ast) - lock->l_completion_ast(lock, 0, NULL); - } - } else { - /* FIXME: We should try the conversion right away and possibly - * return success without the need for an extra AST */ - ldlm_resource_add_lock(res, &res->lr_converting, lock); - *flags |= LDLM_FL_BLOCK_CONV; - } - - l_unlock(&ns->ns_lock); - - if (granted) - ldlm_run_ast_work(&rpc_list); - RETURN(res); -} - -void ldlm_lock_dump(int level, struct ldlm_lock *lock) -{ - char ver[128]; - struct obd_device *obd; - - if (!((portal_debug | D_ERROR) & level)) - return; - - if (RES_VERSION_SIZE != 4) - LBUG(); - - if (!lock) { - CDEBUG(level, " NULL LDLM lock\n"); - return; - } - - snprintf(ver, sizeof(ver), "%x %x %x %x", - lock->l_version[0], lock->l_version[1], - lock->l_version[2], lock->l_version[3]); - - CDEBUG(level, " -- Lock dump: %p (%s) (rc: %d)\n", lock, ver, - atomic_read(&lock->l_refc)); - obd = class_conn2obd(lock->l_connh); - if (lock->l_export && lock->l_export->exp_connection) { - CDEBUG(level, " Node: NID "LPX64" on %s (rhandle: "LPX64")\n", - lock->l_export->exp_connection->c_peer.peer_nid, - lock->l_export->exp_connection->c_peer.peer_ni->pni_name, - lock->l_remote_handle.cookie); - } else if (obd == NULL) { - CDEBUG(level, " Node: local\n"); - } else { - struct obd_import *imp = obd->u.cli.cl_import; - CDEBUG(level, " Node: NID "LPX64" on %s (rhandle: "LPX64")\n", - imp->imp_connection->c_peer.peer_nid, - imp->imp_connection->c_peer.peer_ni->pni_name, - lock->l_remote_handle.cookie); - } - CDEBUG(level, " Parent: %p\n", lock->l_parent); - CDEBUG(level, " Resource: %p ("LPD64")\n", lock->l_resource, - lock->l_resource->lr_name.name[0]); - CDEBUG(level, " Requested mode: %d, granted mode: %d\n", - (int)lock->l_req_mode, (int)lock->l_granted_mode); - CDEBUG(level, " Readers: %u ; Writers; %u\n", - 
lock->l_readers, lock->l_writers); - if (lock->l_resource->lr_type == LDLM_EXTENT) - CDEBUG(level, " Extent: "LPU64" -> "LPU64"\n", - lock->l_extent.start, lock->l_extent.end); -} - -void ldlm_lock_dump_handle(int level, struct lustre_handle *lockh) -{ - struct ldlm_lock *lock; - - lock = ldlm_handle2lock(lockh); - if (lock == NULL) - return; - - ldlm_lock_dump(D_OTHER, lock); - - LDLM_LOCK_PUT(lock); -} diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c deleted file mode 100644 index de304d4..0000000 --- a/lustre/ldlm/ldlm_lockd.c +++ /dev/null @@ -1,1132 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_LDLM - -#ifdef __KERNEL__ -# include <linux/module.h> -# include <linux/slab.h> -# include <linux/init.h> -# include <linux/wait.h> -#else -# include <liblustre.h> -#endif - -#include <linux/lustre_dlm.h> -#include <linux/obd_class.h> -extern kmem_cache_t *ldlm_resource_slab; -extern kmem_cache_t *ldlm_lock_slab; -extern struct lustre_lock ldlm_handle_lock; -extern struct list_head ldlm_namespace_list; -extern int (*mds_reint_p)(int offset, struct ptlrpc_request *req); -extern int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req); - -static int ldlm_already_setup = 0; - -#ifdef __KERNEL__ - -inline unsigned long round_timeout(unsigned long timeout) -{ - return ((timeout / HZ) + 1) * HZ; -} - -/* XXX should this be per-ldlm? */ -static struct list_head waiting_locks_list; -static spinlock_t waiting_locks_spinlock; -static struct timer_list waiting_locks_timer; - -static struct expired_lock_thread { - wait_queue_head_t elt_waitq; - int elt_state; - struct list_head elt_expired_locks; - spinlock_t elt_lock; -} expired_lock_thread; - -#define ELT_STOPPED 0 -#define ELT_READY 1 -#define ELT_TERMINATE 2 - -static inline int have_expired_locks(void) -{ - int need_to_run; - - spin_lock_bh(&expired_lock_thread.elt_lock); - need_to_run = !list_empty(&expired_lock_thread.elt_expired_locks); - spin_unlock_bh(&expired_lock_thread.elt_lock); - - RETURN(need_to_run); -} - -static int expired_lock_main(void *arg) -{ - struct list_head *expired = &expired_lock_thread.elt_expired_locks; - struct l_wait_info lwi = { 0 }; - unsigned long flags; - - ENTRY; - lock_kernel(); - kportal_daemonize("ldlm_elt"); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - unlock_kernel(); - - expired_lock_thread.elt_state = ELT_READY; - wake_up(&expired_lock_thread.elt_waitq); - - while (1) { - l_wait_event(expired_lock_thread.elt_waitq, - 
have_expired_locks() || - expired_lock_thread.elt_state == ELT_TERMINATE, - &lwi); - - spin_lock_bh(&expired_lock_thread.elt_lock); - while (!list_empty(expired)) { - struct ldlm_lock *lock = list_entry(expired->next, - struct ldlm_lock, - l_pending_chain); - spin_unlock_bh(&expired_lock_thread.elt_lock); - - ptlrpc_fail_export(lock->l_export); - - spin_lock_bh(&expired_lock_thread.elt_lock); - } - spin_unlock_bh(&expired_lock_thread.elt_lock); - - if (expired_lock_thread.elt_state == ELT_TERMINATE) - break; - } - - expired_lock_thread.elt_state = ELT_STOPPED; - wake_up(&expired_lock_thread.elt_waitq); - RETURN(0); -} - -static void waiting_locks_callback(unsigned long unused) -{ - struct ldlm_lock *lock; - - spin_lock_bh(&waiting_locks_spinlock); - while (!list_empty(&waiting_locks_list)) { - lock = list_entry(waiting_locks_list.next, struct ldlm_lock, - l_pending_chain); - - if (lock->l_callback_timeout > jiffies) - break; - - LDLM_ERROR(lock, "lock callback timer expired: evicting client " - "%s@%s nid "LPU64, - lock->l_export->exp_client_uuid.uuid, - lock->l_export->exp_connection->c_remote_uuid.uuid, - lock->l_export->exp_connection->c_peer.peer_nid); - - spin_lock_bh(&expired_lock_thread.elt_lock); - list_del(&lock->l_pending_chain); - list_add(&lock->l_pending_chain, - &expired_lock_thread.elt_expired_locks); - spin_unlock_bh(&expired_lock_thread.elt_lock); - wake_up(&expired_lock_thread.elt_waitq); - } - - spin_unlock_bh(&waiting_locks_spinlock); -} - -/* - * Indicate that we're waiting for a client to call us back cancelling a given - * lock. We add it to the pending-callback chain, and schedule the lock-timeout - * timer to fire appropriately. (We round up to the next second, to avoid - * floods of timer firings during periods of high lock contention and traffic). 
- */ -static int ldlm_add_waiting_lock(struct ldlm_lock *lock) -{ - unsigned long timeout_rounded; - - LDLM_DEBUG(lock, "adding to wait list"); - LASSERT(list_empty(&lock->l_pending_chain)); - - spin_lock_bh(&waiting_locks_spinlock); - lock->l_callback_timeout = jiffies + (obd_timeout * HZ / 2); - - timeout_rounded = round_timeout(lock->l_callback_timeout); - - if (timeout_rounded < waiting_locks_timer.expires || - !timer_pending(&waiting_locks_timer)) { - mod_timer(&waiting_locks_timer, timeout_rounded); - } - list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */ - spin_unlock_bh(&waiting_locks_spinlock); - /* We drop this ref when we get removed from the list. */ - class_export_get(lock->l_export); - return 1; -} - -/* - * Remove a lock from the pending list, likely because it had its cancellation - * callback arrive without incident. This adjusts the lock-timeout timer if - * needed. Returns 0 if the lock wasn't pending after all, 1 if it was. - */ -int ldlm_del_waiting_lock(struct ldlm_lock *lock) -{ - struct list_head *list_next; - - if (lock->l_export == NULL) { - /* We don't have a "waiting locks list" on clients. */ - LDLM_DEBUG(lock, "client lock: no-op"); - return 0; - } - - spin_lock_bh(&waiting_locks_spinlock); - - if (list_empty(&lock->l_pending_chain)) { - spin_unlock_bh(&waiting_locks_spinlock); - LDLM_DEBUG(lock, "wasn't waiting"); - return 0; - } - - list_next = lock->l_pending_chain.next; - if (lock->l_pending_chain.prev == &waiting_locks_list) { - /* Removing the head of the list, adjust timer. */ - if (list_next == &waiting_locks_list) { - /* No more, just cancel. */ - del_timer(&waiting_locks_timer); - } else { - struct ldlm_lock *next; - next = list_entry(list_next, struct ldlm_lock, - l_pending_chain); - mod_timer(&waiting_locks_timer, - round_timeout(next->l_callback_timeout)); - } - } - list_del_init(&lock->l_pending_chain); - spin_unlock_bh(&waiting_locks_spinlock); - /* We got this ref when we were added to the list. 
*/ - class_export_put(lock->l_export); - LDLM_DEBUG(lock, "removed"); - return 1; -} - -#else /* !__KERNEL__ */ - -static int ldlm_add_waiting_lock(struct ldlm_lock *lock) -{ - RETURN(1); -} - -int ldlm_del_waiting_lock(struct ldlm_lock *lock) -{ - RETURN(0); -} - -#endif /* __KERNEL__ */ - -static inline void ldlm_failed_ast(struct ldlm_lock *lock, int rc, - char *ast_type) -{ - CERROR("%s AST failed (%d) for res "LPU64"/"LPU64 - ", mode %s: evicting client %s@%s NID "LPU64"\n", - ast_type, rc, - lock->l_resource->lr_name.name[0], - lock->l_resource->lr_name.name[1], - ldlm_lockname[lock->l_granted_mode], - lock->l_export->exp_client_uuid.uuid, - lock->l_export->exp_connection->c_remote_uuid.uuid, - lock->l_export->exp_connection->c_peer.peer_nid); - ptlrpc_fail_export(lock->l_export); -} - -int ldlm_server_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag) -{ - struct ldlm_request *body; - struct ptlrpc_request *req; - int rc = 0, size = sizeof(*body); - ENTRY; - - if (flag == LDLM_CB_CANCELING) { - /* Don't need to do anything here. */ - RETURN(0); - } - - LASSERT(lock); - - l_lock(&lock->l_resource->lr_namespace->ns_lock); - /* XXX This is necessary because, with the lock re-tasking, we actually - * _can_ get called in here twice. (bug 830) */ - if (!list_empty(&lock->l_pending_chain)) { - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - RETURN(0); - } - - if (lock->l_destroyed) { - /* What's the point? 
*/ - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - RETURN(0); - } - -#if 0 - if (LTIME_S(CURRENT_TIME) - lock->l_export->exp_last_request_time > 30){ - ldlm_failed_ast(lock, -ETIMEDOUT, "Not-attempted blocking"); - RETURN(-ETIMEDOUT); - } -#endif - - req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import, - LDLM_BL_CALLBACK, 1, &size, NULL); - if (!req) - RETURN(-ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->lock_handle1, &lock->l_remote_handle, - sizeof(body->lock_handle1)); - memcpy(&body->lock_desc, desc, sizeof(*desc)); - - LDLM_DEBUG(lock, "server preparing blocking AST"); - req->rq_replen = lustre_msg_size(0, NULL); - - ldlm_add_waiting_lock(lock); - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - - req->rq_level = LUSTRE_CONN_RECOVER; - req->rq_timeout = 2; /* 2 second timeout for initial AST reply */ - rc = ptlrpc_queue_wait(req); - if (rc == -ETIMEDOUT || rc == -EINTR) { - ldlm_del_waiting_lock(lock); - ldlm_failed_ast(lock, rc, "blocking"); - } else if (rc) { - if (rc == -EINVAL) - CDEBUG(D_DLMTRACE, "client (nid "LPU64") returned %d " - "from blocking AST for lock %p--normal race\n", - req->rq_connection->c_peer.peer_nid, - req->rq_repmsg->status, lock); - else if (rc == -ENOTCONN) - CDEBUG(D_DLMTRACE, "client (nid "LPU64") returned %d " - "from blocking AST for lock %p--this client was " - "probably rebooted while it held a lock, nothing" - " serious\n",req->rq_connection->c_peer.peer_nid, - req->rq_repmsg->status, lock); - else - CDEBUG(D_ERROR, "client (nid "LPU64") returned %d " - "from blocking AST for lock %p\n", - req->rq_connection->c_peer.peer_nid, - req->rq_repmsg->status, lock); - LDLM_DEBUG(lock, "client returned error %d from blocking AST", - req->rq_status); - ldlm_lock_cancel(lock); - /* Server-side AST functions are called from ldlm_reprocess_all, - * which needs to be told to please restart its reprocessing. 
*/ - rc = -ERESTART; - } - - ptlrpc_req_finished(req); - - RETURN(rc); -} - -int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data) -{ - struct ldlm_request *body; - struct ptlrpc_request *req; - int rc = 0, size = sizeof(*body); - ENTRY; - - if (lock == NULL) { - LBUG(); - RETURN(-EINVAL); - } - - req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import, - LDLM_CP_CALLBACK, 1, &size, NULL); - if (!req) - RETURN(-ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->lock_handle1, &lock->l_remote_handle, - sizeof(body->lock_handle1)); - body->lock_flags = flags; - ldlm_lock2desc(lock, &body->lock_desc); - - LDLM_DEBUG(lock, "server preparing completion AST"); - req->rq_replen = lustre_msg_size(0, NULL); - - req->rq_level = LUSTRE_CONN_RECOVER; - req->rq_timeout = 2; /* 2 second timeout for initial AST reply */ - rc = ptlrpc_queue_wait(req); - if (rc == -ETIMEDOUT || rc == -EINTR) { - ldlm_del_waiting_lock(lock); - ldlm_failed_ast(lock, rc, "completion"); - } else if (rc) { - CERROR("client returned %d from completion AST for lock %p\n", - req->rq_status, lock); - LDLM_DEBUG(lock, "client returned error %d from completion AST", - req->rq_status); - ldlm_lock_cancel(lock); - /* Server-side AST functions are called from ldlm_reprocess_all, - * which needs to be told to please restart its reprocessing. 
*/ - rc = -ERESTART; - } - ptlrpc_req_finished(req); - - RETURN(rc); -} - -int ldlm_handle_enqueue(struct ptlrpc_request *req, - ldlm_completion_callback completion_callback, - ldlm_blocking_callback blocking_callback) -{ - struct obd_device *obddev = req->rq_export->exp_obd; - struct ldlm_reply *dlm_rep; - struct ldlm_request *dlm_req; - int rc, size = sizeof(*dlm_rep), cookielen = 0; - __u32 flags; - ldlm_error_t err; - struct ldlm_lock *lock = NULL; - void *cookie = NULL; - ENTRY; - - LDLM_DEBUG_NOLOCK("server-side enqueue handler START"); - - dlm_req = lustre_swab_reqbuf (req, 0, sizeof (*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req == NULL) { - CERROR ("Can't unpack dlm_req\n"); - RETURN (-EFAULT); - } - - flags = dlm_req->lock_flags; - if (dlm_req->lock_desc.l_resource.lr_type == LDLM_PLAIN && - (flags & LDLM_FL_HAS_INTENT)) { - /* In this case, the reply buffer is allocated deep in - * local_lock_enqueue by the policy function. */ - cookie = req; - cookielen = sizeof(*req); - } else { - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) { - CERROR("out of memory\n"); - RETURN(-ENOMEM); - } - if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT) { - cookie = &dlm_req->lock_desc.l_extent; - cookielen = sizeof(struct ldlm_extent); - } - } - - /* The lock's callback data might be set in the policy function */ - lock = ldlm_lock_create(obddev->obd_namespace, - &dlm_req->lock_handle2, - dlm_req->lock_desc.l_resource.lr_name, - dlm_req->lock_desc.l_resource.lr_type, - dlm_req->lock_desc.l_req_mode, - blocking_callback, NULL); - if (!lock) - GOTO(out, err = -ENOMEM); - - memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1, - sizeof(lock->l_remote_handle)); - LDLM_DEBUG(lock, "server-side enqueue handler, new lock created"); - - LASSERT(req->rq_export); - lock->l_export = req->rq_export; - l_lock(&lock->l_resource->lr_namespace->ns_lock); - list_add(&lock->l_export_chain, - &lock->l_export->exp_ldlm_data.led_held_locks); 
- l_unlock(&lock->l_resource->lr_namespace->ns_lock); - - err = ldlm_lock_enqueue(obddev->obd_namespace, &lock, cookie, cookielen, - &flags, completion_callback); - if (err) - GOTO(out, err); - - dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep)); - dlm_rep->lock_flags = flags; - - ldlm_lock2handle(lock, &dlm_rep->lock_handle); - if (dlm_req->lock_desc.l_resource.lr_type == LDLM_EXTENT) - memcpy(&dlm_rep->lock_extent, &lock->l_extent, - sizeof(lock->l_extent)); - if (dlm_rep->lock_flags & LDLM_FL_LOCK_CHANGED) { - memcpy(&dlm_rep->lock_resource_name, &lock->l_resource->lr_name, - sizeof(dlm_rep->lock_resource_name)); - dlm_rep->lock_mode = lock->l_req_mode; - } - - EXIT; - out: - if (lock) - LDLM_DEBUG(lock, "server-side enqueue handler, sending reply" - "(err=%d)", err); - req->rq_status = err; - - /* The LOCK_CHANGED code in ldlm_lock_enqueue depends on this - * ldlm_reprocess_all. If this moves, revisit that code. -phil */ - if (lock) { - if (!err) - ldlm_reprocess_all(lock->l_resource); - LDLM_LOCK_PUT(lock); - } - LDLM_DEBUG_NOLOCK("server-side enqueue handler END (lock %p)", lock); - - return 0; -} - -int ldlm_handle_convert(struct ptlrpc_request *req) -{ - struct ldlm_request *dlm_req; - struct ldlm_reply *dlm_rep; - struct ldlm_lock *lock; - int rc, size = sizeof(*dlm_rep); - ENTRY; - - dlm_req = lustre_swab_reqbuf (req, 0, sizeof (*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req == NULL) { - CERROR ("Can't unpack dlm_req\n"); - RETURN (-EFAULT); - } - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("out of memory\n"); - RETURN(-ENOMEM); - } - dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep)); - dlm_rep->lock_flags = dlm_req->lock_flags; - - lock = ldlm_handle2lock(&dlm_req->lock_handle1); - if (!lock) { - req->rq_status = EINVAL; - } else { - LDLM_DEBUG(lock, "server-side convert handler START"); - ldlm_lock_convert(lock, dlm_req->lock_desc.l_req_mode, - &dlm_rep->lock_flags); - 
if (ldlm_del_waiting_lock(lock)) - CDEBUG(D_DLMTRACE, "converted waiting lock %p\n", lock); - req->rq_status = 0; - } - - if (lock) { - ldlm_reprocess_all(lock->l_resource); - LDLM_DEBUG(lock, "server-side convert handler END"); - LDLM_LOCK_PUT(lock); - } else - LDLM_DEBUG_NOLOCK("server-side convert handler END"); - - RETURN(0); -} - -int ldlm_handle_cancel(struct ptlrpc_request *req) -{ - struct ldlm_request *dlm_req; - struct ldlm_lock *lock; - int rc; - ENTRY; - - dlm_req = lustre_swab_reqbuf (req, 0, sizeof (*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req == NULL) { - CERROR("bad request buffer for cancel\n"); - RETURN(-EFAULT); - } - - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("out of memory\n"); - RETURN(-ENOMEM); - } - - lock = ldlm_handle2lock(&dlm_req->lock_handle1); - if (!lock) { - CERROR("received cancel for unknown lock cookie "LPX64 - " from nid "LPU64"\n", dlm_req->lock_handle1.cookie, - req->rq_connection->c_peer.peer_nid); - LDLM_DEBUG_NOLOCK("server-side cancel handler stale lock " - "(cookie "LPU64")", - dlm_req->lock_handle1.cookie); - req->rq_status = ESTALE; - } else { - LDLM_DEBUG(lock, "server-side cancel handler START"); - ldlm_lock_cancel(lock); - if (ldlm_del_waiting_lock(lock)) - CDEBUG(D_DLMTRACE, "cancelled waiting lock %p\n", lock); - req->rq_status = 0; - } - - if (ptlrpc_reply(req) != 0) - LBUG(); - - if (lock) { - ldlm_reprocess_all(lock->l_resource); - LDLM_DEBUG(lock, "server-side cancel handler END"); - LDLM_LOCK_PUT(lock); - } - - RETURN(0); -} - -static void ldlm_handle_bl_callback(struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct ldlm_request *dlm_req, - struct ldlm_lock *lock) -{ - int do_ast; - ENTRY; - - l_lock(&ns->ns_lock); - LDLM_DEBUG(lock, "client blocking AST callback handler START"); - - lock->l_flags |= LDLM_FL_CBPENDING; - do_ast = (!lock->l_readers && !lock->l_writers); - - if (do_ast) { - LDLM_DEBUG(lock, "already unused, calling " - 
"callback (%p)", lock->l_blocking_ast); - if (lock->l_blocking_ast != NULL) { - l_unlock(&ns->ns_lock); - lock->l_blocking_ast(lock, &dlm_req->lock_desc, - lock->l_data, LDLM_CB_BLOCKING); - l_lock(&ns->ns_lock); - } - } else { - LDLM_DEBUG(lock, "Lock still has references, will be" - " cancelled later"); - } - - LDLM_DEBUG(lock, "client blocking callback handler END"); - l_unlock(&ns->ns_lock); - LDLM_LOCK_PUT(lock); - EXIT; -} - -static void ldlm_handle_cp_callback(struct ptlrpc_request *req, - struct ldlm_namespace *ns, - struct ldlm_request *dlm_req, - struct ldlm_lock *lock) -{ - LIST_HEAD(ast_list); - ENTRY; - - l_lock(&ns->ns_lock); - LDLM_DEBUG(lock, "client completion callback handler START"); - - /* If we receive the completion AST before the actual enqueue returned, - * then we might need to switch lock modes, resources, or extents. */ - if (dlm_req->lock_desc.l_granted_mode != lock->l_req_mode) { - lock->l_req_mode = dlm_req->lock_desc.l_granted_mode; - LDLM_DEBUG(lock, "completion AST, new lock mode"); - } - if (lock->l_resource->lr_type == LDLM_EXTENT) { - memcpy(&lock->l_extent, &dlm_req->lock_desc.l_extent, - sizeof(lock->l_extent)); - - if ((lock->l_extent.end & ~PAGE_MASK) != ~PAGE_MASK) { - /* XXX Old versions of BA OST code have a fencepost bug - * which will cause them to grant a lock that's one - * byte too large. 
This can be safely removed after BA - * ships their next release -phik (02 Apr 2003) */ - lock->l_extent.end--; - } else if ((lock->l_extent.start & ~PAGE_MASK) == - ~PAGE_MASK) { - lock->l_extent.start++; - } - } - - ldlm_resource_unlink_lock(lock); - if (memcmp(&dlm_req->lock_desc.l_resource.lr_name, - &lock->l_resource->lr_name, - sizeof(lock->l_resource->lr_name)) != 0) { - ldlm_lock_change_resource(ns, lock, - dlm_req->lock_desc.l_resource.lr_name); - LDLM_DEBUG(lock, "completion AST, new resource"); - } - lock->l_resource->lr_tmp = &ast_list; - ldlm_grant_lock(lock, req, sizeof(*req)); - lock->l_resource->lr_tmp = NULL; - LDLM_DEBUG(lock, "callback handler finished, about to run_ast_work"); - l_unlock(&ns->ns_lock); - LDLM_LOCK_PUT(lock); - - ldlm_run_ast_work(&ast_list); - - LDLM_DEBUG_NOLOCK("client completion callback handler END (lock %p)", - lock); - EXIT; -} - -static int ldlm_callback_reply(struct ptlrpc_request *req, int rc) -{ - req->rq_status = rc; - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) - return rc; - return ptlrpc_reply(req); -} - -static int ldlm_callback_handler(struct ptlrpc_request *req) -{ - struct ldlm_namespace *ns; - struct ldlm_request *dlm_req; - struct ldlm_lock *lock; - ENTRY; - - /* Requests arrive in sender's byte order. The ptlrpc service - * handler has already checked and, if necessary, byte-swapped the - * incoming request message body, but I am responsible for the - * message buffers. 
*/ - - if (req->rq_export == NULL) { - struct ldlm_request *dlm_req; - - CDEBUG(D_RPCTRACE, "operation %d from nid "LPU64" with bad " - "export cookie "LPX64" (ptl req %d/rep %d); this is " - "normal if this node rebooted with a lock held\n", - req->rq_reqmsg->opc, req->rq_connection->c_peer.peer_nid, - req->rq_reqmsg->handle.cookie, - req->rq_request_portal, req->rq_reply_portal); - - dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req != NULL) - CDEBUG(D_RPCTRACE, "--> lock cookie: "LPX64"\n", - dlm_req->lock_handle1.cookie); - - ldlm_callback_reply(req, -ENOTCONN); - RETURN(0); - } - - if (req->rq_reqmsg->opc == LDLM_BL_CALLBACK) { - OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0); - } else if (req->rq_reqmsg->opc == LDLM_CP_CALLBACK) { - OBD_FAIL_RETURN(OBD_FAIL_LDLM_CP_CALLBACK, 0); - } else { - ldlm_callback_reply(req, -EPROTO); - RETURN(0); - } - - LASSERT(req->rq_export != NULL); - LASSERT(req->rq_export->exp_obd != NULL); - ns = req->rq_export->exp_obd->obd_namespace; - LASSERT(ns != NULL); - - dlm_req = lustre_swab_reqbuf (req, 0, sizeof (*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req == NULL) { - CERROR ("can't unpack dlm_req\n"); - ldlm_callback_reply (req, -EPROTO); - RETURN (0); - } - - lock = ldlm_handle2lock_ns(ns, &dlm_req->lock_handle1); - if (!lock) { - CDEBUG(D_INODE, "callback on lock "LPX64" - lock disappeared\n", - dlm_req->lock_handle1.cookie); - ldlm_callback_reply(req, -EINVAL); - RETURN(0); - } - - /* we want the ost thread to get this reply so that it can respond - * to ost requests (write cache writeback) that might be triggered - * in the callback */ - ldlm_callback_reply(req, 0); - - switch (req->rq_reqmsg->opc) { - case LDLM_BL_CALLBACK: - CDEBUG(D_INODE, "blocking ast\n"); - ldlm_handle_bl_callback(req, ns, dlm_req, lock); - break; - case LDLM_CP_CALLBACK: - CDEBUG(D_INODE, "completion ast\n"); - ldlm_handle_cp_callback(req, ns, dlm_req, lock); - break; - default: - LBUG(); /* 
checked above */ - } - - RETURN(0); -} - -static int ldlm_cancel_handler(struct ptlrpc_request *req) -{ - int rc; - ENTRY; - - /* Requests arrive in sender's byte order. The ptlrpc service - * handler has already checked and, if necessary, byte-swapped the - * incoming request message body, but I am responsible for the - * message buffers. */ - - if (req->rq_export == NULL) { - struct ldlm_request *dlm_req; - CERROR("operation %d with bad export (ptl req %d/rep %d)\n", - req->rq_reqmsg->opc, req->rq_request_portal, - req->rq_reply_portal); - CERROR("--> export cookie: "LPX64"\n", - req->rq_reqmsg->handle.cookie); - dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req), - lustre_swab_ldlm_request); - if (dlm_req != NULL) - ldlm_lock_dump_handle(D_ERROR, &dlm_req->lock_handle1); - RETURN(-ENOTCONN); - } - - switch (req->rq_reqmsg->opc) { - - /* XXX FIXME move this back to mds/handler.c, bug 249 */ - case LDLM_CANCEL: - CDEBUG(D_INODE, "cancel\n"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_CANCEL, 0); - rc = ldlm_handle_cancel(req); - if (rc) - break; - RETURN(0); - - default: - CERROR("invalid opcode %d\n", req->rq_reqmsg->opc); - RETURN(-EINVAL); - } - - RETURN(0); -} - -static int ldlm_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, - void *karg, void *uarg) -{ - struct obd_device *obddev = class_conn2obd(conn); - struct ptlrpc_connection *connection; - struct obd_uuid uuid = { "ldlm" }; - int err = 0; - ENTRY; - - if (_IOC_TYPE(cmd) != IOC_LDLM_TYPE || _IOC_NR(cmd) < IOC_LDLM_MIN_NR || - _IOC_NR(cmd) > IOC_LDLM_MAX_NR) { - CDEBUG(D_IOCTL, "invalid ioctl (type %d, nr %d, size %d)\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - RETURN(-EINVAL); - } - - OBD_ALLOC(obddev->u.ldlm.ldlm_client, - sizeof(*obddev->u.ldlm.ldlm_client)); - connection = ptlrpc_uuid_to_connection(&uuid); - if (!connection) - CERROR("No LDLM UUID found: assuming ldlm is local.\n"); - - switch (cmd) { - case IOC_LDLM_TEST: - //err = ldlm_test(obddev, conn); - err = 0; - CERROR("-- 
NO TESTS WERE RUN done err %d\n", err); - GOTO(out, err); - case IOC_LDLM_DUMP: - ldlm_dump_all_namespaces(); - GOTO(out, err); - default: - GOTO(out, err = -EINVAL); - } - - out: - if (connection) - ptlrpc_put_connection(connection); - OBD_FREE(obddev->u.ldlm.ldlm_client, - sizeof(*obddev->u.ldlm.ldlm_client)); - return err; -} - -static int ldlm_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct ldlm_obd *ldlm = &obddev->u.ldlm; - int rc, i; - ENTRY; - - if (ldlm_already_setup) - RETURN(-EALREADY); - - rc = ldlm_proc_setup(obddev); - if (rc != 0) - RETURN(rc); - -#ifdef __KERNEL__ - inter_module_register("ldlm_cli_cancel_unused", THIS_MODULE, - ldlm_cli_cancel_unused); - inter_module_register("ldlm_namespace_cleanup", THIS_MODULE, - ldlm_namespace_cleanup); - inter_module_register("ldlm_replay_locks", THIS_MODULE, - ldlm_replay_locks); - - ldlm->ldlm_cb_service = - ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE, - LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL, - LDLM_CB_REPLY_PORTAL, - ldlm_callback_handler, "ldlm_cbd", obddev); - - if (!ldlm->ldlm_cb_service) { - CERROR("failed to start service\n"); - GOTO(out_proc, rc = -ENOMEM); - } - - ldlm->ldlm_cancel_service = - ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE, - LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL, - LDLM_CANCEL_REPLY_PORTAL, - ldlm_cancel_handler, "ldlm_canceld", obddev); - - if (!ldlm->ldlm_cancel_service) { - CERROR("failed to start service\n"); - GOTO(out_proc, rc = -ENOMEM); - } - - for (i = 0; i < LDLM_NUM_THREADS; i++) { - char name[32]; - sprintf(name, "ldlm_cn_%02d", i); - rc = ptlrpc_start_thread(obddev, ldlm->ldlm_cancel_service, - name); - if (rc) { - CERROR("cannot start LDLM thread #%d: rc %d\n", i, rc); - LBUG(); - GOTO(out_thread, rc); - } - } - - for (i = 0; i < LDLM_NUM_THREADS; i++) { - char name[32]; - sprintf(name, "ldlm_cb_%02d", i); - rc = ptlrpc_start_thread(obddev, ldlm->ldlm_cb_service, name); - if (rc) { - CERROR("cannot start LDLM thread #%d: 
rc %d\n", i, rc); - LBUG(); - GOTO(out_thread, rc); - } - } - - INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks); - spin_lock_init(&expired_lock_thread.elt_lock); - expired_lock_thread.elt_state = ELT_STOPPED; - init_waitqueue_head(&expired_lock_thread.elt_waitq); - - rc = kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS); - if (rc < 0) { - CERROR("Cannot start ldlm expired-lock thread: %d\n", rc); - GOTO(out_thread, rc); - } - - wait_event(expired_lock_thread.elt_waitq, - expired_lock_thread.elt_state == ELT_READY); - - INIT_LIST_HEAD(&waiting_locks_list); - spin_lock_init(&waiting_locks_spinlock); - waiting_locks_timer.function = waiting_locks_callback; - waiting_locks_timer.data = 0; - init_timer(&waiting_locks_timer); -#endif - - ldlm_already_setup = 1; - - RETURN(0); - - out_thread: -#ifdef __KERNEL__ - ptlrpc_stop_all_threads(ldlm->ldlm_cancel_service); - ptlrpc_unregister_service(ldlm->ldlm_cancel_service); - ptlrpc_stop_all_threads(ldlm->ldlm_cb_service); - ptlrpc_unregister_service(ldlm->ldlm_cb_service); -#endif - out_proc: - ldlm_proc_cleanup(obddev); - - return rc; -} - -static int ldlm_cleanup(struct obd_device *obddev, int force, int failover) -{ - struct ldlm_obd *ldlm = &obddev->u.ldlm; - ENTRY; - - if (!list_empty(&ldlm_namespace_list)) { - CERROR("ldlm still has namespaces; clean these up first.\n"); - ldlm_dump_all_namespaces(); - RETURN(-EBUSY); - } - -#ifdef __KERNEL__ - if (force) { - ptlrpc_put_ldlm_hooks(); - } else if (ptlrpc_ldlm_hooks_referenced()) { - CERROR("Some connections weren't cleaned up; run lconf with " - "--force to forcibly unload.\n"); - ptlrpc_dump_connections(); - RETURN(-EBUSY); - } - - ptlrpc_stop_all_threads(ldlm->ldlm_cb_service); - ptlrpc_unregister_service(ldlm->ldlm_cb_service); - ptlrpc_stop_all_threads(ldlm->ldlm_cancel_service); - ptlrpc_unregister_service(ldlm->ldlm_cancel_service); - ldlm_proc_cleanup(obddev); - - expired_lock_thread.elt_state = ELT_TERMINATE; - 
wake_up(&expired_lock_thread.elt_waitq); - wait_event(expired_lock_thread.elt_waitq, - expired_lock_thread.elt_state == ELT_STOPPED); - - inter_module_unregister("ldlm_namespace_cleanup"); - inter_module_unregister("ldlm_cli_cancel_unused"); - inter_module_unregister("ldlm_replay_locks"); -#endif - - ldlm_already_setup = 0; - RETURN(0); -} - -static int ldlm_connect(struct lustre_handle *conn, struct obd_device *src, - struct obd_uuid *cluuid) -{ - return class_connect(conn, src, cluuid); -} - -struct obd_ops ldlm_obd_ops = { - o_owner: THIS_MODULE, - o_iocontrol: ldlm_iocontrol, - o_setup: ldlm_setup, - o_cleanup: ldlm_cleanup, - o_connect: ldlm_connect, - o_disconnect: class_disconnect -}; - -int __init ldlm_init(void) -{ - int rc = class_register_type(&ldlm_obd_ops, 0, OBD_LDLM_DEVICENAME); - if (rc != 0) - return rc; - - ldlm_resource_slab = kmem_cache_create("ldlm_resources", - sizeof(struct ldlm_resource), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ldlm_resource_slab == NULL) - return -ENOMEM; - - ldlm_lock_slab = kmem_cache_create("ldlm_locks", - sizeof(struct ldlm_lock), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ldlm_lock_slab == NULL) { - kmem_cache_destroy(ldlm_resource_slab); - return -ENOMEM; - } - - l_lock_init(&ldlm_handle_lock); - - return 0; -} - -static void __exit ldlm_exit(void) -{ - class_unregister_type(OBD_LDLM_DEVICENAME); - if (kmem_cache_destroy(ldlm_resource_slab) != 0) - CERROR("couldn't free ldlm resource slab\n"); - if (kmem_cache_destroy(ldlm_lock_slab) != 0) - CERROR("couldn't free ldlm lock slab\n"); -} - -/* ldlm_lock.c */ -EXPORT_SYMBOL(ldlm_lock2desc); -EXPORT_SYMBOL(ldlm_register_intent); -EXPORT_SYMBOL(ldlm_unregister_intent); -EXPORT_SYMBOL(ldlm_lockname); -EXPORT_SYMBOL(ldlm_typename); -EXPORT_SYMBOL(ldlm_lock2handle); -EXPORT_SYMBOL(__ldlm_handle2lock); -EXPORT_SYMBOL(ldlm_lock_put); -EXPORT_SYMBOL(ldlm_lock_match); -EXPORT_SYMBOL(ldlm_lock_cancel); -EXPORT_SYMBOL(ldlm_lock_addref); -EXPORT_SYMBOL(ldlm_lock_decref); 
-EXPORT_SYMBOL(ldlm_lock_decref_and_cancel); -EXPORT_SYMBOL(ldlm_lock_change_resource); -EXPORT_SYMBOL(ldlm_lock_set_data); -EXPORT_SYMBOL(ldlm_it2str); -EXPORT_SYMBOL(ldlm_lock_dump); -EXPORT_SYMBOL(ldlm_lock_dump_handle); -EXPORT_SYMBOL(ldlm_cancel_locks_for_export); -EXPORT_SYMBOL(ldlm_reprocess_all_ns); - -/* ldlm_request.c */ -EXPORT_SYMBOL(ldlm_completion_ast); -EXPORT_SYMBOL(ldlm_expired_completion_wait); -EXPORT_SYMBOL(ldlm_cli_convert); -EXPORT_SYMBOL(ldlm_cli_enqueue); -EXPORT_SYMBOL(ldlm_cli_cancel); -EXPORT_SYMBOL(ldlm_cli_cancel_unused); -EXPORT_SYMBOL(ldlm_replay_locks); -EXPORT_SYMBOL(ldlm_resource_foreach); -EXPORT_SYMBOL(ldlm_namespace_foreach); -EXPORT_SYMBOL(ldlm_namespace_foreach_res); - -/* ldlm_lockd.c */ -EXPORT_SYMBOL(ldlm_server_blocking_ast); -EXPORT_SYMBOL(ldlm_server_completion_ast); -EXPORT_SYMBOL(ldlm_handle_enqueue); -EXPORT_SYMBOL(ldlm_handle_cancel); -EXPORT_SYMBOL(ldlm_handle_convert); -EXPORT_SYMBOL(ldlm_del_waiting_lock); - -#if 0 -/* ldlm_test.c */ -EXPORT_SYMBOL(ldlm_test); -EXPORT_SYMBOL(ldlm_regression_start); -EXPORT_SYMBOL(ldlm_regression_stop); -#endif - -/* ldlm_resource.c */ -EXPORT_SYMBOL(ldlm_namespace_new); -EXPORT_SYMBOL(ldlm_namespace_cleanup); -EXPORT_SYMBOL(ldlm_namespace_free); -EXPORT_SYMBOL(ldlm_namespace_dump); - -/* l_lock.c */ -EXPORT_SYMBOL(l_lock); -EXPORT_SYMBOL(l_unlock); - -/* ldlm_lib.c */ -EXPORT_SYMBOL(client_import_connect); -EXPORT_SYMBOL(client_import_disconnect); -EXPORT_SYMBOL(target_abort_recovery); -EXPORT_SYMBOL(target_handle_connect); -EXPORT_SYMBOL(target_cancel_recovery_timer); -EXPORT_SYMBOL(target_send_reply); -EXPORT_SYMBOL(target_queue_recovery_request); -EXPORT_SYMBOL(target_handle_ping); -EXPORT_SYMBOL(target_handle_disconnect); -EXPORT_SYMBOL(target_queue_final_reply); - -#ifdef __KERNEL__ -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Lock Management Module v0.1"); -MODULE_LICENSE("GPL"); - -module_init(ldlm_init); -module_exit(ldlm_exit); -#endif diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c deleted file mode 100644 index e6a8229..0000000 --- a/lustre/ldlm/ldlm_request.c +++ /dev/null @@ -1,919 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LDLM -#ifndef __KERNEL__ -#include <signal.h> -#include <liblustre.h> -#endif - -#include <linux/lustre_dlm.h> -#include <linux/obd_class.h> -#include <linux/obd.h> - -static void interrupted_completion_wait(void *data) -{ -} - -struct lock_wait_data { - struct ldlm_lock *lwd_lock; - int lwd_generation; -}; - -int ldlm_expired_completion_wait(void *data) -{ - struct lock_wait_data *lwd = data; - struct ldlm_lock *lock = lwd->lwd_lock; - struct obd_device *obd = class_conn2obd(lock->l_connh); - - if (obd == NULL) { - LDLM_ERROR(lock, "lock timed out; mot entering recovery in " - "server code, just going back to sleep"); - } else { - struct obd_import *imp = obd->u.cli.cl_import; - ptlrpc_fail_import(imp, lwd->lwd_generation); - LDLM_ERROR(lock, "lock timed out, entering recovery for %s@%s", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); - } - - RETURN(0); -} - -int ldlm_completion_ast(struct ldlm_lock *lock, int flags, void *data) -{ - /* XXX ALLOCATE - 160 mytes */ - struct lock_wait_data lwd; - unsigned long irqflags; - struct obd_device *obd; - struct obd_import *imp = NULL; - int rc = 0; - struct l_wait_info lwi; - - obd = class_conn2obd(lock->l_connh); - - /* if this is a local lock, then there is no import */ - if (obd != NULL) - imp = obd->u.cli.cl_import; - - lwd.lwd_lock = lock; - - lwi = LWI_TIMEOUT_INTR(obd_timeout * HZ, ldlm_expired_completion_wait, - interrupted_completion_wait, &lwd); - ENTRY; - - if (flags == LDLM_FL_WAIT_NOREPROC) - goto noreproc; - - if (flags == 0) { - wake_up(&lock->l_waitq); - RETURN(0); - } - - if (!(flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | - LDLM_FL_BLOCK_CONV))) - RETURN(0); - - LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock, " - "sleeping"); - ldlm_lock_dump(D_OTHER, lock); - ldlm_reprocess_all(lock->l_resource); - - noreproc: - if (imp != NULL) { - spin_lock_irqsave(&imp->imp_lock, irqflags); - lwd.lwd_generation = imp->imp_generation; - 
spin_unlock_irqrestore(&imp->imp_lock, irqflags); - } - - /* Go to sleep until the lock is granted or cancelled. */ - rc = l_wait_event(lock->l_waitq, - ((lock->l_req_mode == lock->l_granted_mode) || - lock->l_destroyed), &lwi); - - if (lock->l_destroyed) { - LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed"); - RETURN(-EIO); - } - - if (rc) { - LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)", - rc); - RETURN(rc); - } - - LDLM_DEBUG(lock, "client-side enqueue waking up: granted"); - RETURN(0); -} - -static int ldlm_cli_enqueue_local(struct ldlm_namespace *ns, - struct lustre_handle *parent_lockh, - struct ldlm_res_id res_id, - __u32 type, - void *cookie, int cookielen, - ldlm_mode_t mode, - int *flags, - ldlm_completion_callback completion, - ldlm_blocking_callback blocking, - void *data, - struct lustre_handle *lockh) -{ - struct ldlm_lock *lock; - int err; - ENTRY; - - if (ns->ns_client) { - CERROR("Trying to enqueue local lock in a shadow namespace\n"); - LBUG(); - } - - lock = ldlm_lock_create(ns, parent_lockh, res_id, type, mode, - blocking, data); - if (!lock) - GOTO(out_nolock, err = -ENOMEM); - LDLM_DEBUG(lock, "client-side local enqueue handler, new lock created"); - - ldlm_lock_addref_internal(lock, mode); - ldlm_lock2handle(lock, lockh); - lock->l_flags |= LDLM_FL_LOCAL; - - err = ldlm_lock_enqueue(ns, &lock, cookie, cookielen, flags, - completion); - if (err != ELDLM_OK) - GOTO(out, err); - - if (type == LDLM_EXTENT) - memcpy(cookie, &lock->l_extent, sizeof(lock->l_extent)); - if ((*flags) & LDLM_FL_LOCK_CHANGED) - memcpy(&res_id, &lock->l_resource->lr_name, sizeof(res_id)); - - LDLM_DEBUG_NOLOCK("client-side local enqueue handler END (lock %p)", - lock); - - if (lock->l_completion_ast) - lock->l_completion_ast(lock, *flags, NULL); - - LDLM_DEBUG(lock, "client-side local enqueue END"); - EXIT; - out: - LDLM_LOCK_PUT(lock); - out_nolock: - return err; -} - -int ldlm_cli_enqueue(struct lustre_handle *connh, - struct ptlrpc_request 
*req, - struct ldlm_namespace *ns, - struct lustre_handle *parent_lock_handle, - struct ldlm_res_id res_id, - __u32 type, - void *cookie, int cookielen, - ldlm_mode_t mode, - int *flags, - ldlm_completion_callback completion, - ldlm_blocking_callback blocking, - void *data, - struct lustre_handle *lockh) -{ - struct ldlm_lock *lock; - struct ldlm_request *body; - struct ldlm_reply *reply; - int rc, size = sizeof(*body), req_passed_in = 1, is_replay; - ENTRY; - - is_replay = *flags & LDLM_FL_REPLAY; - LASSERT(connh != NULL || !is_replay); - - if (connh == NULL) { - rc = ldlm_cli_enqueue_local(ns, parent_lock_handle, res_id, - type, cookie, cookielen, mode, - flags, completion, blocking, data, - lockh); - RETURN(rc); - } - - /* If we're replaying this lock, just check some invariants. - * If we're creating a new lock, get everything all setup nice. */ - if (is_replay) { - lock = ldlm_handle2lock(lockh); - LDLM_DEBUG(lock, "client-side enqueue START"); - LASSERT(connh == lock->l_connh); - } else { - lock = ldlm_lock_create(ns, parent_lock_handle, res_id, type, - mode, blocking, data); - if (lock == NULL) - GOTO(out_nolock, rc = -ENOMEM); - /* ugh. I set this early (instead of waiting for _enqueue) - * because the completion AST might arrive early, and we need - * (in just this one case) to run the completion_cb even if it - * arrives before the reply. 
*/ - lock->l_completion_ast = completion; - LDLM_DEBUG(lock, "client-side enqueue START"); - /* for the local lock, add the reference */ - ldlm_lock_addref_internal(lock, mode); - ldlm_lock2handle(lock, lockh); - if (type == LDLM_EXTENT) - memcpy(&lock->l_extent, cookie, - sizeof(body->lock_desc.l_extent)); - } - - if (req == NULL) { - req = ptlrpc_prep_req(class_conn2cliimp(connh), LDLM_ENQUEUE, 1, - &size, NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - req_passed_in = 0; - } else if (req->rq_reqmsg->buflens[0] != sizeof(*body)) - LBUG(); - - /* Dump lock data into the request buffer */ - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - ldlm_lock2desc(lock, &body->lock_desc); - body->lock_flags = *flags; - - memcpy(&body->lock_handle1, lockh, sizeof(*lockh)); - if (parent_lock_handle) - memcpy(&body->lock_handle2, parent_lock_handle, - sizeof(body->lock_handle2)); - - /* Continue as normal. */ - if (!req_passed_in) { - size = sizeof(*reply); - req->rq_replen = lustre_msg_size(1, &size); - } - lock->l_connh = connh; - lock->l_export = NULL; - lock->l_blocking_ast = blocking; - - LDLM_DEBUG(lock, "sending request"); - rc = ptlrpc_queue_wait(req); - - if (rc != ELDLM_OK) { - LASSERT(!is_replay); - LDLM_DEBUG(lock, "client-side enqueue END (%s)", - rc == ELDLM_LOCK_ABORTED ? 
"ABORTED" : "FAILED"); - /* Set a flag to prevent us from sending a CANCEL (bug 407) */ - l_lock(&ns->ns_lock); - lock->l_flags |= LDLM_FL_LOCAL_ONLY; - l_unlock(&ns->ns_lock); - - ldlm_lock_decref_and_cancel(lockh, mode); - - if (rc == ELDLM_LOCK_ABORTED) { - /* caller expects reply buffer 0 to have been swabbed */ - reply = lustre_swab_repbuf(req, 0, sizeof (*reply), - lustre_swab_ldlm_reply); - if (reply == NULL) { - CERROR ("Can't unpack ldlm_reply\n"); - GOTO (out_req, rc = -EPROTO); - } - } - GOTO(out_req, rc); - } - - reply = lustre_swab_repbuf(req, 0, sizeof (*reply), - lustre_swab_ldlm_reply); - if (reply == NULL) { - CERROR ("Can't unpack ldlm_reply\n"); - GOTO (out_req, rc = -EPROTO); - } - - memcpy(&lock->l_remote_handle, &reply->lock_handle, - sizeof(lock->l_remote_handle)); - *flags = reply->lock_flags; - - CDEBUG(D_INFO, "local: %p, remote cookie: "LPX64", flags: %d\n", lock, - reply->lock_handle.cookie, *flags); - if (type == LDLM_EXTENT) { - CDEBUG(D_INFO, "requested extent: "LPU64" -> "LPU64", got " - "extent "LPU64" -> "LPU64"\n", - body->lock_desc.l_extent.start, - body->lock_desc.l_extent.end, - reply->lock_extent.start, reply->lock_extent.end); - - if ((reply->lock_extent.end & ~PAGE_MASK) != ~PAGE_MASK) { - /* XXX Old versions of BA OST code have a fencepost bug - * which will cause them to grant a lock that's one - * byte too large. This can be safely removed after BA - * ships their next release -phik (02 Apr 2003) */ - reply->lock_extent.end--; - } else if ((reply->lock_extent.start & ~PAGE_MASK) == - ~PAGE_MASK) { - reply->lock_extent.start++; - } - - cookie = &reply->lock_extent; /* FIXME bug 267 */ - cookielen = sizeof(reply->lock_extent); - } - - /* If enqueue returned a blocked lock but the completion handler has - * already run, then it fixed up the resource and we don't need to do it - * again. 
*/ - if ((*flags) & LDLM_FL_LOCK_CHANGED) { - int newmode = reply->lock_mode; - LASSERT(!is_replay); - if (newmode && newmode != lock->l_req_mode) { - LDLM_DEBUG(lock, "server returned different mode %s", - ldlm_lockname[newmode]); - lock->l_req_mode = newmode; - } - - if (reply->lock_resource_name.name[0] != - lock->l_resource->lr_name.name[0]) { - CDEBUG(D_INFO, "remote intent success, locking %ld " - "instead of %ld\n", - (long)reply->lock_resource_name.name[0], - (long)lock->l_resource->lr_name.name[0]); - - ldlm_lock_change_resource(ns, lock, - reply->lock_resource_name); - if (lock->l_resource == NULL) { - LBUG(); - GOTO(out_req, rc = -ENOMEM); - } - LDLM_DEBUG(lock, "client-side enqueue, new resource"); - } - } - - if (!is_replay) { - l_lock(&ns->ns_lock); - lock->l_completion_ast = NULL; - rc = ldlm_lock_enqueue(ns, &lock, cookie, cookielen, flags, - completion); - l_unlock(&ns->ns_lock); - if (lock->l_completion_ast) - lock->l_completion_ast(lock, *flags, NULL); - } - - LDLM_DEBUG(lock, "client-side enqueue END"); - EXIT; - out_req: - if (!req_passed_in) - ptlrpc_req_finished(req); - out: - LDLM_LOCK_PUT(lock); - out_nolock: - return rc; -} - -int ldlm_cli_replay_enqueue(struct ldlm_lock *lock) -{ - struct lustre_handle lockh; - struct ldlm_res_id junk; - int flags = LDLM_FL_REPLAY; - ldlm_lock2handle(lock, &lockh); - return ldlm_cli_enqueue(lock->l_connh, NULL, NULL, NULL, junk, - lock->l_resource->lr_type, NULL, 0, -1, &flags, - NULL, NULL, NULL, &lockh); -} - -static int ldlm_cli_convert_local(struct ldlm_lock *lock, int new_mode, - int *flags) -{ - ENTRY; - if (lock->l_resource->lr_namespace->ns_client) { - CERROR("Trying to cancel local lock\n"); - LBUG(); - } - LDLM_DEBUG(lock, "client-side local convert"); - - ldlm_lock_convert(lock, new_mode, flags); - ldlm_reprocess_all(lock->l_resource); - - LDLM_DEBUG(lock, "client-side local convert handler END"); - LDLM_LOCK_PUT(lock); - RETURN(0); -} - -/* FIXME: one of ldlm_cli_convert or the server side 
should reject attempted - * conversion of locks which are on the waiting or converting queue */ -int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags) -{ - struct ldlm_request *body; - struct lustre_handle *connh; - struct ldlm_reply *reply; - struct ldlm_lock *lock; - struct ldlm_resource *res; - struct ptlrpc_request *req; - int rc, size = sizeof(*body); - ENTRY; - - lock = ldlm_handle2lock(lockh); - if (!lock) { - LBUG(); - RETURN(-EINVAL); - } - *flags = 0; - connh = lock->l_connh; - - if (!connh) - RETURN(ldlm_cli_convert_local(lock, new_mode, flags)); - - LDLM_DEBUG(lock, "client-side convert"); - - req = ptlrpc_prep_req(class_conn2cliimp(connh), LDLM_CONVERT, 1, &size, - NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->lock_handle1, &lock->l_remote_handle, - sizeof(body->lock_handle1)); - - body->lock_desc.l_req_mode = new_mode; - body->lock_flags = *flags; - - size = sizeof(*reply); - req->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(req); - if (rc != ELDLM_OK) - GOTO(out, rc); - - reply = lustre_swab_repbuf(req, 0, sizeof (*reply), - lustre_swab_ldlm_reply); - if (reply == NULL) { - CERROR ("Can't unpack ldlm_reply\n"); - GOTO (out, rc = -EPROTO); - } - - res = ldlm_lock_convert(lock, new_mode, &reply->lock_flags); - if (res != NULL) - ldlm_reprocess_all(res); - /* Go to sleep until the lock is granted. */ - /* FIXME: or cancelled. 
*/ - if (lock->l_completion_ast) - lock->l_completion_ast(lock, LDLM_FL_WAIT_NOREPROC, NULL); - EXIT; - out: - LDLM_LOCK_PUT(lock); - ptlrpc_req_finished(req); - return rc; -} - -int ldlm_cli_cancel(struct lustre_handle *lockh) -{ - struct ptlrpc_request *req; - struct ldlm_lock *lock; - struct ldlm_request *body; - int rc = 0, size = sizeof(*body); - ENTRY; - - /* concurrent cancels on the same handle can happen */ - lock = __ldlm_handle2lock(lockh, LDLM_FL_CANCELING); - if (lock == NULL) - RETURN(0); - - if (lock->l_connh) { - int local_only; - struct obd_import *imp; - - LDLM_DEBUG(lock, "client-side cancel"); - /* Set this flag to prevent others from getting new references*/ - l_lock(&lock->l_resource->lr_namespace->ns_lock); - lock->l_flags |= LDLM_FL_CBPENDING; - local_only = (lock->l_flags & LDLM_FL_LOCAL_ONLY); - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - ldlm_cancel_callback(lock); - - if (local_only) { - CDEBUG(D_INFO, "not sending request (at caller's " - "instruction)\n"); - goto local_cancel; - } - - imp = class_conn2cliimp(lock->l_connh); - if (imp == NULL || imp->imp_invalid) { - CDEBUG(D_HA, "skipping cancel on invalid import %p\n", - imp); - goto local_cancel; - } - - req = ptlrpc_prep_req(imp, LDLM_CANCEL, 1, &size, NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - - /* XXX FIXME bug 249 */ - req->rq_request_portal = LDLM_CANCEL_REQUEST_PORTAL; - req->rq_reply_portal = LDLM_CANCEL_REPLY_PORTAL; - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->lock_handle1, &lock->l_remote_handle, - sizeof(body->lock_handle1)); - - req->rq_replen = lustre_msg_size(0, NULL); - - rc = ptlrpc_queue_wait(req); - - if (rc == ESTALE) - CERROR("client/server (nid "LPU64") out of sync--not " - "fatal\n", - req->rq_import->imp_connection->c_peer.peer_nid); - else if (rc != ELDLM_OK) - CERROR("Got rc %d from cancel RPC: canceling " - "anyway\n", rc); - - ptlrpc_req_finished(req); - local_cancel: - ldlm_lock_cancel(lock); - } else { - 
LDLM_DEBUG(lock, "client-side local cancel"); - if (lock->l_resource->lr_namespace->ns_client) { - CERROR("Trying to cancel local lock\n"); - LBUG(); - } - ldlm_lock_cancel(lock); - ldlm_reprocess_all(lock->l_resource); - LDLM_DEBUG(lock, "client-side local cancel handler END"); - } - - EXIT; - out: - LDLM_LOCK_PUT(lock); - return rc; -} - -int ldlm_cancel_lru(struct ldlm_namespace *ns) -{ - struct list_head *tmp, *next, list = LIST_HEAD_INIT(list); - int count, rc = 0; - struct ldlm_ast_work *w; - ENTRY; - - l_lock(&ns->ns_lock); - count = ns->ns_nr_unused - ns->ns_max_unused; - - if (count <= 0) { - l_unlock(&ns->ns_lock); - RETURN(0); - } - - list_for_each_safe(tmp, next, &ns->ns_unused_list) { - struct ldlm_lock *lock; - lock = list_entry(tmp, struct ldlm_lock, l_lru); - - LASSERT(!lock->l_readers && !lock->l_writers); - - /* Setting the CBPENDING flag is a little misleading, but - * prevents an important race; namely, once CBPENDING is set, - * the lock can accumulate no more readers/writers. 
Since - * readers and writers are already zero here, ldlm_lock_decref - * won't see this flag and call l_blocking_ast */ - lock->l_flags |= LDLM_FL_CBPENDING; - - OBD_ALLOC(w, sizeof(*w)); - LASSERT(w); - - w->w_lock = LDLM_LOCK_GET(lock); - list_add(&w->w_list, &list); - ldlm_lock_remove_from_lru(lock); - - if (--count == 0) - break; - } - l_unlock(&ns->ns_lock); - - list_for_each_safe(tmp, next, &list) { - struct lustre_handle lockh; - int rc; - w = list_entry(tmp, struct ldlm_ast_work, w_list); - - ldlm_lock2handle(w->w_lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc != ELDLM_OK) - CDEBUG(D_INFO, "ldlm_cli_cancel: %d\n", rc); - - list_del(&w->w_list); - LDLM_LOCK_PUT(w->w_lock); - OBD_FREE(w, sizeof(*w)); - } - - RETURN(rc); -} - -static int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns, - struct ldlm_res_id res_id, int flags, - void *opaque) -{ - struct ldlm_resource *res; - struct list_head *tmp, *next, list = LIST_HEAD_INIT(list); - struct ldlm_ast_work *w; - ENTRY; - - res = ldlm_resource_get(ns, NULL, res_id, 0, 0); - if (res == NULL) { - /* This is not a problem. */ - CDEBUG(D_INFO, "No resource "LPU64"\n", res_id.name[0]); - RETURN(0); - } - - l_lock(&ns->ns_lock); - list_for_each(tmp, &res->lr_granted) { - struct ldlm_lock *lock; - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - - if (opaque != NULL && lock->l_data != opaque) { - LDLM_ERROR(lock, "data %p doesn't match opaque %p res" - LPU64":"LPU64, lock->l_data, opaque, - res_id.name[0], res_id.name[1]); - //LBUG(); - continue; - } - - if (lock->l_readers || lock->l_writers) { - if (flags & LDLM_FL_WARN) { - LDLM_ERROR(lock, "lock in use"); - //LBUG(); - } - continue; - } - - /* See CBPENDING comment in ldlm_cancel_lru */ - lock->l_flags |= LDLM_FL_CBPENDING; - - OBD_ALLOC(w, sizeof(*w)); - LASSERT(w); - - w->w_lock = LDLM_LOCK_GET(lock); - - /* Prevent the cancel callback from being called by setting - * LDLM_FL_CANCEL in the lock. Very sneaky. 
-p */ - if (flags & LDLM_FL_NO_CALLBACK) - w->w_lock->l_flags |= LDLM_FL_CANCEL; - - list_add(&w->w_list, &list); - } - l_unlock(&ns->ns_lock); - - list_for_each_safe(tmp, next, &list) { - struct lustre_handle lockh; - int rc; - w = list_entry(tmp, struct ldlm_ast_work, w_list); - - if (flags & LDLM_FL_LOCAL_ONLY) { - ldlm_lock_cancel(w->w_lock); - } else { - ldlm_lock2handle(w->w_lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc != ELDLM_OK) - CERROR("ldlm_cli_cancel: %d\n", rc); - } - list_del(&w->w_list); - LDLM_LOCK_PUT(w->w_lock); - OBD_FREE(w, sizeof(*w)); - } - - ldlm_resource_putref(res); - - RETURN(0); -} - -/* Cancel all locks on a namespace (or a specific resource, if given) - * that have 0 readers/writers. - * - * If flags & LDLM_FL_LOCAL_ONLY, throw the locks away without trying - * to notify the server. - * If flags & LDLM_FL_NO_CALLBACK, don't run the cancel callback. - * If flags & LDLM_FL_WARN, print a warning if some locks are still in use. */ -int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, - struct ldlm_res_id *res_id, int flags, void *opaque) -{ - int i; - ENTRY; - - if (ns == NULL) - RETURN(ELDLM_OK); - - if (res_id) - RETURN(ldlm_cli_cancel_unused_resource(ns, *res_id, flags, - opaque)); - - l_lock(&ns->ns_lock); - for (i = 0; i < RES_HASH_SIZE; i++) { - struct list_head *tmp, *pos; - list_for_each_safe(tmp, pos, &(ns->ns_hash[i])) { - int rc; - struct ldlm_resource *res; - res = list_entry(tmp, struct ldlm_resource, lr_hash); - ldlm_resource_getref(res); - - rc = ldlm_cli_cancel_unused_resource(ns, res->lr_name, - flags, opaque); - - if (rc) - CERROR("cancel_unused_res ("LPU64"): %d\n", - res->lr_name.name[0], rc); - ldlm_resource_putref(res); - } - } - l_unlock(&ns->ns_lock); - - RETURN(ELDLM_OK); -} - -/* Lock iterators. 
*/ - -int ldlm_resource_foreach(struct ldlm_resource *res, ldlm_iterator_t iter, - void *closure) -{ - struct list_head *tmp, *next; - struct ldlm_lock *lock; - int rc = LDLM_ITER_CONTINUE; - struct ldlm_namespace *ns = res->lr_namespace; - - ENTRY; - - if (!res) - RETURN(LDLM_ITER_CONTINUE); - - l_lock(&ns->ns_lock); - list_for_each_safe(tmp, next, &res->lr_granted) { - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - - if (iter(lock, closure) == LDLM_ITER_STOP) - GOTO(out, rc = LDLM_ITER_STOP); - } - - list_for_each_safe(tmp, next, &res->lr_converting) { - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - - if (iter(lock, closure) == LDLM_ITER_STOP) - GOTO(out, rc = LDLM_ITER_STOP); - } - - list_for_each_safe(tmp, next, &res->lr_waiting) { - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - - if (iter(lock, closure) == LDLM_ITER_STOP) - GOTO(out, rc = LDLM_ITER_STOP); - } - out: - l_unlock(&ns->ns_lock); - RETURN(rc); -} - -struct iter_helper_data { - ldlm_iterator_t iter; - void *closure; -}; - -static int ldlm_iter_helper(struct ldlm_lock *lock, void *closure) -{ - struct iter_helper_data *helper = closure; - return helper->iter(lock, helper->closure); -} - -static int ldlm_res_iter_helper(struct ldlm_resource *res, void *closure) -{ - return ldlm_resource_foreach(res, ldlm_iter_helper, closure); -} - -int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter, - void *closure) -{ - struct iter_helper_data helper = { iter: iter, closure: closure }; - return ldlm_namespace_foreach_res(ns, ldlm_res_iter_helper, &helper); -} - -int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, - ldlm_res_iterator_t iter, void *closure) -{ - int i, rc = LDLM_ITER_CONTINUE; - - l_lock(&ns->ns_lock); - for (i = 0; i < RES_HASH_SIZE; i++) { - struct list_head *tmp, *next; - list_for_each_safe(tmp, next, &(ns->ns_hash[i])) { - struct ldlm_resource *res = - list_entry(tmp, struct ldlm_resource, lr_hash); - - ldlm_resource_getref(res); - rc = 
iter(res, closure); - ldlm_resource_putref(res); - if (rc == LDLM_ITER_STOP) - GOTO(out, rc); - } - } - out: - l_unlock(&ns->ns_lock); - RETURN(rc); -} - -/* Lock replay */ - -static int ldlm_chain_lock_for_replay(struct ldlm_lock *lock, void *closure) -{ - struct list_head *list = closure; - - /* we use l_pending_chain here, because it's unused on clients. */ - list_add(&lock->l_pending_chain, list); - return LDLM_ITER_CONTINUE; -} - -static int replay_one_lock(struct obd_import *imp, struct ldlm_lock *lock) -{ - struct ptlrpc_request *req; - struct ldlm_request *body; - struct ldlm_reply *reply; - int rc, size; - int flags; - - /* - * If granted mode matches the requested mode, this lock is granted. - * - * If they differ, but we have a granted mode, then we were granted - * one mode and now want another: ergo, converting. - * - * If we haven't been granted anything and are on a resource list, - * then we're blocked/waiting. - * - * If we haven't been granted anything and we're NOT on a resource list, - * then we haven't got a reply yet and don't have a known disposition. - * This happens whenever a lock enqueue is the request that triggers - * recovery. - */ - if (lock->l_granted_mode == lock->l_req_mode) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_GRANTED; - else if (lock->l_granted_mode) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_CONV; - else if (!list_empty(&lock->l_res_link)) - flags = LDLM_FL_REPLAY | LDLM_FL_BLOCK_WAIT; - else - flags = LDLM_FL_REPLAY; - - size = sizeof(*body); - req = ptlrpc_prep_req(imp, LDLM_ENQUEUE, 1, &size, NULL); - if (!req) - RETURN(-ENOMEM); - - /* We're part of recovery, so don't wait for it. 
*/ - req->rq_level = LUSTRE_CONN_RECOVER; - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - ldlm_lock2desc(lock, &body->lock_desc); - body->lock_flags = flags; - - ldlm_lock2handle(lock, &body->lock_handle1); - size = sizeof(*reply); - req->rq_replen = lustre_msg_size(1, &size); - - LDLM_DEBUG(lock, "replaying lock:"); - rc = ptlrpc_queue_wait(req); - if (rc != ELDLM_OK) - GOTO(out, rc); - - reply = lustre_swab_repbuf(req, 0, sizeof (*reply), - lustre_swab_ldlm_reply); - if (reply == NULL) { - CERROR("Can't unpack ldlm_reply\n"); - GOTO (out, rc = -EPROTO); - } - - memcpy(&lock->l_remote_handle, &reply->lock_handle, - sizeof(lock->l_remote_handle)); - LDLM_DEBUG(lock, "replayed lock:"); - out: - ptlrpc_req_finished(req); - RETURN(rc); -} - -int ldlm_replay_locks(struct obd_import *imp) -{ - struct ldlm_namespace *ns = imp->imp_obd->obd_namespace; - struct list_head list, *pos, *next; - struct ldlm_lock *lock; - int rc = 0; - - ENTRY; - INIT_LIST_HEAD(&list); - - l_lock(&ns->ns_lock); - (void)ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list); - - list_for_each_safe(pos, next, &list) { - lock = list_entry(pos, struct ldlm_lock, l_pending_chain); - rc = replay_one_lock(imp, lock); - if (rc) - break; /* or try to do the rest? */ - } - l_unlock(&ns->ns_lock); - RETURN(rc); -} diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c deleted file mode 100644 index 84fdecc..0000000 --- a/lustre/ldlm/ldlm_resource.c +++ /dev/null @@ -1,578 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Peter Braam <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LDLM -#ifdef __KERNEL__ -#include <linux/lustre_dlm.h> -#else -#include <liblustre.h> -#endif - -#include <linux/obd_class.h> - -kmem_cache_t *ldlm_resource_slab, *ldlm_lock_slab; - -spinlock_t ldlm_namespace_lock = SPIN_LOCK_UNLOCKED; -struct list_head ldlm_namespace_list = LIST_HEAD_INIT(ldlm_namespace_list); -static struct proc_dir_entry *ldlm_ns_proc_dir = NULL; - -int ldlm_proc_setup(struct obd_device *obd) -{ - int rc; - ENTRY; - LASSERT(ldlm_ns_proc_dir == NULL); - LASSERT(obd != NULL); - rc = lprocfs_obd_attach(obd, 0); - if (rc) { - CERROR("LProcFS failed in ldlm-init\n"); - RETURN(rc); - } - ldlm_ns_proc_dir = obd->obd_proc_entry; - RETURN(0); -} - -void ldlm_proc_cleanup(struct obd_device *obd) -{ - if (ldlm_ns_proc_dir) { - lprocfs_obd_detach(obd); - ldlm_ns_proc_dir = NULL; - } -} - -#ifdef __KERNEL__ -static int lprocfs_uint_rd(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - unsigned int *temp = (unsigned int *)data; - return snprintf(page, count, "%u\n", *temp); -} - - -#define MAX_STRING_SIZE 128 -void ldlm_proc_namespace(struct ldlm_namespace *ns) -{ - struct lprocfs_vars lock_vars[2]; - char lock_name[MAX_STRING_SIZE + 1]; - - LASSERT(ns != NULL); - LASSERT(ns->ns_name != NULL); - - lock_name[MAX_STRING_SIZE] = '\0'; - - memset(lock_vars, 0, sizeof(lock_vars)); - 
lock_vars[0].read_fptr = lprocfs_rd_u64; - - lock_vars[0].name = lock_name; - - snprintf(lock_name, MAX_STRING_SIZE, "%s/resource_count", ns->ns_name); - - lock_vars[0].data = &ns->ns_resources; - lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); - - snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_count", ns->ns_name); - - lock_vars[0].data = &ns->ns_locks; - lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); - - snprintf(lock_name, MAX_STRING_SIZE, "%s/lock_unused_count", - ns->ns_name); - lock_vars[0].data = &ns->ns_nr_unused; - lock_vars[0].read_fptr = lprocfs_uint_rd; - lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0); -} -#endif -#undef MAX_STRING_SIZE - -#define LDLM_MAX_UNUSED 100 -struct ldlm_namespace *ldlm_namespace_new(char *name, __u32 client) -{ - struct ldlm_namespace *ns = NULL; - struct list_head *bucket; - ENTRY; - - OBD_ALLOC(ns, sizeof(*ns)); - if (!ns) - RETURN(NULL); - - ns->ns_hash = vmalloc(sizeof(*ns->ns_hash) * RES_HASH_SIZE); - if (!ns->ns_hash) - GOTO(out_ns, NULL); - - atomic_add(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); - - OBD_ALLOC(ns->ns_name, strlen(name) + 1); - if (!ns->ns_name) - GOTO(out_hash, NULL); - - strcpy(ns->ns_name, name); - - INIT_LIST_HEAD(&ns->ns_root_list); - l_lock_init(&ns->ns_lock); - ns->ns_refcount = 0; - ns->ns_client = client; - spin_lock_init(&ns->ns_counter_lock); - ns->ns_locks = 0; - ns->ns_resources = 0; - - for (bucket = ns->ns_hash + RES_HASH_SIZE - 1; bucket >= ns->ns_hash; - bucket--) - INIT_LIST_HEAD(bucket); - - INIT_LIST_HEAD(&ns->ns_unused_list); - ns->ns_nr_unused = 0; - ns->ns_max_unused = LDLM_MAX_UNUSED; - - spin_lock(&ldlm_namespace_lock); - list_add(&ns->ns_list_chain, &ldlm_namespace_list); - spin_unlock(&ldlm_namespace_lock); -#ifdef __KERNEL__ - ldlm_proc_namespace(ns); -#endif - RETURN(ns); - -out_hash: - POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE); - vfree(ns->ns_hash); - atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); -out_ns: - OBD_FREE(ns, 
sizeof(*ns)); - return NULL; -} - -extern struct ldlm_lock *ldlm_lock_get(struct ldlm_lock *lock); - -/* If 'local_only' is true, don't try to tell the server, just cleanup. - * This is currently only used for recovery, and we make certain assumptions - * as a result--notably, that we shouldn't cancel locks with refs. -phil - * - * Called with the ns_lock held. */ -static void cleanup_resource(struct ldlm_resource *res, struct list_head *q, - int local_only) -{ - struct list_head *tmp, *pos; - int rc = 0, client = res->lr_namespace->ns_client; - ENTRY; - - list_for_each_safe(tmp, pos, q) { - struct ldlm_lock *lock; - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - LDLM_LOCK_GET(lock); - - if (local_only && (lock->l_readers || lock->l_writers)) { - /* This is a little bit gross, but much better than the - * alternative: pretend that we got a blocking AST from - * the server, so that when the lock is decref'd, it - * will go away ... */ - lock->l_flags |= LDLM_FL_CBPENDING; - /* ... without sending a CANCEL message. */ - lock->l_flags |= LDLM_FL_LOCAL_ONLY; - /* ... and without calling the cancellation callback */ - lock->l_flags |= LDLM_FL_CANCEL; - LDLM_LOCK_PUT(lock); - continue; - } - - /* At shutdown time, don't call the cancellation callback */ - lock->l_flags |= LDLM_FL_CANCEL; - - if (client) { - struct lustre_handle lockh; - ldlm_lock2handle(lock, &lockh); - if (!local_only) { - rc = ldlm_cli_cancel(&lockh); - if (rc) - CERROR("ldlm_cli_cancel: %d\n", rc); - } - /* Force local cleanup on errors, too. 
*/ - if (local_only || rc != ELDLM_OK) - ldlm_lock_cancel(lock); - } else { - LDLM_DEBUG(lock, "Freeing a lock still held by a " - "client node"); - - ldlm_resource_unlink_lock(lock); - ldlm_lock_destroy(lock); - } - LDLM_LOCK_PUT(lock); - } - EXIT; -} - -int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int local_only) -{ - int i; - - if (ns == NULL) { - CDEBUG(D_INFO, "NULL ns, skipping cleanup\n"); - return ELDLM_OK; - } - - l_lock(&ns->ns_lock); - for (i = 0; i < RES_HASH_SIZE; i++) { - struct list_head *tmp, *pos; - list_for_each_safe(tmp, pos, &(ns->ns_hash[i])) { - struct ldlm_resource *res; - res = list_entry(tmp, struct ldlm_resource, lr_hash); - ldlm_resource_getref(res); - - cleanup_resource(res, &res->lr_granted, local_only); - cleanup_resource(res, &res->lr_converting, local_only); - cleanup_resource(res, &res->lr_waiting, local_only); - - /* XXX what a mess: don't force cleanup if we're - * local_only (which is only used by recovery). In that - * case, we probably still have outstanding lock refs - * which reference these resources. 
-phil */ - if (!ldlm_resource_putref(res) && !local_only) { - CERROR("Resource refcount nonzero (%d) after " - "lock cleanup; forcing cleanup.\n", - atomic_read(&res->lr_refcount)); - ldlm_resource_dump(res); - atomic_set(&res->lr_refcount, 1); - ldlm_resource_putref(res); - } - } - } - l_unlock(&ns->ns_lock); - - return ELDLM_OK; -} - -/* Cleanup, but also free, the namespace */ -int ldlm_namespace_free(struct ldlm_namespace *ns) -{ - if (!ns) - RETURN(ELDLM_OK); - - spin_lock(&ldlm_namespace_lock); - list_del(&ns->ns_list_chain); - - spin_unlock(&ldlm_namespace_lock); - - ldlm_namespace_cleanup(ns, 0); - - POISON(ns->ns_hash, 0x5a, sizeof(*ns->ns_hash) * RES_HASH_SIZE); - vfree(ns->ns_hash /* , sizeof(*ns->ns_hash) * RES_HASH_SIZE */); - atomic_sub(sizeof(*ns->ns_hash) * RES_HASH_SIZE, &obd_memory); - OBD_FREE(ns->ns_name, strlen(ns->ns_name) + 1); - OBD_FREE(ns, sizeof(*ns)); - - return ELDLM_OK; -} - -static __u32 ldlm_hash_fn(struct ldlm_resource *parent, struct ldlm_res_id name) -{ - __u32 hash = 0; - int i; - - for (i = 0; i < RES_NAME_SIZE; i++) - hash += name.name[i]; - - hash += (__u32)((unsigned long)parent >> 4); - - return (hash & RES_HASH_MASK); -} - -static struct ldlm_resource *ldlm_resource_new(void) -{ - struct ldlm_resource *res; - - OBD_SLAB_ALLOC(res, ldlm_resource_slab, SLAB_KERNEL, sizeof *res); - if (res == NULL) { - LBUG(); - return NULL; - } - memset(res, 0, sizeof(*res)); - - INIT_LIST_HEAD(&res->lr_children); - INIT_LIST_HEAD(&res->lr_childof); - INIT_LIST_HEAD(&res->lr_granted); - INIT_LIST_HEAD(&res->lr_converting); - INIT_LIST_HEAD(&res->lr_waiting); - - atomic_set(&res->lr_refcount, 1); - - return res; -} - -/* Args: locked namespace - * Returns: newly-allocated, referenced, unlocked resource */ -static struct ldlm_resource * -ldlm_resource_add(struct ldlm_namespace *ns, struct ldlm_resource *parent, - struct ldlm_res_id name, __u32 type) -{ - struct list_head *bucket; - struct ldlm_resource *res; - ENTRY; - - if (type < 
LDLM_MIN_TYPE || type > LDLM_MAX_TYPE) { - LBUG(); - RETURN(NULL); - } - - res = ldlm_resource_new(); - if (!res) { - LBUG(); - RETURN(NULL); - } - - spin_lock(&ns->ns_counter_lock); - ns->ns_resources++; - spin_unlock(&ns->ns_counter_lock); - - l_lock(&ns->ns_lock); - memcpy(&res->lr_name, &name, sizeof(res->lr_name)); - res->lr_namespace = ns; - ns->ns_refcount++; - - res->lr_type = type; - res->lr_most_restr = LCK_NL; - - bucket = ns->ns_hash + ldlm_hash_fn(parent, name); - list_add(&res->lr_hash, bucket); - - if (parent == NULL) { - list_add(&res->lr_childof, &ns->ns_root_list); - } else { - res->lr_parent = parent; - list_add(&res->lr_childof, &parent->lr_children); - } - l_unlock(&ns->ns_lock); - - RETURN(res); -} - -/* Args: unlocked namespace - * Locks: takes and releases ns->ns_lock and res->lr_lock - * Returns: referenced, unlocked ldlm_resource or NULL */ -struct ldlm_resource * -ldlm_resource_get(struct ldlm_namespace *ns, struct ldlm_resource *parent, - struct ldlm_res_id name, __u32 type, int create) -{ - struct list_head *bucket, *tmp; - struct ldlm_resource *res = NULL; - ENTRY; - - LASSERT(ns != NULL); - LASSERT(ns->ns_hash != NULL); - - l_lock(&ns->ns_lock); - bucket = ns->ns_hash + ldlm_hash_fn(parent, name); - - list_for_each(tmp, bucket) { - res = list_entry(tmp, struct ldlm_resource, lr_hash); - - if (memcmp(&res->lr_name, &name, sizeof(res->lr_name)) == 0) { - ldlm_resource_getref(res); - l_unlock(&ns->ns_lock); - RETURN(res); - } - } - - if (create) - res = ldlm_resource_add(ns, parent, name, type); - else - res = NULL; - - l_unlock(&ns->ns_lock); - - RETURN(res); -} - -struct ldlm_resource *ldlm_resource_getref(struct ldlm_resource *res) -{ - atomic_inc(&res->lr_refcount); - CDEBUG(D_INFO, "getref res: %p count: %d\n", res, - atomic_read(&res->lr_refcount)); - return res; -} - -/* Returns 1 if the resource was freed, 0 if it remains. 
*/ -int ldlm_resource_putref(struct ldlm_resource *res) -{ - int rc = 0; - ENTRY; - - CDEBUG(D_INFO, "putref res: %p count: %d\n", res, - atomic_read(&res->lr_refcount) - 1); - LASSERT(atomic_read(&res->lr_refcount) > 0); - LASSERT(atomic_read(&res->lr_refcount) < 0x5a5a5a5a); - - if (atomic_dec_and_test(&res->lr_refcount)) { - struct ldlm_namespace *ns = res->lr_namespace; - ENTRY; - - l_lock(&ns->ns_lock); - - if (atomic_read(&res->lr_refcount) != 0) { - /* We lost the race. */ - l_unlock(&ns->ns_lock); - RETURN(rc); - } - - if (!list_empty(&res->lr_granted)) { - ldlm_resource_dump(res); - LBUG(); - } - - if (!list_empty(&res->lr_converting)) { - ldlm_resource_dump(res); - LBUG(); - } - - if (!list_empty(&res->lr_waiting)) { - ldlm_resource_dump(res); - LBUG(); - } - - if (!list_empty(&res->lr_children)) { - ldlm_resource_dump(res); - LBUG(); - } - - ns->ns_refcount--; - list_del_init(&res->lr_hash); - list_del_init(&res->lr_childof); - - OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res); - l_unlock(&ns->ns_lock); - - spin_lock(&ns->ns_counter_lock); - ns->ns_resources--; - spin_unlock(&ns->ns_counter_lock); - - rc = 1; - EXIT; - } - - RETURN(rc); -} - -void ldlm_resource_add_lock(struct ldlm_resource *res, struct list_head *head, - struct ldlm_lock *lock) -{ - l_lock(&res->lr_namespace->ns_lock); - - ldlm_resource_dump(res); - CDEBUG(D_OTHER, "About to add this lock:\n"); - ldlm_lock_dump(D_OTHER, lock); - - if (lock->l_destroyed) { - CDEBUG(D_OTHER, "Lock destroyed, not adding to resource\n"); - return; - } - - LASSERT(list_empty(&lock->l_res_link)); - - list_add_tail(&lock->l_res_link, head); - l_unlock(&res->lr_namespace->ns_lock); -} - -void ldlm_resource_unlink_lock(struct ldlm_lock *lock) -{ - l_lock(&lock->l_resource->lr_namespace->ns_lock); - list_del_init(&lock->l_res_link); - l_unlock(&lock->l_resource->lr_namespace->ns_lock); -} - -void ldlm_res2desc(struct ldlm_resource *res, struct ldlm_resource_desc *desc) -{ - desc->lr_type = res->lr_type; - 
memcpy(&desc->lr_name, &res->lr_name, sizeof(desc->lr_name)); - memcpy(desc->lr_version, res->lr_version, sizeof(desc->lr_version)); -} - -void ldlm_dump_all_namespaces(void) -{ - struct list_head *tmp; - - spin_lock(&ldlm_namespace_lock); - - list_for_each(tmp, &ldlm_namespace_list) { - struct ldlm_namespace *ns; - ns = list_entry(tmp, struct ldlm_namespace, ns_list_chain); - ldlm_namespace_dump(ns); - } - - spin_unlock(&ldlm_namespace_lock); -} - -void ldlm_namespace_dump(struct ldlm_namespace *ns) -{ - struct list_head *tmp; - - l_lock(&ns->ns_lock); - CDEBUG(D_OTHER, "--- Namespace: %s (rc: %d, client: %d)\n", ns->ns_name, - ns->ns_refcount, ns->ns_client); - - list_for_each(tmp, &ns->ns_root_list) { - struct ldlm_resource *res; - res = list_entry(tmp, struct ldlm_resource, lr_childof); - - /* Once we have resources with children, this should really dump - * them recursively. */ - ldlm_resource_dump(res); - } - l_unlock(&ns->ns_lock); -} - -void ldlm_resource_dump(struct ldlm_resource *res) -{ - struct list_head *tmp; - char name[256]; - - if (RES_NAME_SIZE != 3) - LBUG(); - - snprintf(name, sizeof(name), "%Lx %Lx %Lx", - (unsigned long long)res->lr_name.name[0], - (unsigned long long)res->lr_name.name[1], - (unsigned long long)res->lr_name.name[2]); - - CDEBUG(D_OTHER, "--- Resource: %p (%s) (rc: %d)\n", res, name, - atomic_read(&res->lr_refcount)); - CDEBUG(D_OTHER, "Namespace: %p (%s)\n", res->lr_namespace, - res->lr_namespace->ns_name); - CDEBUG(D_OTHER, "Parent: %p, root: %p\n", res->lr_parent, res->lr_root); - - CDEBUG(D_OTHER, "Granted locks:\n"); - list_for_each(tmp, &res->lr_granted) { - struct ldlm_lock *lock; - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock); - } - - CDEBUG(D_OTHER, "Converting locks:\n"); - list_for_each(tmp, &res->lr_converting) { - struct ldlm_lock *lock; - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock); - } - - CDEBUG(D_OTHER, "Waiting locks:\n"); - 
list_for_each(tmp, &res->lr_waiting) { - struct ldlm_lock *lock; - lock = list_entry(tmp, struct ldlm_lock, l_res_link); - ldlm_lock_dump(D_OTHER, lock); - } -} diff --git a/lustre/ldlm/ldlm_test.c b/lustre/ldlm/ldlm_test.c deleted file mode 100644 index 6cf1056..0000000 --- a/lustre/ldlm/ldlm_test.c +++ /dev/null @@ -1,648 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cluster File Systems, Inc. <info@clusterfs.com> - * Copyright (c) 2002 Lawrence Livermore National Laboratory - * Author: James Newsome <newsome2@llnl.gov> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LDLM - -#include <asm/atomic.h> -#include <linux/types.h> -#include <linux/random.h> - -#include <linux/lustre_dlm.h> -#include <linux/obd.h> - -struct ldlm_test_thread { - struct obd_device *obddev; - struct ldlm_namespace *t_ns; - struct list_head t_link; - __u32 t_flags; - wait_queue_head_t t_ctl_waitq; -}; - -struct ldlm_test_lock { - struct list_head l_link; - struct lustre_handle l_lockh; -}; - -static unsigned int max_locks; -static unsigned int num_resources; -static unsigned int num_extents; - -static spinlock_t ctl_lock = SPIN_LOCK_UNLOCKED; -/* protect these with the ctl_lock */ -static LIST_HEAD(ctl_threads); -static int regression_running = 0; -static LIST_HEAD(lock_list); -static int num_locks = 0; - -/* cumulative stats for regression test */ -static atomic_t locks_requested = ATOMIC_INIT(0); -static atomic_t converts_requested = ATOMIC_INIT(0); -static atomic_t locks_granted = ATOMIC_INIT(0); -static atomic_t locks_matched = ATOMIC_INIT(0); - -/* making this a global avoids the problem of having pointers - * to garbage after the test exits. - */ -static struct lustre_handle regress_connh; - -static int ldlm_do_decrement(void); -static int ldlm_do_enqueue(struct ldlm_test_thread *thread); -static int ldlm_do_convert(void); - -/* - * blocking ast for regression test. - * Just cancels lock - */ -static int ldlm_test_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *new, - void *data, int flag) -{ - int rc; - struct lustre_handle lockh; - ENTRY; - - switch (flag) { - case LDLM_CB_BLOCKING: - LDLM_DEBUG(lock, "We're blocking. Cancelling lock"); - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc < 0) { - CERROR("ldlm_cli_cancel: %d\n", rc); - LBUG(); - } - break; - case LDLM_CB_CANCELING: - LDLM_DEBUG(lock, "this lock is being cancelled"); - break; - default: - LBUG(); - } - - RETURN(0); -} - -/* blocking ast for basic tests. 
noop */ -static int ldlm_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *new, - void *data, int flag) -{ - ENTRY; - CERROR("ldlm_blocking_ast: lock=%p, new=%p, flag=%d\n", lock, new, - flag); - RETURN(0); -} - -/* Completion ast for regression test. - * Does not sleep when blocked. - */ -static int ldlm_test_completion_ast(struct ldlm_lock *lock, int flags, void *data) -{ - struct ldlm_test_lock *lock_info; - ENTRY; - - if (flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED | - LDLM_FL_BLOCK_CONV)) { - LDLM_DEBUG(lock, "client-side enqueue returned a blocked lock"); - RETURN(0); - } - - if (lock->l_granted_mode != lock->l_req_mode) - CERROR("completion ast called with non-granted lock\n"); - - /* add to list of granted locks */ - - if (flags & LDLM_FL_WAIT_NOREPROC) { - atomic_inc(&locks_matched); - LDLM_DEBUG(lock, "lock matched"); - } else { - atomic_inc(&locks_granted); - LDLM_DEBUG(lock, "lock granted"); - } - - OBD_ALLOC(lock_info, sizeof(*lock_info)); - if (lock_info == NULL) { - LBUG(); - RETURN(-ENOMEM); - } - - ldlm_lock2handle(lock, &lock_info->l_lockh); - - spin_lock(&ctl_lock); - list_add_tail(&lock_info->l_link, &lock_list); - num_locks++; - spin_unlock(&ctl_lock); - - RETURN(0); -} - -int ldlm_test_basics(struct obd_device *obddev) -{ - struct ldlm_namespace *ns; - struct ldlm_resource *res; - struct ldlm_res_id res_id = { .name = {1, 2, 3} }; - ldlm_error_t err; - struct ldlm_lock *lock1, *lock; - int flags; - ENTRY; - - ns = ldlm_namespace_new("test_server", LDLM_NAMESPACE_SERVER); - if (ns == NULL) - LBUG(); - - lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_PLAIN, LCK_CR, NULL, 0); - if (lock1 == NULL) - LBUG(); - err = ldlm_lock_enqueue(ns, lock1, NULL, 0, &flags, - ldlm_completion_ast, ldlm_blocking_ast); - if (err != ELDLM_OK) - LBUG(); - - lock = ldlm_lock_create(ns, NULL, res_id, LDLM_PLAIN, LCK_EX, NULL, 0); - if (lock == NULL) - LBUG(); - err = ldlm_lock_enqueue(ns, lock, NULL, 0, &flags, - ldlm_completion_ast, 
ldlm_blocking_ast); - if (err != ELDLM_OK) - LBUG(); - if (!(flags & LDLM_FL_BLOCK_GRANTED)) - LBUG(); - - res = ldlm_resource_get(ns, NULL, res_id, LDLM_PLAIN, 1); - if (res == NULL) - LBUG(); - ldlm_resource_dump(res); - - res = ldlm_lock_convert(lock1, LCK_NL, &flags); - if (res != NULL) - ldlm_reprocess_all(res); - - ldlm_resource_dump(res); - ldlm_namespace_free(ns); - - RETURN(0); -} - -int ldlm_test_extents(struct obd_device *obddev) -{ - struct ldlm_namespace *ns; - struct ldlm_resource *res; - struct ldlm_lock *lock, *lock1, *lock2; - struct ldlm_res_id res_id = { .name = {0} }; - struct ldlm_extent ext1 = {4, 6}, ext2 = {6, 9}, ext3 = {10, 11}; - ldlm_error_t err; - int flags; - ENTRY; - - ns = ldlm_namespace_new("test_server", LDLM_NAMESPACE_SERVER); - if (ns == NULL) - LBUG(); - - flags = 0; - lock1 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, NULL, - 0); - if (lock1 == NULL) - LBUG(); - err = ldlm_lock_enqueue(ns, lock1, &ext1, sizeof(ext1), &flags, NULL, - NULL); - if (err != ELDLM_OK) - LBUG(); - if (!(flags & LDLM_FL_LOCK_CHANGED)) - LBUG(); - - flags = 0; - lock2 = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_PR, - NULL, 0); - err = ldlm_lock_enqueue(ns, lock2, &ext2, sizeof(ext2), &flags, NULL, - NULL); - if (err != ELDLM_OK) - LBUG(); - if (!(flags & LDLM_FL_LOCK_CHANGED)) - LBUG(); - - flags = 0; - lock = ldlm_lock_create(ns, NULL, res_id, LDLM_EXTENT, LCK_EX, NULL, 0); - if (lock == NULL) - LBUG(); - err = ldlm_lock_enqueue(ns, lock, &ext3, sizeof(ext3), &flags, - NULL, NULL); - if (err != ELDLM_OK) - LBUG(); - if (!(flags & LDLM_FL_BLOCK_GRANTED)) - LBUG(); - if (flags & LDLM_FL_LOCK_CHANGED) - LBUG(); - - /* Convert/cancel blocking locks */ - flags = 0; - res = ldlm_lock_convert(lock1, LCK_NL, &flags); - if (res != NULL) - ldlm_reprocess_all(res); - - ldlm_lock_cancel(lock2); - if (res != NULL) - ldlm_reprocess_all(res); - - /* Dump the results */ - res = ldlm_resource_get(ns, NULL, res_id, LDLM_EXTENT, 0); - if (res == 
NULL) - LBUG(); - ldlm_resource_dump(res); - ldlm_namespace_free(ns); - - RETURN(0); -} - -static int ldlm_test_network(struct obd_device *obddev, - struct lustre_handle *connh) -{ - struct ldlm_res_id res_id = { .name = {1, 2, 3} }; - struct ldlm_extent ext = {4, 6}; - struct lustre_handle lockh1; - struct ldlm_lock *lock; - int flags = 0; - ldlm_error_t err; - ENTRY; - - err = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, NULL, res_id, - LDLM_EXTENT, &ext, sizeof(ext), LCK_PR, &flags, - ldlm_completion_ast, NULL, NULL, 0, &lockh1); - - CERROR("ldlm_cli_enqueue: %d\n", err); - - flags = 0; - err = ldlm_cli_convert(&lockh1, LCK_EX, &flags); - CERROR("ldlm_cli_convert: %d\n", err); - - lock = ldlm_handle2lock(&lockh1); - ldlm_lock_dump(D_OTHER, lock); - ldlm_lock_put(lock); - - /* Need to decrement old mode. Don't bother incrementing new - * mode since the test is done. - */ - if (err == ELDLM_OK) - ldlm_lock_decref(&lockh1, LCK_PR); - - RETURN(err); -} - -static int ldlm_do_decrement(void) -{ - struct ldlm_test_lock *lock_info; - struct ldlm_lock *lock; - int rc = 0; - ENTRY; - - spin_lock(&ctl_lock); - if(list_empty(&lock_list)) { - CERROR("lock_list is empty\n"); - spin_unlock(&ctl_lock); - RETURN(0); - } - - /* delete from list */ - lock_info = list_entry(lock_list.next, - struct ldlm_test_lock, l_link); - list_del(lock_list.next); - num_locks--; - spin_unlock(&ctl_lock); - - /* decrement and free the info */ - lock = ldlm_handle2lock(&lock_info->l_lockh); - ldlm_lock_decref(&lock_info->l_lockh, lock->l_granted_mode); - ldlm_lock_put(lock); - - OBD_FREE(lock_info, sizeof(*lock_info)); - - RETURN(rc); -} - -static int ldlm_do_enqueue(struct ldlm_test_thread *thread) -{ - struct lustre_handle lockh; - struct ldlm_res_id res_id = { .name = {0} }; - __u32 lock_mode; - struct ldlm_extent ext; - unsigned char random; - int flags = 0, rc = 0; - ENTRY; - - /* Pick a random resource from 1 to num_resources */ - get_random_bytes(&random, sizeof(random)); - 
res_id.name[0] = random % num_resources; - - /* Pick a random lock mode */ - get_random_bytes(&random, sizeof(random)); - lock_mode = random % LCK_NL + 1; - - /* Pick a random extent */ - get_random_bytes(&random, sizeof(random)); - ext.start = random % num_extents; - get_random_bytes(&random, sizeof(random)); - ext.end = random % - (num_extents - (int)ext.start) + ext.start; - - LDLM_DEBUG_NOLOCK("about to enqueue with resource "LPX64", mode %d," - " extent "LPX64" -> "LPX64, res_id.name[0], lock_mode, - ext.start, ext.end); - - rc = ldlm_match_or_enqueue(&regress_connh, NULL, - thread->obddev->obd_namespace, - NULL, res_id, LDLM_EXTENT, &ext, - sizeof(ext), lock_mode, &flags, - ldlm_test_completion_ast, - ldlm_test_blocking_ast, - NULL, 0, &lockh); - - atomic_inc(&locks_requested); - - if (rc < 0) { - CERROR("ldlm_cli_enqueue: %d\n", rc); - LBUG(); - } - - RETURN(rc); -} - -static int ldlm_do_convert(void) -{ - __u32 lock_mode; - unsigned char random; - int flags = 0, rc = 0; - struct ldlm_test_lock *lock_info; - struct ldlm_lock *lock; - ENTRY; - - /* delete from list */ - spin_lock(&ctl_lock); - lock_info = list_entry(lock_list.next, struct ldlm_test_lock, l_link); - list_del(lock_list.next); - num_locks--; - spin_unlock(&ctl_lock); - - /* Pick a random lock mode */ - get_random_bytes(&random, sizeof(random)); - lock_mode = random % LCK_NL + 1; - - /* do the conversion */ - rc = ldlm_cli_convert(&lock_info->l_lockh , lock_mode, &flags); - atomic_inc(&converts_requested); - - if (rc < 0) { - CERROR("ldlm_cli_convert: %d\n", rc); - LBUG(); - } - - /* - * Adjust reference counts. - * FIXME: This is technically a bit... 
wrong, - * since we don't know when/if the convert succeeded - */ - ldlm_lock_addref(&lock_info->l_lockh, lock_mode); - lock = ldlm_handle2lock(&lock_info->l_lockh); - ldlm_lock_decref(&lock_info->l_lockh, lock->l_granted_mode); - ldlm_lock_put(lock); - - OBD_FREE(lock_info, sizeof(*lock_info)); - - RETURN(rc); -} - - - -static int ldlm_test_main(void *data) -{ - struct ldlm_test_thread *thread = data; - unsigned long flags; - ENTRY; - - lock_kernel(); - daemonize(); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) - sigfillset(¤t->blocked); - recalc_sigpending(); -#else - spin_lock_irqsave(¤t->sigmask_lock, flags); - sigfillset(¤t->blocked); - recalc_sigpending(current); - spin_unlock_irqrestore(¤t->sigmask_lock, flags); -#endif - - sprintf(current->comm, "ldlm_test"); - unlock_kernel(); - - /* Record that the thread is running */ - thread->t_flags |= SVC_RUNNING; - wake_up(&thread->t_ctl_waitq); - - while (!(thread->t_flags & SVC_STOPPING)) { - unsigned char random; - unsigned char dec_chance, con_chance; - unsigned char chance_left = 100; - - spin_lock(&ctl_lock); - /* probability of decrementing increases linearly - * as more locks are held. - */ - dec_chance = chance_left * num_locks / max_locks; - chance_left -= dec_chance; - - /* FIXME: conversions temporarily disabled - * until they are working correctly. 
- */ - /* con_chance = chance_left * num_locks / max_locks; */ - con_chance = 0; - chance_left -= con_chance; - spin_unlock(&ctl_lock); - - get_random_bytes(&random, sizeof(random)); - - random = random % 100; - if (random < dec_chance) - ldlm_do_decrement(); - else if (random < (dec_chance + con_chance)) - ldlm_do_convert(); - else - ldlm_do_enqueue(thread); - - LDLM_DEBUG_NOLOCK("locks requested: %d, " - "conversions requested %d", - atomic_read(&locks_requested), - atomic_read(&converts_requested)); - LDLM_DEBUG_NOLOCK("locks granted: %d, " - "locks matched: %d", - atomic_read(&locks_granted), - atomic_read(&locks_matched)); - - spin_lock(&ctl_lock); - LDLM_DEBUG_NOLOCK("lock references currently held: %d, ", - num_locks); - spin_unlock(&ctl_lock); - - /* - * We don't sleep after a lock being blocked, so let's - * make sure other things can run. - */ - schedule(); - } - - thread->t_flags |= SVC_STOPPED; - wake_up(&thread->t_ctl_waitq); - - RETURN(0); -} - -static int ldlm_start_thread(struct obd_device *obddev, - struct lustre_handle *connh) -{ - struct ldlm_test_thread *test; - int rc; - ENTRY; - - OBD_ALLOC(test, sizeof(*test)); - if (test == NULL) { - LBUG(); - RETURN(-ENOMEM); - } - init_waitqueue_head(&test->t_ctl_waitq); - - test->obddev = obddev; - - spin_lock(&ctl_lock); - list_add(&test->t_link, &ctl_threads); - spin_unlock(&ctl_lock); - - rc = kernel_thread(ldlm_test_main, (void *)test, - CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { - CERROR("cannot start thread\n"); - RETURN(-EINVAL); - } - wait_event(test->t_ctl_waitq, test->t_flags & SVC_RUNNING); - - RETURN(0); -} - -int ldlm_regression_start(struct obd_device *obddev, - struct lustre_handle *connh, - unsigned int threads, unsigned int max_locks_in, - unsigned int num_resources_in, - unsigned int num_extents_in) -{ - int i, rc = 0; - ENTRY; - - spin_lock(&ctl_lock); - if (regression_running) { - CERROR("You can't start the ldlm regression twice.\n"); - spin_unlock(&ctl_lock); - 
RETURN(-EINVAL); - } - regression_running = 1; - spin_unlock(&ctl_lock); - - regress_connh = *connh; - max_locks = max_locks_in; - num_resources = num_resources_in; - num_extents = num_extents_in; - - LDLM_DEBUG_NOLOCK("regression test started: threads: %d, max_locks: " - "%d, num_res: %d, num_ext: %d\n", - threads, max_locks_in, num_resources_in, - num_extents_in); - - for (i = 0; i < threads; i++) { - rc = ldlm_start_thread(obddev, connh); - if (rc < 0) - GOTO(cleanup, rc); - } - - cleanup: - if (rc < 0) - ldlm_regression_stop(); - RETURN(rc); -} - -int ldlm_regression_stop(void) -{ - ENTRY; - - spin_lock(&ctl_lock); - if (!regression_running) { - CERROR("The ldlm regression isn't started.\n"); - spin_unlock(&ctl_lock); - RETURN(-EINVAL); - } - - while (!list_empty(&ctl_threads)) { - struct ldlm_test_thread *thread; - thread = list_entry(ctl_threads.next, struct ldlm_test_thread, - t_link); - - thread->t_flags |= SVC_STOPPING; - - spin_unlock(&ctl_lock); - wake_up(&thread->t_ctl_waitq); - wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED); - spin_lock(&ctl_lock); - - list_del(&thread->t_link); - OBD_FREE(thread, sizeof(*thread)); - } - - /* decrement all held locks */ - while (!list_empty(&lock_list)) { - struct ldlm_lock *lock; - struct ldlm_test_lock *lock_info = - list_entry(lock_list.next, struct ldlm_test_lock, - l_link); - list_del(lock_list.next); - num_locks--; - - lock = ldlm_handle2lock(&lock_info->l_lockh); - ldlm_lock_decref(&lock_info->l_lockh, lock->l_granted_mode); - ldlm_lock_put(lock); - - OBD_FREE(lock_info, sizeof(*lock_info)); - } - - regression_running = 0; - spin_unlock(&ctl_lock); - - RETURN(0); -} - -int ldlm_test(struct obd_device *obddev, struct lustre_handle *connh) -{ - int rc; - rc = ldlm_test_basics(obddev); - if (rc) - RETURN(rc); - - rc = ldlm_test_extents(obddev); - if (rc) - RETURN(rc); - - rc = ldlm_test_network(obddev, connh); - RETURN(rc); -} diff --git a/lustre/liblustre/.cvsignore b/lustre/liblustre/.cvsignore 
deleted file mode 100644 index fb1a186..0000000 --- a/lustre/liblustre/.cvsignore +++ /dev/null @@ -1,9 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS -libtest diff --git a/lustre/liblustre/Makefile.am b/lustre/liblustre/Makefile.am deleted file mode 100644 index 6648aa8..0000000 --- a/lustre/liblustre/Makefile.am +++ /dev/null @@ -1,30 +0,0 @@ -# Administration utilities Makefile -DEFS= - -CFLAGS:=-g -I$(top_srcdir)/utils -I$(top_srcdir)/portals/include -I$(srcdir)/../include -I$(top_srcdir)/../libsysio/include -Wall -L../portals/utils - -KFLAGS:= -CPPFLAGS = $(HAVE_EFENCE) -D_LARGEFILE64_SOURCE=1 -LIBS = $(LIBEFENCE) -LLIBS= ./libllite.a ../lov/liblov.a ../obdecho/libobdecho.a ../osc/libosc.a ../ldlm/libldlm.a ../ptlrpc/libptlrpc.a ../obdclass/liblustreclass.a ../mdc/libmdc.a - -lib_LIBRARIES = libllite.a -libllite_a_SOURCES = llite_lib.c super.c file.c rw.c - -bin_PROGRAMS = libtest lltest - -libtest_LDADD := $(LIBREADLINE) $(LLIBS) \ - ../portals/unals/libtpcnal.a \ - ../portals/portals/libportals.a\ - -lptlctl -lpthread -libtest_SOURCES = libtest.c - -lltest_LDADD := $(LIBREADLINE) $(LLIBS) \ - ../../libsysio/src/libsysio.a ../../libsysio/dev/stdfd/libsysio_stdfd.a \ - -lc \ - ../portals/unals/libtcpnal.a ../portals/portals/libportals.a \ - -lptlctl -lpthread -lltest_SOURCES = lltest.c - -include $(top_srcdir)/Rules - diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c deleted file mode 100644 index 8344af5..0000000 --- a/lustre/liblustre/file.c +++ /dev/null @@ -1,553 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <stdlib.h> -#include <string.h> -#include <error.h> -#include <assert.h> -#include <time.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include <sysio.h> -#include <fs.h> -#include <mount.h> -#include <inode.h> -#include <file.h> - -#include "llite_lib.h" - -void llu_prepare_mdc_op_data(struct mdc_op_data *data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode) -{ - struct llu_inode_info *lli1, *lli2; - - LASSERT(i1); - - lli1 = llu_i2info(i1); - data->ino1 = lli1->lli_st_ino; - data->gen1 = lli1->lli_st_generation; - data->typ1 = lli1->lli_st_mode & S_IFMT; - data->gid1 = lli1->lli_st_gid; - - if (i2) { - lli2 = llu_i2info(i2); - data->ino2 = lli2->lli_st_ino; - data->gen2 = lli2->lli_st_generation; - data->typ2 = lli2->lli_st_mode & S_IFMT; - data->gid2 = lli2->lli_st_gid; - } else - data->ino2 = 0; - - data->name = name; - data->namelen = namelen; - data->mode = mode; -} - -static struct inode *llu_create_node(struct inode *dir, const char *name, - int namelen, const void *data, int datalen, - int mode, __u64 extra, - struct lookup_intent *it) -{ - struct inode *inode; - struct ptlrpc_request *request = NULL; - struct mds_body *body; - time_t time = 123456;//time(NULL); - struct llu_sb_info *sbi = llu_i2sbi(dir); - - if (it && it->it_disposition) 
{ - LBUG(); -#if 0 - ll_invalidate_inode_pages(dir); -#endif - request = it->it_data; - body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*body)); - } else { - struct mdc_op_data op_data; - struct llu_inode_info *lli_dir = llu_i2info(dir); - int gid = current->fsgid; - int rc; - - if (lli_dir->lli_st_mode & S_ISGID) { - gid = lli_dir->lli_st_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } - - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0); - rc = mdc_create(&sbi->ll_mdc_conn, &op_data, - data, datalen, mode, current->fsuid, gid, - time, extra, &request); - if (rc) { - inode = (struct inode*)rc; - goto out; - } - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); - } - - inode = llu_new_inode(dir->i_fs, body->ino, body->mode); - if (!inode) { - /* FIXME more cleanup needed? */ - goto out; - } - - llu_update_inode(inode, body, NULL); - - if (it && it->it_disposition) { - /* We asked for a lock on the directory, but were - * granted a lock on the inode. Since we finally have - * an inode pointer, stuff it in the lock. 
*/ -#if 0 - ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle, - inode); -#endif - } - - out: - ptlrpc_req_finished(request); - return inode; -} - -int llu_create(struct inode *dir, struct pnode_base *pnode, int mode) -{ - struct inode *inode; -#if 0 - int rc = 0; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu,intent=%s\n", - dentry->d_name.name, dir->i_ino, LL_IT2STR(dentry->d_it)); - - it = dentry->d_it; - - rc = ll_it_open_error(IT_OPEN_CREATE, it); - if (rc) { - LL_GET_INTENT(dentry, it); - ptlrpc_req_finished(it->it_data); - RETURN(rc); - } -#endif - inode = llu_create_node(dir, pnode->pb_name.name, pnode->pb_name.len, - NULL, 0, mode, 0, NULL); - - if (IS_ERR(inode)) - RETURN(PTR_ERR(inode)); - - pnode->pb_ino = inode; - - return 0; -} - -static int llu_create_obj(struct lustre_handle *conn, struct inode *inode, - struct lov_stripe_md *lsm) -{ - struct ptlrpc_request *req = NULL; - struct llu_inode_info *lli = llu_i2info(inode); - struct lov_mds_md *lmm = NULL; - struct obdo *oa; - struct iattr iattr; - struct mdc_op_data op_data; - int rc, err, lmm_size = 0;; - ENTRY; - - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); - - oa->o_mode = S_IFREG | 0600; - oa->o_id = lli->lli_st_ino; - /* Keep these 0 for now, because chown/chgrp does not change the - * ownership on the OST, and we don't want to allow BA OST NFS - * users to access these objects by mistake. 
- */ - oa->o_uid = 0; - oa->o_gid = 0; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLUID | OBD_MD_FLGID; - - rc = obd_create(conn, oa, &lsm, NULL); - if (rc) { - CERROR("error creating objects for inode %lu: rc = %d\n", - lli->lli_st_ino, rc); - if (rc > 0) { - CERROR("obd_create returned invalid rc %d\n", rc); - rc = -EIO; - } - GOTO(out_oa, rc); - } - - LASSERT(lsm && lsm->lsm_object_id); - rc = obd_packmd(conn, &lmm, lsm); - if (rc < 0) - GOTO(out_destroy, rc); - - lmm_size = rc; - - /* Save the stripe MD with this file on the MDS */ - memset(&iattr, 0, sizeof(iattr)); - iattr.ia_valid = ATTR_FROM_OPEN; - - llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - rc = mdc_setattr(&llu_i2sbi(inode)->ll_mdc_conn, &op_data, - &iattr, lmm, lmm_size, &req); - ptlrpc_req_finished(req); - - obd_free_diskmd(conn, &lmm); - - /* If we couldn't complete mdc_open() and store the stripe MD on the - * MDS, we need to destroy the objects now or they will be leaked. - */ - if (rc) { - CERROR("error: storing stripe MD for %lu: rc %d\n", - lli->lli_st_ino, rc); - GOTO(out_destroy, rc); - } - lli->lli_smd = lsm; - - EXIT; -out_oa: - obdo_free(oa); - return rc; - -out_destroy: - obdo_from_inode(oa, inode, OBD_MD_FLTYPE); - oa->o_id = lsm->lsm_object_id; - oa->o_valid |= OBD_MD_FLID; - err = obd_destroy(conn, oa, lsm, NULL); - obd_free_memmd(conn, &lsm); - if (err) { - CERROR("error uncreating inode %lu objects: rc %d\n", - lli->lli_st_ino, err); - } - goto out_oa; -} - -/* FIXME currently no "it" passed in */ -static int llu_local_open(struct llu_inode_info *lli, struct lookup_intent *it) -{ - struct ll_file_data *fd; -#if 0 - struct ptlrpc_request *req = it->it_data; - struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1); - ENTRY; -#endif - LASSERT(!lli->lli_file_data); - - fd = malloc(sizeof(struct ll_file_data)); - /* We can't handle this well without reorganizing ll_file_open and - * ll_mdc_close, so don't even try right now. 
*/ - LASSERT(fd != NULL); - - memset(fd, 0, sizeof(*fd)); -#if 0 - memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); - fd->fd_mds_och.och_req = it->it_data; -#endif - lli->lli_file_data = fd; - - RETURN(0); -} - -static int llu_osc_open(struct lustre_handle *conn, struct inode *inode, - struct lov_stripe_md *lsm) -{ - struct ll_file_data *fd = llu_i2info(inode)->lli_file_data; - struct obdo *oa; - int rc; - ENTRY; - - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); - oa->o_id = lsm->lsm_object_id; - oa->o_mode = S_IFREG; - oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och); - if (rc) - GOTO(out, rc); - -// file->f_flags &= ~O_LOV_DELAY_CREATE; - obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | - OBD_MD_FLCTIME); - - EXIT; -out: - obdo_free(oa); - return rc; -} - -static int llu_file_open(struct inode *inode) -{ -#if 0 - struct llu_sb_info *sbi = llu_i2sbi(inode); -#endif - struct llu_inode_info *lli = llu_i2info(inode); - struct lustre_handle *conn = llu_i2obdconn(inode); - struct lookup_intent *it; - struct lov_stripe_md *lsm; - int rc = 0; - -#if 0 - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - LL_GET_INTENT(file->f_dentry, it); - rc = ll_it_open_error(IT_OPEN_OPEN, it); - if (rc) - RETURN(rc); -#endif - rc = llu_local_open(lli, it); - if (rc) - LBUG(); -#if 0 - mdc_set_open_replay_data(&((struct ll_file_data *) - file->private_data)->fd_mds_och); -#endif - lsm = lli->lli_smd; - if (lsm == NULL) { -#if 0 - if (file->f_flags & O_LOV_DELAY_CREATE) { - CDEBUG(D_INODE, "delaying object creation\n"); - RETURN(0); - } -#endif - if (!lli->lli_smd) { - rc = llu_create_obj(conn, inode, NULL); - if (rc) - GOTO(out_close, rc); - } else { - CERROR("warning: stripe already set on ino %lu\n", - lli->lli_st_ino); - } - lsm = lli->lli_smd; - } - - rc = llu_osc_open(conn, inode, lsm); - if (rc) - GOTO(out_close, rc); - RETURN(0); - - 
out_close: -// ll_mdc_close(&sbi->ll_mdc_conn, inode, file); - return rc; -} - -int llu_iop_open(struct pnode *pnode, int flags, mode_t mode) -{ - struct inode *dir = pnode->p_parent->p_base->pb_ino; - int rc; - /* FIXME later we must add the ldlm here */ - - LASSERT(dir); - - /* libsysio forgot to guarentee mode is valid XXX */ - mode |= S_IFREG; - - if (!pnode->p_base->pb_ino) { - rc = llu_create(dir, pnode->p_base, mode); - if (rc) - return rc; - } - - LASSERT(pnode->p_base->pb_ino); - return llu_file_open(pnode->p_base->pb_ino); -} - - -static int llu_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode) -{ - struct llu_inode_info *lli = llu_i2info(inode); - struct ll_file_data *fd = lli->lli_file_data; - struct ptlrpc_request *req = NULL; - unsigned long flags; - struct obd_import *imp; - int rc; - - /* FIXME add following code later FIXME */ -#if 0 - /* Complete the open request and remove it from replay list */ - rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, lli->lli_st_ino, - inode->i_mode, &fd->fd_mds_och.och_fh, &req); - if (rc) - CERROR("inode %lu close failed: rc = %d\n", - lli->lli_st_ino, rc); - - imp = fd->fd_mds_och.och_req->rq_import; - LASSERT(imp != NULL); - spin_lock_irqsave(&imp->imp_lock, flags); - - DEBUG_REQ(D_HA, fd->fd_mds_och.och_req, "matched open req %p", - fd->fd_mds_och.och_req); - - /* We held on to the request for replay until we saw a close for that - * file. Now that we've closed it, it gets replayed on the basis of - * its transno only. */ - spin_lock (&fd->fd_mds_och.och_req->rq_lock); - fd->fd_mds_och.och_req->rq_replay = 0; - spin_unlock (&fd->fd_mds_och.och_req->rq_lock); - - if (fd->fd_mds_och.och_req->rq_transno) { - /* This open created a file, so it needs replay as a - * normal transaction now. Our reference to it now - * effectively owned by the imp_replay_list, and it'll - * be committed just like other transno-having - * requests from here on out. 
*/ - - /* We now retain this close request, so that it is - * replayed if the open is replayed. We duplicate the - * transno, so that we get freed at the right time, - * and rely on the difference in xid to keep - * everything ordered correctly. - * - * But! If this close was already given a transno - * (because it caused real unlinking of an - * open-unlinked file, f.e.), then we'll be ordered on - * the basis of that and we don't need to do anything - * magical here. */ - if (!req->rq_transno) { - req->rq_transno = fd->fd_mds_och.och_req->rq_transno; - ptlrpc_retain_replayable_request(req, imp); - } - spin_unlock_irqrestore(&imp->imp_lock, flags); - - /* Should we free_committed now? we always free before - * replay, so it's probably a wash. We could check to - * see if the fd_req should already be committed, in - * which case we can avoid the whole retain_replayable - * dance. */ - } else { - /* No transno means that we can just drop our ref. */ - spin_unlock_irqrestore(&imp->imp_lock, flags); - } - ptlrpc_req_finished(fd->fd_mds_och.och_req); - - /* Do this after the fd_req->rq_transno check, because we don't want - * to bounce off zero references. */ - ptlrpc_req_finished(req); - fd->fd_mds_och.och_fh.cookie = DEAD_HANDLE_MAGIC; -#endif - lli->lli_file_data = NULL; - free(fd); - - RETURN(-abs(rc)); -} - -static int llu_file_release(struct inode *inode) -{ - struct llu_sb_info *sbi = llu_i2sbi(inode); - struct llu_inode_info *lli = llu_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct ll_file_data *fd; - struct obdo oa; - int rc = 0, rc2; - - fd = lli->lli_file_data; - if (!fd) /* no process opened the file after an mcreate */ - RETURN(rc = 0); - - /* we might not be able to get a valid handle on this file - * again so we really want to flush our write cache.. 
*/ - if (S_ISREG(inode->i_mode) && lsm) { - memset(&oa, 0, sizeof(oa)); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID; - - memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE); - oa.o_valid |= OBD_MD_FLHANDLE; - - rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (rc) - CERROR("inode %lu object close failed: rc = " - "%d\n", lli->lli_st_ino, rc); - } - - rc2 = llu_mdc_close(&sbi->ll_mdc_conn, inode); - if (rc2 && !rc) - rc = rc2; - - RETURN(rc); -} - -int llu_iop_close(struct inode *inode) -{ - return llu_file_release(inode); -} - -int llu_iop_ipreadv(struct inode *ino, - struct io_arguments *ioargs, - struct ioctx **ioctxp) -{ - struct ioctx *ioctx; - - if (!ioargs->ioarg_iovlen) - return 0; - if (ioargs->ioarg_iovlen < 0) - return -EINVAL; - - ioctx = _sysio_ioctx_new(ino, ioargs); - if (!ioctx) - return -ENOMEM; - - ioctx->ioctx_cc = llu_file_read(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (ioctx->ioctx_cc < 0) - ioctx->ioctx_errno = ioctx->ioctx_cc; - - *ioctxp = ioctx; - return 0; -} - -int llu_iop_ipwritev(struct inode *ino, - struct io_arguments *ioargs, - struct ioctx **ioctxp) -{ - struct ioctx *ioctx; - - if (!ioargs->ioarg_iovlen) - return 0; - if (ioargs->ioarg_iovlen < 0) - return -EINVAL; - - ioctx = _sysio_ioctx_new(ino, ioargs); - if (!ioctx) - return -ENOMEM; - - ioctx->ioctx_cc = llu_file_write(ino, - ioctx->ioctx_iovec, - ioctx->ioctx_iovlen, - ioctx->ioctx_offset); - if (ioctx->ioctx_cc < 0) - ioctx->ioctx_errno = ioctx->ioctx_cc; - - *ioctxp = ioctx; - return 0; -} - diff --git a/lustre/liblustre/libtest.c b/lustre/liblustre/libtest.c deleted file mode 100644 index 1d523a6..0000000 --- a/lustre/liblustre/libtest.c +++ /dev/null @@ -1,136 +0,0 @@ -#include <stdio.h> -#include <netinet/in.h> -#include <sys/socket.h> -#include <arpa/inet.h> - -#include <portals/api-support.h> /* needed for ptpctl.h */ -#include <portals/ptlctl.h> /* needed for 
parse_dump */ - - -#include <liblustre.h> -#include <linux/obd.h> -#include <linux/obd_class.h> -#include <portals/procbridge.h> - -struct ldlm_namespace; -struct ldlm_res_id; -struct obd_import; - -extern int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, int flags); -extern int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int local_only); -extern int ldlm_replay_locks(struct obd_import *imp); - -void *inter_module_get(char *arg) -{ - if (!strcmp(arg, "tcpnal_ni")) - return &tcpnal_ni; - else if (!strcmp(arg, "ldlm_cli_cancel_unused")) - return ldlm_cli_cancel_unused; - else if (!strcmp(arg, "ldlm_namespace_cleanup")) - return ldlm_namespace_cleanup; - else if (!strcmp(arg, "ldlm_replay_locks")) - return ldlm_replay_locks; - else - return NULL; -} - -ptl_handle_ni_t tcpnal_ni; - -struct pingcli_args { - ptl_nid_t mynid; - ptl_nid_t nid; - ptl_pid_t port; - int count; - int size; -}; - -struct task_struct *current; - -struct obd_class_user_state ocus; - -/* portals interfaces */ -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - return &tcpnal_ni; -} - -inline void -kportal_put_ni (int nal) -{ - return; -} - -void init_current(int argc, char **argv) -{ - current = malloc(sizeof(*current)); - strncpy(current->comm, argv[0], sizeof(current->comm)); - current->pid = getpid(); - -} - -ptl_nid_t tcpnal_mynid; - -int init_lib_portals(struct pingcli_args *args) -{ - int rc; - - PtlInit(); - tcpnal_mynid = args->mynid; - rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - PtlFini(); - RETURN (rc); - } - PtlNIDebug(tcpnal_ni, ~0); - return rc; -} - -extern int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, unsigned long arg); - - -int lib_ioctl(int dev_id, int opc, void * ptr) -{ - - if (dev_id == OBD_DEV_ID) { - struct obd_ioctl_data *ioc = ptr; - class_handle_ioctl(&ocus, opc, (unsigned long)ptr); - - /* you _may_ need to call 
obd_ioctl_unpack or some - other verification function if you want to use ioc - directly here */ - printf ("processing ioctl cmd: %x buf len: %d\n", - opc, ioc->ioc_len); - } - return (0); -} - -int main(int argc, char **argv) -{ - struct pingcli_args *args; - args= malloc(sizeof(*args)); - if (!args) { - printf("Malloc error\n"); - exit(1); - } - - args->mynid = ntohl (inet_addr (argv[1])); - INIT_LIST_HEAD(&ocus.ocus_conns); - - init_current(argc, argv); - init_obdclass(); - init_lib_portals(args); - ptlrpc_init(); - ldlm_init(); - mdc_init(); - lov_init(); - osc_init(); - echo_client_init(); - - parse_dump("/tmp/DUMP_FILE", lib_ioctl); - - printf("Hello\n"); - return 0; -} - diff --git a/lustre/liblustre/llite_lib.c b/lustre/liblustre/llite_lib.c deleted file mode 100644 index b11de88..0000000 --- a/lustre/liblustre/llite_lib.c +++ /dev/null @@ -1,226 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <stdlib.h> -#include <string.h> -#include <error.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include <sysio.h> -#include <fs.h> -#include <mount.h> -#include <inode.h> -#include <file.h> - -#include <netinet/in.h> -#include <sys/socket.h> -#include <arpa/inet.h> - -#include <portals/api-support.h> /* needed for ptpctl.h */ -#include <portals/ptlctl.h> /* needed for parse_dump */ - -#include "llite_lib.h" - - -ptl_handle_ni_t tcpnal_ni; -struct task_struct *current; -struct obd_class_user_state ocus; - -/* portals interfaces */ -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - return &tcpnal_ni; -} - -inline void -kportal_put_ni (int nal) -{ - return; -} - -struct ldlm_namespace; -struct ldlm_res_id; -struct obd_import; - -extern int ldlm_cli_cancel_unused(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, int flags); -extern int ldlm_namespace_cleanup(struct ldlm_namespace *ns, int local_only); -extern int ldlm_replay_locks(struct obd_import *imp); - -void *inter_module_get(char *arg) -{ - if (!strcmp(arg, "tcpnal_ni")) - return &tcpnal_ni; - else if (!strcmp(arg, "ldlm_cli_cancel_unused")) - return ldlm_cli_cancel_unused; - else if (!strcmp(arg, "ldlm_namespace_cleanup")) - return ldlm_namespace_cleanup; - else if (!strcmp(arg, "ldlm_replay_locks")) - return ldlm_replay_locks; - else - return NULL; -} - -void init_current(char *comm) -{ - current = malloc(sizeof(*current)); - current->fs = malloc(sizeof(*current->fs)); - current->fs->umask = umask(0777); - umask(current->fs->umask); - strncpy(current->comm, comm, sizeof(current->comm)); - current->pid = getpid(); - current->fsuid = 0; - current->fsgid = 0; - current->cap_effective = 0; - memset(¤t->pending, 0, sizeof(current->pending)); -} - -ptl_nid_t tcpnal_mynid; - -int init_lib_portals() -{ - int rc; - - PtlInit(); - rc = PtlNIInit(procbridge_interface, 0, 0, 0, &tcpnal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit 
failed: error %d\n", rc); - PtlFini(); - RETURN (rc); - } - PtlNIDebug(tcpnal_ni, ~0); - return rc; -} - -extern int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, unsigned long arg); - -struct mount_option_s mount_option = {NULL, NULL}; - -/* FIXME simple arg parser FIXME */ -void parse_mount_options(void *arg) -{ - char *buf = NULL; - struct obd_ioctl_data *data; - char *ptr, *comma, *eq, **tgt, *v; - int len; - - if (obd_ioctl_getdata(&buf, &len, arg)) { - CERROR("OBD ioctl: data error\n"); - return; - } - data = (struct obd_ioctl_data *)buf; - ptr = data->ioc_inlbuf1; - printf("mount option: %s\n", ptr); - - while (ptr) { - eq = strchr(ptr, '='); - if (!eq) - return; - - *eq = 0; - if (!strcmp("osc", ptr)) - tgt = &mount_option.osc_uuid; - else if (!strcmp("mdc", ptr)) - tgt = &mount_option.mdc_uuid; - else { - printf("Unknown mount option %s\n", ptr); - return; - } - - v = eq + 1; - comma = strchr(v, ','); - if (comma) { - *comma = 0; - ptr = comma + 1; - } else - ptr = NULL; - - *tgt = malloc(strlen(v)+1); - strcpy(*tgt, v); - } - - if (buf) - obd_ioctl_freedata(buf, len); -} - -int lib_ioctl(int dev_id, int opc, void * ptr) -{ - int rc; - - if (dev_id == OBD_DEV_ID) { - struct obd_ioctl_data *ioc = ptr; - - if (opc == OBD_IOC_MOUNTOPT) { - parse_mount_options(ptr); - return 0; - } - - rc = class_handle_ioctl(&ocus, opc, (unsigned long)ptr); - - /* you _may_ need to call obd_ioctl_unpack or some - other verification function if you want to use ioc - directly here */ - printf ("processing ioctl cmd: %x buf len: %d, rc %d\n", - opc, ioc->ioc_len, rc); - - if (rc) - return rc; - } - return (0); -} - -int lllib_init(char *arg) -{ - tcpnal_mynid = ntohl(inet_addr(arg)); - INIT_LIST_HEAD(&ocus.ocus_conns); - - init_current("dummy"); - if (init_obdclass() || - init_lib_portals() || - ptlrpc_init() || - ldlm_init() || - mdc_init() || - lov_init() || - osc_init()) - return -1; - - if (parse_dump("/tmp/DUMP_FILE", lib_ioctl)) - return -1; - - 
return _sysio_fssw_register("llite", &llu_fssw_ops); -} - -/* FIXME */ -void generate_random_uuid(unsigned char uuid_out[16]) -{ - int *arr = (int*)uuid_out; - int i; - - for (i = 0; i < sizeof(uuid_out)/sizeof(int); i++) - arr[i] = rand(); -} - diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h deleted file mode 100644 index ce2e23b..0000000 --- a/lustre/liblustre/llite_lib.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifndef __LLU_H_ -#define __LLU_H_ - -#include <liblustre.h> -#include <linux/obd.h> -#include <linux/obd_class.h> -#include <portals/procbridge.h> -#include <linux/lustre_lite.h> - -#include <sys/types.h> -#include <sys/stat.h> - -struct ll_file_data { - struct obd_client_handle fd_mds_och; - struct obd_client_handle fd_ost_och; - __u32 fd_flags; -}; - -struct llu_sb_info -{ - struct obd_uuid ll_sb_uuid; - struct lustre_handle ll_mdc_conn; - struct lustre_handle ll_osc_conn; - obd_id ll_rootino; - int ll_flags; - struct list_head ll_conn_chain; -}; - -struct llu_inode_info { - struct llu_sb_info *lli_sbi; - struct ll_fid lli_fid; - struct lov_stripe_md *lli_smd; - char *lli_symlink_name; - /*struct semaphore lli_open_sem;*/ - unsigned long lli_flags; - struct list_head lli_read_extents; - - /* in libsysio we have no chance to store data in file, - * so place it here */ - struct ll_file_data *lli_file_data; - - /* stat FIXME not 64 bit clean */ - dev_t lli_st_dev; - ino_t lli_st_ino; - mode_t lli_st_mode; - nlink_t lli_st_nlink; - uid_t lli_st_uid; - gid_t lli_st_gid; - dev_t lli_st_rdev; - loff_t lli_st_size; - unsigned int lli_st_blksize; - unsigned int lli_st_blocks; - time_t lli_st_atime; - time_t lli_st_mtime; - time_t lli_st_ctime; - - /* not for stat, change it later */ - int lli_st_flags; - unsigned long lli_st_generation; -}; - -static inline struct llu_sb_info *llu_fs2sbi(struct filesys *fs) -{ - return (struct llu_sb_info*)(fs->fs_private); -} - -static inline struct llu_inode_info *llu_i2info(struct inode *inode) -{ - return 
(struct llu_inode_info*)(inode->i_private); -} - -static inline struct llu_sb_info *llu_i2sbi(struct inode *inode) -{ - return llu_i2info(inode)->lli_sbi; -} - -static inline struct client_obd *sbi2mdc(struct llu_sb_info *sbi) -{ - struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn); - if (obd == NULL) - LBUG(); - return &obd->u.cli; -} - -static inline struct lustre_handle *llu_i2obdconn(struct inode *inode) -{ - return &(llu_i2info(inode)->lli_sbi->ll_osc_conn); -} - - -struct mount_option_s -{ - char *mdc_uuid; - char *osc_uuid; -}; - -/* llite_lib.c */ -void generate_random_uuid(unsigned char uuid_out[16]); - -extern struct mount_option_s mount_option; - -/* super.c */ -void llu_update_inode(struct inode *inode, struct mds_body *body, - struct lov_stripe_md *lmm); -void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid); -void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid); -struct inode* llu_new_inode(struct filesys *fs, ino_t ino, mode_t mode); - -extern struct fssw_ops llu_fssw_ops; - -/* file.c */ -void llu_prepare_mdc_op_data(struct mdc_op_data *data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode); -int llu_create(struct inode *dir, struct pnode_base *pnode, int mode); -int llu_iop_open(struct pnode *pnode, int flags, mode_t mode); -int llu_iop_close(struct inode *inode); -int llu_iop_ipreadv(struct inode *ino, - struct io_arguments *ioargs, - struct ioctx **ioctxp); -int llu_iop_ipwritev(struct inode *ino, - struct io_arguments *ioargs, - struct ioctx **ioctxp); - -/* rw.c */ -int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED); -ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos); -ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos); - -#endif diff --git a/lustre/liblustre/lltest.c b/lustre/liblustre/lltest.c deleted file mode 100644 index acdc47e..0000000 --- 
a/lustre/liblustre/lltest.c +++ /dev/null @@ -1,159 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light user test program - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define _BSD_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <getopt.h> -#include <errno.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/queue.h> -#include <sys/statvfs.h> - -#include <sysio.h> -#include <mount.h> - - -int do_stat(const char *name) -{ - struct stat stat; - - if (lstat(name, &stat)) { - perror("failed to stat: "); - return -1; - } - printf("******* stat '%s' ********\n", name); - printf("ino:\t\t%lu\n",stat.st_ino); - printf("mode:\t\t%o\n",stat.st_mode); - printf("nlink:\t\t%d\n",stat.st_nlink); - printf("uid/gid:\t%d/%d\n", stat.st_uid, stat.st_gid); - printf("size:\t\t%ld\n", stat.st_size); - printf("blksize:\t%ld\n", stat.st_blksize); - printf("block count:\t%ld\n", stat.st_blocks); - printf("atime:\t\t%lu\n",stat.st_atime); - printf("mtime:\t\t%lu\n",stat.st_mtime); - printf("ctime:\t\t%lu\n",stat.st_ctime); - printf("******* end stat ********\n"); - - return 0; -} -/* - * Get stats of file and file system. 
- * - * Usage: test_stats [-a] [-r <root-path>] [-m <root-driver>] [<path> ...] - */ - -extern int lllib_init(char *arg); - -char *root_driver = "llite"; -char *root_path = "/"; -unsigned mntflgs = 0; -struct mount root_mount; - -extern int portal_debug; -extern int portal_subsystem_debug; - -char* files[] = {"/dir1", "/dir1/file1", "/dir1/file2", "/dir1/dir2", "/dir1/dir2/file3"}; - -int -main(int argc, char * const argv[]) -{ - struct stat statbuf; - int rc, err, i, fd, written, readed; - char pgbuf[4096], readbuf[4096]; - int npages; - - if (_sysio_init() != 0) { - perror("init sysio"); - exit(1); - } - err = lllib_init(argv[1]); - if (err) { - perror("init llite driver"); - exit(1); - } - - err = _sysio_mount_root(root_path, root_driver, mntflgs, NULL); - if (err) { - errno = -err; - perror(root_driver); - exit(1); - } -#if 0 - for (i=0; i< sizeof(files)/sizeof(char*); i++) { - printf("******** stat %s *********\n", files[i]); - /* XXX ugly, only for testing */ - err = fixme_lstat(files[i], &statbuf); - if (err) - perror(root_driver); - printf("******** end stat %s: %d*********\n", files[i], err); - } -#endif -#if 0 - portal_debug = 0; - portal_subsystem_debug = 0; - npages = 10; - - fd = open("/newfile01", O_RDWR|O_CREAT|O_TRUNC, 00664); - printf("***************** open return %d ****************\n", fd); - - printf("***************** begin write pages ****************\n"); - for (i = 0; i < npages; i++ ) { - memset(pgbuf, ('A'+ i%10), 4096); - written = write(fd, pgbuf, 4096); - printf(">>> page %d: %d bytes written\n", i, written); - } - - printf("***************** begin read pages ****************\n"); - lseek(fd, 0, SEEK_SET); - - for (i = 0; i < npages; i++ ) { - memset(readbuf, '8', 4096); - readed = read(fd, readbuf, 4096); - readbuf[10] = 0; - printf("<<< page %d: %d bytes (%s)\n", i, readed, readbuf); - } - close(fd); -#endif - -#if 1 - //rc = chown("/newfile01", 10, 20); - rc = chmod("/newfile01", 0777); - printf("-------------- chmod return %d 
-----------\n", rc); - do_stat("/newfile01"); -#endif - - printf("sysio is about shutdown\n"); - /* - * Clean up. - */ - _sysio_shutdown(); - - printf("complete successfully\n"); - return 0; -} diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c deleted file mode 100644 index 847b1d0..0000000 --- a/lustre/liblustre/rw.c +++ /dev/null @@ -1,519 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <stdlib.h> -#include <string.h> -#include <error.h> -#include <assert.h> -#include <time.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include <sysio.h> -#include <fs.h> -#include <mount.h> -#include <inode.h> -#include <file.h> - -#include "llite_lib.h" - -int llu_iop_iodone(struct ioctx *ioctxp __IS_UNUSED) -{ - return 1; -} - -/* - * this grabs a lock and manually implements behaviour that makes it look - * like the OST is returning the file size with each lock acquisition - */ -int llu_extent_lock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, - int mode, struct ldlm_extent *extent, - struct lustre_handle *lockh) -{ -#if 0 - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - ENTRY; - - rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh); - if (rc != ELDLM_OK) - RETURN(rc); - - /* always do a getattr for the first person to pop out of lock - * acquisition.. the DID_GETATTR flag and semaphore serialize - * this initial race. we used to make a decision based on whether - * the lock was matched or acquired, but the matcher could win the - * waking race with the first issuer so that was no good.. - */ - if (test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) - RETURN(ELDLM_OK); - - down(&lli->lli_getattr_sem); - - if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) { - rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL); - if (rc == 0) { - set_bit(LLI_F_DID_GETATTR, &lli->lli_flags); - } else { - /* XXX can this fail? */ - ll_extent_unlock(fd, inode, lsm, mode, lockh); - } - } - - up(&lli->lli_getattr_sem); - RETURN(rc); -#else - return ELDLM_OK; -#endif -} - -int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct lustre_handle *lockh) -{ -#if 0 - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc; - ENTRY; - - /* XXX phil: can we do this? won't it screw the file size up? 
*/ - if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || - (sbi->ll_flags & LL_SBI_NOLCK)) - RETURN(0); - - rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh); - - RETURN(rc); -#else - return 0; -#endif -} - -static int llu_brw(int cmd, struct inode *inode, struct page *page, int flags) -{ - struct llu_inode_info *lli = llu_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct brw_page pg; - int rc; - ENTRY; - - pg.pg = page; - pg.off = ((obd_off)page->index) << PAGE_SHIFT; - - /* FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME FIXME */ -#if 0 - if (cmd == OBD_BRW_WRITE && (pg.off + PAGE_SIZE > lli->lli_st_size)) - pg.count = lli->lli_st_size % PAGE_SIZE; - else -#endif - pg.count = PAGE_SIZE; - - CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n", - cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, lli->lli_st_ino, - pg.off, pg.off); - if (pg.count == 0) { - LBUG(); - } - - pg.flag = flags; - - rc = obd_brw(cmd, llu_i2obdconn(inode), lsm, 1, &pg, set, NULL); - if (rc) { - CERROR("error from obd_brw: rc = %d\n", rc); - } - - RETURN(rc); -} - -static int llu_prepare_write(struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - struct llu_inode_info *lli = llu_i2info(inode); - obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; - int rc = 0; - ENTRY; - -#if 0 - if (!PageLocked(page)) - LBUG(); - - if (PageUptodate(page)) - RETURN(0); - - //POISON(addr + from, 0xca, to - from); -#endif - /* We're completely overwriting an existing page, so _don't_ set it up - * to date until commit_write */ - if (from == 0 && to == PAGE_SIZE) - RETURN(0); - - /* If are writing to a new page, no need to read old data. 
- * the extent locking and getattr procedures in ll_file_write have - * guaranteed that i_size is stable enough for our zeroing needs */ - if (lli->lli_st_size <= offset) { - memset(kmap(page), 0, PAGE_SIZE); - kunmap(page); - GOTO(prepare_done, rc = 0); - } - - rc = llu_brw(OBD_BRW_READ, inode, page, 0); - - EXIT; - - prepare_done: - return rc; -} - -static int llu_commit_write(struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - struct llu_inode_info *lli = llu_i2info(inode); - loff_t size; - int rc; - ENTRY; -#if 0 - LASSERT(inode == file->f_dentry->d_inode); - LASSERT(PageLocked(page)); - - CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n", - inode, page, from, to, page->index); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,from=%d,to=%d\n", - inode->i_ino, from, to); - /* to match full page case in prepare_write */ - SetPageUptodate(page); - /* mark the page dirty, put it on mapping->dirty, - * mark the inode PAGES_DIRTY, put it on sb->dirty */ - set_page_dirty(page); -#endif - rc = llu_brw(OBD_BRW_WRITE, inode, page, 0); - if (rc) - return rc; - - /* this is matched by a hack in obdo_to_inode at the moment */ - size = (((obd_off)page->index) << PAGE_SHIFT) + to; - if (size > lli->lli_st_size) - lli->lli_st_size = size; - - RETURN(0); -} /* ll_commit_write */ - -ssize_t -llu_generic_file_write(struct inode *inode, const char *buf, - size_t count, loff_t pos) -{ - struct page *page; - ssize_t written; - long status = 0; - int err; - unsigned bytes; - - if ((ssize_t) count < 0) - return -EINVAL; -#if 0 - down(&inode->i_sem); -#endif - if (pos < 0) - return -EINVAL; - - written = 0; - -#if 0 - remove_suid(inode); - update_inode_times(inode); -#endif - do { - unsigned long index, offset; - char *kaddr; - - /* - * Try to find the page in the cache. If it isn't there, - * allocate a free page. 
- */ - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - index = pos >> PAGE_CACHE_SHIFT; - bytes = PAGE_CACHE_SIZE - offset; - if (bytes > count) { - bytes = count; - } - - status = -ENOMEM; /* we'll assign it later anyway */ - page = __grab_cache_page(index); - if (!page) - break; - - kaddr = kmap(page); - status = llu_prepare_write(inode, page, offset, offset+bytes); - if (status) - goto sync_failure; - - memcpy(kaddr+offset, buf, bytes); - - status = llu_commit_write(inode, page, offset, offset+bytes); - if (!status) - status = bytes; - - if (status >= 0) { - written += status; - count -= status; - pos += status; - buf += status; - } -unlock: - kunmap(page); - page_cache_release(page); - - if (status < 0) - break; - } while (count); -done: - err = written ? written : status; - -#if 0 - up(&inode->i_sem); -#endif - return err; - - status = -EFAULT; - goto unlock; - -sync_failure: - /* - * If blocksize < pagesize, prepare_write() may have instantiated a - * few blocks outside i_size. Trim these off again. - */ - kunmap(page); - page_cache_release(page); - goto done; -} - -ssize_t llu_file_write(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos) -{ - struct llu_inode_info *lli = llu_i2info(inode); - struct ll_file_data *fd = lli->lli_file_data; /* XXX not ready don't use it now */ - struct lustre_handle lockh = { 0 }; - struct lov_stripe_md *lsm = lli->lli_smd; - struct ldlm_extent extent; - ldlm_error_t err; - ssize_t retval = 0; - ENTRY; - - /* XXX consider other types later */ - if (!S_ISREG(lli->lli_st_mode)) - LBUG(); -#if 0 - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu,size="LPSZ",offset=%Ld\n", - inode->i_ino, count, *ppos); - - /* - * sleep doing some writeback work of this mount's dirty data - * if the VM thinks we're low on memory.. other dirtying code - * paths should think about doing this, too, but they should be - * careful not to hold locked pages while they do so. like - * ll_prepare_write. 
*cough* - */ - ll_check_dirty(inode->i_sb); -#endif - while (iovlen--) { - const char *buf = iovec[iovlen].iov_base; - size_t count = iovec[iovlen].iov_len; - - /* POSIX, but surprised the VFS doesn't check this already */ - if (count == 0) - continue; - -#if 0 - if (!S_ISBLK(lli->lli_st_mode) && file->f_flags & O_APPEND) { - extent.start = 0; - extent.end = OBD_OBJECT_EOF; - } else { - extent.start = *ppos; - extent.end = *ppos + count - 1; - } -#else - extent.start = pos; - extent.end = pos + count - 1; -#endif - - err = llu_extent_lock(fd, inode, lsm, LCK_PW, &extent, &lockh); - if (err != ELDLM_OK) - RETURN(-ENOLCK); - -#if 0 - if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND) - *ppos = inode->i_size; - - CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n", - inode->i_ino, count, *ppos); -#endif - retval += llu_generic_file_write(inode, buf, count, pos); - } - - /* XXX errors? */ - ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); - return(retval); -} - -static void llu_update_atime(struct inode *inode) -{ -#if 0 - struct llu_inode_info *lli = llu_i2info(inode); - -#ifdef USE_ATIME - struct iattr attr; - - attr.ia_atime = LTIME_S(CURRENT_TIME); - attr.ia_valid = ATTR_ATIME; - - if (lli->lli_st_atime == attr.ia_atime) return; - if (IS_RDONLY(inode)) return; - if (IS_NOATIME(inode)) return; - - /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */ - llu_inode_setattr(inode, &attr, 0); -#else - /* update atime, but don't explicitly write it out just this change */ - inode->i_atime = CURRENT_TIME; -#endif -#endif -} - -static size_t llu_generic_file_read(struct inode *inode, char *buf, - size_t count, loff_t pos) -{ - struct llu_inode_info *lli = llu_i2info(inode); - unsigned long index, offset; - int error = 0; - size_t readed = 0; - - index = pos >> PAGE_CACHE_SHIFT; - offset = pos & ~PAGE_CACHE_MASK; - - do { - struct page *page; - unsigned long end_index, nr; - - end_index = lli->lli_st_size >> PAGE_CACHE_SHIFT; - - if (index > 
end_index) - break; - nr = PAGE_CACHE_SIZE; - if (index == end_index) { - nr = lli->lli_st_size & ~PAGE_CACHE_MASK; - if (nr <= offset) - break; - } - - nr = nr - offset; - if (nr > count) - nr = count; - - page = grab_cache_page(index); - if (!page) { - error = -ENOMEM; - break; - } - - error = llu_brw(OBD_BRW_READ, inode, page, 0); - if (error) { - page_cache_release(page); - break; - } - - memcpy(buf, kmap(page)+offset, nr); - offset += nr; - index += offset >> PAGE_CACHE_SHIFT; - offset &= ~PAGE_CACHE_MASK; - readed += nr; - count -= nr; - - page_cache_release(page); - } while (count); - - if (error) - return error; - return readed; -} - -ssize_t llu_file_read(struct inode *inode, const struct iovec *iovec, - size_t iovlen, loff_t pos) -{ - struct llu_inode_info *lli = llu_i2info(inode); - struct ll_file_data *fd = lli->lli_file_data; - struct lov_stripe_md *lsm = lli->lli_smd; - struct lustre_handle lockh = { 0 }; -#if 0 - struct ll_read_extent rextent; -#else - struct ldlm_extent extent; -#endif - ldlm_error_t err; - ssize_t retval = 0; - ENTRY; - - while (iovlen--) { - char *buf = iovec[iovlen].iov_base; - size_t count = iovec[iovlen].iov_len; - - /* "If nbyte is 0, read() will return 0 and have no other results." 
- * -- Single Unix Spec */ - if (count == 0) - RETURN(0); - -#if 0 - rextent.re_extent.start = pos; - rextent.re_extent.end = pos + count - 1; -#else - extent.start = pos; - extent.end = pos + count - 1; -#endif - err = llu_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh); - if (err != ELDLM_OK) - RETURN(-ENOLCK); -#if 0 - rextent.re_task = current; - spin_lock(&lli->lli_read_extent_lock); - list_add(&rextent.re_lli_item, &lli->lli_read_extents); - spin_unlock(&lli->lli_read_extent_lock); -#endif - CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n", - lli->lli_st_ino, count, pos); - retval = llu_generic_file_read(inode, buf, count, pos); -#if 0 - spin_lock(&lli->lli_read_extent_lock); - list_del(&rextent.re_lli_item); - spin_unlock(&lli->lli_read_extent_lock); -#endif - } - - if (retval > 0) - llu_update_atime(inode); - - /* XXX errors? */ - ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); - RETURN(retval); -} - diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c deleted file mode 100644 index 0e88933..0000000 --- a/lustre/liblustre/super.c +++ /dev/null @@ -1,779 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <stdlib.h> -#include <string.h> -#include <error.h> -#include <assert.h> -#include <time.h> -#include <sys/types.h> -#include <sys/queue.h> - -#include <sysio.h> -#include <fs.h> -#include <mount.h> -#include <inode.h> -#include <file.h> - -#include "llite_lib.h" - -static void llu_fsop_gone(struct filesys *fs) -{ - /* FIXME */ -} - -static struct inode_ops llu_inode_ops; - -void llu_update_inode(struct inode *inode, struct mds_body *body, - struct lov_stripe_md *lsm) -{ - struct llu_inode_info *lli = llu_i2info(inode); - - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); - if (lsm != NULL) { - if (lli->lli_smd == NULL) - lli->lli_smd = lsm; - else - LASSERT (!memcmp (lli->lli_smd, lsm, - sizeof (*lsm))); - } - - if (body->valid & OBD_MD_FLID) - lli->lli_st_ino = body->ino; - if (body->valid & OBD_MD_FLATIME) - LTIME_S(lli->lli_st_atime) = body->atime; - if (body->valid & OBD_MD_FLMTIME) - LTIME_S(lli->lli_st_mtime) = body->mtime; - if (body->valid & OBD_MD_FLCTIME) - LTIME_S(lli->lli_st_ctime) = body->ctime; - if (body->valid & OBD_MD_FLMODE) - lli->lli_st_mode = (lli->lli_st_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT)|(body->mode & S_IFMT); - if (body->valid & OBD_MD_FLUID) - lli->lli_st_uid = body->uid; - if (body->valid & OBD_MD_FLGID) - lli->lli_st_gid = body->gid; - if (body->valid & OBD_MD_FLFLAGS) - lli->lli_st_flags = body->flags; - if (body->valid & OBD_MD_FLNLINK) - lli->lli_st_nlink = body->nlink; - if (body->valid & OBD_MD_FLGENER) - lli->lli_st_generation = body->generation; - if (body->valid & OBD_MD_FLRDEV) - lli->lli_st_rdev = body->rdev; - if (body->valid & OBD_MD_FLSIZE) - lli->lli_st_size = body->size; - if 
(body->valid & OBD_MD_FLBLOCKS) - lli->lli_st_blocks = body->blocks; - - /* fillin fid */ - if (body->valid & OBD_MD_FLID) - lli->lli_fid.id = body->ino; - if (body->valid & OBD_MD_FLGENER) - lli->lli_fid.generation = body->generation; - if (body->valid & OBD_MD_FLTYPE) - lli->lli_fid.f_type = body->mode & S_IFMT; -} - -void obdo_to_inode(struct inode *dst, struct obdo *src, obd_flag valid) -{ - struct llu_inode_info *lli = llu_i2info(dst); - - valid &= src->o_valid; - - if (valid & OBD_MD_FLATIME) - LTIME_S(lli->lli_st_atime) = src->o_atime; - if (valid & OBD_MD_FLMTIME) - LTIME_S(lli->lli_st_mtime) = src->o_mtime; - if (valid & OBD_MD_FLCTIME && src->o_ctime > LTIME_S(lli->lli_st_ctime)) - LTIME_S(lli->lli_st_ctime) = src->o_ctime; - if (valid & OBD_MD_FLSIZE) - lli->lli_st_size = src->o_size; - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - lli->lli_st_blocks = src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - lli->lli_st_blksize = src->o_blksize; - if (valid & OBD_MD_FLTYPE) - lli->lli_st_mode = (lli->lli_st_mode & ~S_IFMT) | (src->o_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - lli->lli_st_mode = (lli->lli_st_mode & S_IFMT) | (src->o_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - lli->lli_st_uid = src->o_uid; - if (valid & OBD_MD_FLGID) - lli->lli_st_gid = src->o_gid; - if (valid & OBD_MD_FLFLAGS) - lli->lli_st_flags = src->o_flags; - if (valid & OBD_MD_FLNLINK) - lli->lli_st_nlink = src->o_nlink; - if (valid & OBD_MD_FLGENER) - lli->lli_st_generation = src->o_generation; - if (valid & OBD_MD_FLRDEV) - lli->lli_st_rdev = src->o_rdev; -} - -void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid) -{ - struct llu_inode_info *lli = llu_i2info(src); - - if (valid & OBD_MD_FLATIME) - dst->o_atime = LTIME_S(lli->lli_st_atime); - if (valid & OBD_MD_FLMTIME) - dst->o_mtime = LTIME_S(lli->lli_st_mtime); - if (valid & OBD_MD_FLCTIME) - dst->o_ctime = LTIME_S(lli->lli_st_ctime); - if (valid & OBD_MD_FLSIZE) - dst->o_size = lli->lli_st_size; - if 
(valid & OBD_MD_FLBLOCKS) /* allocation of space */ - dst->o_blocks = lli->lli_st_blocks; - if (valid & OBD_MD_FLBLKSZ) - dst->o_blksize = lli->lli_st_blksize; - if (valid & OBD_MD_FLTYPE) - dst->o_mode = (dst->o_mode & ~S_IFMT) | (lli->lli_st_mode & S_IFMT); - if (valid & OBD_MD_FLMODE) - dst->o_mode = (dst->o_mode & S_IFMT) | (lli->lli_st_mode & ~S_IFMT); - if (valid & OBD_MD_FLUID) - dst->o_uid = lli->lli_st_uid; - if (valid & OBD_MD_FLGID) - dst->o_gid = lli->lli_st_gid; - if (valid & OBD_MD_FLFLAGS) - dst->o_flags = lli->lli_st_flags; - if (valid & OBD_MD_FLNLINK) - dst->o_nlink = lli->lli_st_nlink; - if (valid & OBD_MD_FLGENER) - dst->o_generation = lli->lli_st_generation; - if (valid & OBD_MD_FLRDEV) - dst->o_rdev = (__u32)(lli->lli_st_rdev); - - dst->o_valid |= (valid & ~OBD_MD_FLID); -} - -int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm, - char *ostdata) -{ - struct llu_sb_info *sbi = llu_i2sbi(inode); - struct obdo oa; - int rc; - ENTRY; - - LASSERT(lsm); - LASSERT(sbi); - - memset(&oa, 0, sizeof oa); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME; - - if (ostdata != NULL) { - memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE); - oa.o_valid |= OBD_MD_FLHANDLE; - } - - rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm); - if (rc) - RETURN(rc); - - obdo_to_inode(inode, &oa, OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - - RETURN(0); -} - -struct inode* llu_new_inode(struct filesys *fs, ino_t ino, mode_t mode) -{ - struct inode *inode; - struct llu_inode_info *lli; - - OBD_ALLOC(lli, sizeof(*lli)); - if (!lli) - return NULL; - - /* initialize lli here */ - lli->lli_sbi = llu_fs2sbi(fs); - lli->lli_smd = NULL; - lli->lli_symlink_name = NULL; - lli->lli_flags = 0; - INIT_LIST_HEAD(&lli->lli_read_extents); - lli->lli_file_data = NULL; - - /* could file_identifier be 0 ? 
FIXME */ - inode = _sysio_i_new(fs, ino, NULL, -#ifndef AUTOMOUNT_FILE_NAME - mode & S_IFMT, -#else - mode, /* all of the bits! */ -#endif - 0, - &llu_inode_ops, lli); - - if (!inode) - OBD_FREE(lli, sizeof(*lli)); - - return inode; -} - -static int llu_iop_lookup(struct pnode *pnode, - struct inode **inop, - struct intent *intnt __IS_UNUSED, - const char *path __IS_UNUSED) -{ - struct pnode_base *pb_dir = pnode->p_parent->p_base; - struct ptlrpc_request *request = NULL; - struct llu_sb_info *sbi = llu_i2sbi(pb_dir->pb_ino); - struct ll_fid *fid = &llu_i2info(pb_dir->pb_ino)->lli_fid; - struct qstr *name = &pnode->p_base->pb_name; - struct mds_body *body; - unsigned long valid; - char *pname; - int rc, easize; - struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL}; - - /* the mount root inode have no name, so don't call - * remote in this case. but probably we need revalidate - * it here? FIXME */ - if (pnode->p_mount->mnt_root == pnode) { - struct inode *i = pnode->p_base->pb_ino; - I_REF(i); - *inop = i; - return 0; - } - - if (!name->len) - return -EINVAL; - - /* mdc_getattr_name require NULL-terminated name */ - OBD_ALLOC(pname, name->len + 1); - if (!pname) - return -ENOMEM; - memcpy(pname, name->name, name->len); - pname[name->len] = 0; - - valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE; - - /* FIXME before getattr_name, we don't know whether - * the inode we are finding is regular or not, so here - * we blindly require server feed in EA data */ - easize = obd_size_diskmd(&sbi->ll_osc_conn, NULL); - valid |= OBD_MD_FLEASIZE; - - rc = mdc_getattr_name(&sbi->ll_mdc_conn, fid, - pname, name->len + 1, - valid, easize, &request); - if (rc < 0) { - CERROR("mdc_getattr_name: %d\n", rc); - rc = -ENOENT; - goto out; - } - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*body)); - - *inop = llu_new_inode(pnode->p_mount->mnt_fs, body->ino, body->mode); - if (!inop) - goto out; - - lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, 
sizeof(*lic.lic_body)); - LASSERT (lic.lic_body != NULL); - LASSERT_REPSWABBED (request, 0); - - if (S_ISREG(lic.lic_body->mode) && - lic.lic_body->valid & OBD_MD_FLEASIZE) { - struct lov_mds_md *lmm; - int lmm_size; - int rc; - - lmm_size = lic.lic_body->eadatasize; - if (lmm_size == 0) { - CERROR ("OBD_MD_FLEASIZE set but eadatasize 0\n"); - RETURN (-EPROTO); - } - lmm = lustre_msg_buf(request->rq_repmsg, 0 + 1, lmm_size); - LASSERT(lmm != NULL); - LASSERT_REPSWABBED (request, 0 + 1); - - rc = obd_unpackmd (&sbi->ll_osc_conn, - &lic.lic_lsm, lmm, lmm_size); - if (rc < 0) { - CERROR ("Error %d unpacking eadata\n", rc); - RETURN (rc); - } - LASSERT (rc >= sizeof (*lic.lic_lsm)); - - } else { - lic.lic_lsm = NULL; - } - - llu_update_inode(*inop, body, lic.lic_lsm); - - if (llu_i2info(*inop)->lli_smd) { - rc = llu_inode_getattr(*inop, llu_i2info(*inop)->lli_smd, NULL); - if (rc) - _sysio_i_gone(*inop); - } - -out: - ptlrpc_req_finished(request); - OBD_FREE(pname, name->len + 1); - - return rc; -} - -static int llu_iop_getattr(struct pnode *pno, - struct inode *ino, - struct intnl_stat *b) -{ - struct llu_inode_info *lli = llu_i2info(ino); - - b->st_dev = lli->lli_st_dev; - b->st_ino = lli->lli_st_ino; - b->st_mode = lli->lli_st_mode; - b->st_nlink = lli->lli_st_nlink; - b->st_uid = lli->lli_st_uid; - b->st_gid = lli->lli_st_gid; - b->st_rdev = lli->lli_st_rdev; - b->st_size = lli->lli_st_size; - b->st_blksize = lli->lli_st_blksize; - b->st_blocks = lli->lli_st_blocks; - b->st_atime = lli->lli_st_atime; - b->st_mtime = lli->lli_st_mtime; - b->st_ctime = lli->lli_st_ctime; - - return 0; -} - -int llu_mdc_cancel_unused(struct lustre_handle *conn, - struct llu_inode_info *lli, - int flags) -{ - struct ldlm_res_id res_id = - { .name = {lli->lli_st_ino, lli->lli_st_generation} }; - struct obd_device *obddev = class_conn2obd(conn); - ENTRY; - RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags)); -} - -static void llu_clear_inode(struct inode *inode) -{ - 
struct llu_sb_info *sbi = llu_i2sbi(inode); - struct llu_inode_info *lli = llu_i2info(inode); - int rc; - ENTRY; - - CDEBUG(D_INODE, "clear inode: %lu\n", lli->lli_st_ino); - rc = llu_mdc_cancel_unused(&sbi->ll_mdc_conn, lli, - LDLM_FL_NO_CALLBACK); - if (rc < 0) { - CERROR("ll_mdc_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - - if (lli->lli_smd) { - rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0); - if (rc < 0) { - CERROR("obd_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - } - - if (lli->lli_smd) - obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd); - - if (lli->lli_symlink_name) { - OBD_FREE(lli->lli_symlink_name, - strlen(lli->lli_symlink_name) + 1); - lli->lli_symlink_name = NULL; - } - - EXIT; -} - -void llu_iop_gone(struct inode *inode) -{ - struct llu_inode_info *lli = llu_i2info(inode); - - llu_clear_inode(inode); - - OBD_FREE(lli, sizeof(*lli)); -} - -static int llu_setattr_raw(struct inode *inode, struct iattr *attr) -{ - struct ptlrpc_request *request = NULL; - struct llu_sb_info *sbi = llu_i2sbi(inode); - struct llu_inode_info *lli = llu_i2info(inode); - struct mdc_op_data op_data; - int err = 0; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", lli->lli_st_ino); - - /* if need truncate, do it at first */ - if (attr->ia_valid & ATTR_SIZE) { - printf("************* don't support truncate now !!!!!!!!\n"); - LBUG(); - } - - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). 
- */ - attr->ia_valid &= ~ATTR_SIZE; - if (!attr->ia_valid) - RETURN(0); - - llu_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) { - struct lov_stripe_md *lsm = lli->lli_smd; - struct obdo oa; - int err2; - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - lli->lli_st_ino, attr->ia_mtime); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME; - oa.o_mtime = attr->ia_mtime; - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - RETURN(err); -} - -/* FIXME here we simply act as a thin layer to glue it with - * llu_setattr_raw(), which is copy from kernel - */ -static int llu_iop_setattr(struct pnode *pno, - struct inode *ino, - unsigned mask, - struct intnl_stat *stbuf) -{ - struct iattr iattr; - - memset(&iattr, 0, sizeof(iattr)); - - if (mask & SETATTR_MODE) { - iattr.ia_mode = stbuf->st_mode; - iattr.ia_valid |= ATTR_MODE; - } - if (mask & SETATTR_MTIME) { - iattr.ia_mtime = stbuf->st_mtime; - iattr.ia_valid |= ATTR_MTIME; - } - if (mask & SETATTR_ATIME) { - iattr.ia_atime = stbuf->st_atime; - iattr.ia_valid |= ATTR_ATIME; - } - if (mask & SETATTR_UID) { - iattr.ia_uid = stbuf->st_uid; - iattr.ia_valid |= ATTR_UID; - } - if (mask & SETATTR_GID) { - iattr.ia_gid = stbuf->st_gid; - iattr.ia_valid |= ATTR_GID; - } - if (mask & SETATTR_LEN) { - iattr.ia_size = stbuf->st_size; /* FIXME signed expansion problem */ - iattr.ia_valid |= ATTR_SIZE; - } - - iattr.ia_valid |= ATTR_RAW; - /* FIXME FIXME FIXME FIXME FIXME FIXME FIXME - * without ATTR_FROM_OPEN, mds_reint_setattr will call - * mds_fid2locked_dentry() and deadlocked at completion_ast call. 
- * Here we workaround it and avoid any locking. - * FIXME FIXME FIXME FIXME FIXME FIXME FIXME - */ - iattr.ia_valid |= ATTR_FROM_OPEN; - - return llu_setattr_raw(ino, &iattr); -} - - -static int llu_mkdir2(struct inode *dir, const char *name, int len, int mode) -{ - struct ptlrpc_request *request = NULL; - time_t curtime = CURRENT_TIME; - struct llu_sb_info *sbi = llu_i2sbi(dir); - struct llu_inode_info *lli = llu_i2info(dir); - struct mdc_op_data op_data; - int err = -EMLINK; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu\n", - name, lli->lli_st_ino); - - /* FIXME check this later */ -#if 0 - if (dir->i_nlink >= EXT2_LINK_MAX) - RETURN(err); - mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; -#endif - mode |= S_IFDIR; - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode, - current->fsuid, current->fsgid, - curtime, 0, &request); - ptlrpc_req_finished(request); - RETURN(err); -} - -static int llu_iop_mkdir(struct pnode *pno, mode_t mode) -{ - struct inode *dir = pno->p_base->pb_parent->pb_ino; - struct qstr *qstr = &pno->p_base->pb_name; - int rc; - - LASSERT(dir); - - rc = llu_mkdir2(dir, qstr->name, qstr->len, mode); - - return rc; -} - -#ifndef S_IRWXUGO -#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) -#endif - -static int llu_symlink2(struct inode *dir, const char *name, int len, - const char *tgt) -{ - struct ptlrpc_request *request = NULL; - time_t curtime = CURRENT_TIME; - struct llu_sb_info *sbi = llu_i2sbi(dir); - struct llu_inode_info *lli = llu_i2info(dir); - struct mdc_op_data op_data; - int err = -EMLINK; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu,target=%s\n", - name, lli->lli_st_ino, tgt); - -#if 0 - if (dir->i_nlink >= EXT2_LINK_MAX) - RETURN(err); -#endif - llu_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(&sbi->ll_mdc_conn, &op_data, - tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, - current->fsuid, current->fsgid, 
curtime, 0, &request); - ptlrpc_req_finished(request); - RETURN(err); -} - -static int llu_iop_symlink(struct pnode *pno, const char *data) -{ - struct inode *dir = pno->p_base->pb_parent->pb_ino; - struct qstr *qstr = &pno->p_base->pb_name; - int rc; - - LASSERT(dir); - - rc = llu_symlink2(dir, qstr->name, qstr->len, data); - - return rc; -} - -struct filesys_ops llu_filesys_ops = -{ - fsop_gone: llu_fsop_gone, -}; - - -static struct inode_ops llu_inode_ops = { - inop_lookup: llu_iop_lookup, - inop_getattr: llu_iop_getattr, - inop_setattr: llu_iop_setattr, - inop_getdirentries: NULL, - inop_mkdir: llu_iop_mkdir, - inop_rmdir: NULL, - inop_symlink: llu_iop_symlink, - inop_readlink: NULL, - inop_open: llu_iop_open, - inop_close: llu_iop_close, - inop_unlink: NULL, - inop_ipreadv: llu_iop_ipreadv, - inop_ipwritev: llu_iop_ipwritev, - inop_iodone: llu_iop_iodone, - inop_fcntl: NULL, - inop_sync: NULL, - inop_datasync: NULL, - inop_ioctl: NULL, - inop_mknod: NULL, - inop_statvfs: NULL, - inop_gone: llu_iop_gone, -}; - - -static int -llu_fsswop_mount(const char *source, - unsigned flags, - const void *data __IS_UNUSED, - struct pnode *tocover, - struct mount **mntp) -{ - struct filesys *fs; - struct inode *root; - struct pnode_base *rootpb; - static struct qstr noname = { NULL, 0, 0 }; - struct ll_fid rootfid; - - struct llu_sb_info *sbi; - struct ptlrpc_connection *mdc_conn; - struct ptlrpc_request *request = NULL; - struct mds_body *root_body; - struct obd_uuid param_uuid; - class_uuid_t uuid; - struct obd_device *obd; - char *osc=mount_option.osc_uuid; - char *mdc=mount_option.mdc_uuid; - int err = -EINVAL; - - ENTRY; - - OBD_ALLOC(sbi, sizeof(*sbi)); - if (!sbi) - RETURN(-ENOMEM); - - INIT_LIST_HEAD(&sbi->ll_conn_chain); - generate_random_uuid(uuid); - class_uuid_unparse(uuid, &sbi->ll_sb_uuid); - - fs = _sysio_fs_new(&llu_filesys_ops, flags, sbi); - if (!fs) { - err = -ENOMEM; - goto out_free; - } - - obd = class_name2obd(mdc); - if (!obd) { - CERROR("MDC %s: not 
setup or attached\n", mdc); - err = -EINVAL; - goto out_free; - } - - /* setup mdc */ - /* FIXME need recover stuff */ - err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", mdc, err); - goto out_free; - } - - mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection; - - /* setup osc */ - obd = class_name2obd(osc); - if (!obd) { - CERROR("OSC %s: not setup or attached\n", osc); - err = -EINVAL; - goto out_mdc; - } - - err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", osc, err); - goto out_mdc; - } - - err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - if (err) { - CERROR("cannot mds_connect: rc = %d\n", err); - goto out_osc; - } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; - -/* XXX do we need this?? - memset(&osfs, 0, sizeof(osfs)); - rc = obd_statfs(&sbi->ll_mdc_conn, &osfs); -*/ - /* fetch attr of root inode */ - err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid, - OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); - if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - goto out_request; - } - - root_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof(*root_body)); - LASSERT(sbi->ll_rootino != 0); - - root = llu_new_inode(fs, root_body->ino, root_body->mode); - if (!root) { - err = -ENOMEM; - goto out_request; - } - - llu_update_inode(root, root_body, NULL); - - /* - * Generate base path-node for root. - */ - rootpb = _sysio_pb_new(&noname, NULL, root); - if (!rootpb) { - err = -ENOMEM; - goto out_inode; - } - - err = _sysio_do_mount(fs, rootpb, flags, NULL, mntp); - if (err) { - _sysio_pb_gone(rootpb); - goto out_inode; - } - - ptlrpc_req_finished(request); - request = NULL; - - printf("************************************************\n"); - printf("* Mount successfully!!!!!!! 
*\n"); - printf("************************************************\n"); - - return 0; - -out_inode: - _sysio_i_gone(root); -out_request: - ptlrpc_req_finished(request); -out_osc: - obd_disconnect(&sbi->ll_osc_conn); -out_mdc: - obd_disconnect(&sbi->ll_mdc_conn); -out_free: - OBD_FREE(sbi, sizeof(*sbi)); - return err; -} - -struct fssw_ops llu_fssw_ops = { - llu_fsswop_mount -}; - diff --git a/lustre/llite/.cvsignore b/lustre/llite/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/llite/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/llite/Makefile.am b/lustre/llite/Makefile.am deleted file mode 100644 index b6fc501..0000000 --- a/lustre/llite/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= - -MODULE = llite -modulefs_DATA = llite.o -EXTRA_PROGRAMS = llite - -llite_SOURCES = dcache.c commit_callback.c super.c rw.c iod.c super25.c -llite_SOURCES += file.c dir.c sysctl.c symlink.c -llite_SOURCES += namei.c lproc_llite.c llite_internal.h - -include $(top_srcdir)/Rules diff --git a/lustre/llite/commit_callback.c b/lustre/llite/commit_callback.c deleted file mode 100644 index ee49bb8..0000000 --- a/lustre/llite/commit_callback.c +++ /dev/null @@ -1,132 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * The daemon that causes completed but not committed transactions - * on the MDS to be flushed periodically when they are committed. - * A gratuitous getattr RPC is made to the MDS to discover the - * last committed record. - * - * Lustre High Availability Daemon - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. 
- * See the file COPYING in this distribution - * - * by Peter Braam <braam@clusterfs.com> - * - */ - -#define EXPORT_SYMTAB - -#include <linux/version.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/sched.h> -#include <linux/smp_lock.h> -#include <linux/kmod.h> -#include <linux/quotaops.h> -#include <asm/unistd.h> -#include <asm/uaccess.h> - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/lustre_lite.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_compat25.h> - -static int ll_commitcbd_check_event(struct ll_sb_info *sbi) -{ - int rc = 0; - ENTRY; - - spin_lock(&sbi->ll_commitcbd_lock); - if (sbi->ll_commitcbd_flags & LL_COMMITCBD_STOPPING) - GOTO(out, rc = 1); - - EXIT; - out: - spin_unlock(&sbi->ll_commitcbd_lock); - return rc; -} - -static int ll_commitcbd_main(void *arg) -{ - struct ll_sb_info *sbi = (struct ll_sb_info *)arg; - unsigned long flags; - ENTRY; - - lock_kernel(); - kportal_daemonize("lustre_commitcbd"); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(&current->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - - unlock_kernel(); - - /* Record that the thread is running */ - sbi->ll_commitcbd_waketime = LTIME_S(CURRENT_TIME); - sbi->ll_commitcbd_timeout = 10 * HZ; - sbi->ll_commitcbd_thread = current; - sbi->ll_commitcbd_flags = LL_COMMITCBD_RUNNING; - wake_up(&sbi->ll_commitcbd_ctl_waitq); - - /* And now, loop forever on requests */ - while (1) { - struct l_wait_info lwi = { 0 }; - l_wait_event(sbi->ll_commitcbd_waitq, - ll_commitcbd_check_event(sbi), &lwi); - - spin_lock(&sbi->ll_commitcbd_lock); - if (sbi->ll_commitcbd_flags & LL_COMMITCBD_STOPPING) { - spin_unlock(&sbi->ll_commitcbd_lock); - CERROR("lustre_commitd quitting\n"); - EXIT; - break; - } - - schedule_timeout(sbi->ll_commitcbd_timeout); - CERROR("commit callback daemon woken up - FIXME\n"); - spin_unlock(&sbi->ll_commitcbd_lock); - } - - sbi->ll_commitcbd_thread = NULL; - sbi->ll_commitcbd_flags = 
LL_COMMITCBD_STOPPED; - wake_up(&sbi->ll_commitcbd_ctl_waitq); - CDEBUG(D_NET, "commit callback daemon exiting %d\n", current->pid); - RETURN(0); -} - - - -int ll_commitcbd_setup(struct ll_sb_info *sbi) -{ - int rc; - struct l_wait_info lwi = { 0 }; - ENTRY; - - rc = kernel_thread(ll_commitcbd_main, (void *) sbi, - CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { - CERROR("cannot start thread\n"); - RETURN(rc); - } - l_wait_event(sbi->ll_commitcbd_ctl_waitq, - sbi->ll_commitcbd_flags & LL_COMMITCBD_RUNNING, &lwi); - RETURN(0); -} - - -int ll_commitcbd_cleanup(struct ll_sb_info *sbi) -{ - struct l_wait_info lwi = { 0 }; - sbi->ll_commitcbd_flags = LL_COMMITCBD_STOPPING; - - wake_up(&sbi->ll_commitcbd_waitq); - l_wait_event(sbi->ll_commitcbd_ctl_waitq, - sbi->ll_commitcbd_flags & LL_COMMITCBD_STOPPED, &lwi); - RETURN(0); -} diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c deleted file mode 100644 index 0684968..0000000 --- a/lustre/llite/dcache.c +++ /dev/null @@ -1,286 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/smp_lock.h> -#include <linux/quotaops.h> - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/obd_support.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_idl.h> -#include <linux/lustre_dlm.h> - -/* should NOT be called with the dcache lock, see fs/dcache.c */ -void ll_release(struct dentry *de) -{ - ENTRY; - OBD_FREE(de->d_fsdata, sizeof(struct ll_dentry_data)); - EXIT; -} - -int ll_delete(struct dentry *de) -{ - if (de->d_it != 0) { - CERROR("%s put dentry %p+%p with d_it %p\n", current->comm, - de, de->d_fsdata, de->d_it); - LBUG(); - } - return 0; -} - -void ll_set_dd(struct dentry *de) -{ - ENTRY; - LASSERT(de != NULL); - - lock_kernel(); - - if (de->d_fsdata == NULL) { - OBD_ALLOC(de->d_fsdata, sizeof(struct ll_dentry_data)); - sema_init(&ll_d2d(de)->lld_it_sem, 1); - } - - unlock_kernel(); - - EXIT; -} - -void ll_intent_release(struct dentry *de, struct lookup_intent *it) -{ - struct lustre_handle *handle; - ENTRY; - - if (it->it_lock_mode) { - handle = (struct lustre_handle *)it->it_lock_handle; - CDEBUG(D_DLMTRACE, "releasing lock with cookie "LPX64 - " from it %p\n", - handle->cookie, it); - ldlm_lock_decref(handle, it->it_lock_mode); - - /* intent_release may be called multiple times, from - this thread and we don't want to double-decref this - lock (see bug 494) */ - it->it_lock_mode = 0; - } - - if (!de->d_it || it->it_op == IT_RELEASED_MAGIC) { - EXIT; - return; - } - - if (de->d_it == it) - LL_GET_INTENT(de, it); - else - CDEBUG(D_INODE, "STRANGE intent release: %p %p\n", - de->d_it, it); - - EXIT; -} - -extern struct dentry *ll_find_alias(struct inode *, struct dentry *); - -static int revalidate2_finish(int flag, struct ptlrpc_request *request, - struct inode *parent, struct dentry **de, - struct lookup_intent *it, int offset, obd_id ino) -{ - struct ll_sb_info *sbi = ll_i2sbi(parent); - struct mds_body *body; - struct lov_stripe_md *lsm = NULL; - struct lov_mds_md 
*lmm; - int lmmsize; - int rc = 0; - ENTRY; - - /* NB 1 request reference will be taken away by ll_intent_lock() - * when I return */ - - if ((flag & LL_LOOKUP_NEGATIVE) != 0) - GOTO (out, rc = -ENOENT); - - /* We only get called if the mdc_enqueue() called from - * ll_intent_lock() was successful. Therefore the mds_body is - * present and correct, and the eadata is present (but still - * opaque, so only obd_unpackmd() can check the size) */ - body = lustre_msg_buf(request->rq_repmsg, offset, sizeof (*body)); - LASSERT (body != NULL); - LASSERT_REPSWABBED (request, offset); - - if (body->valid & OBD_MD_FLEASIZE) { - /* Only bother with this if inodes's LSM not set? */ - - if (body->eadatasize == 0) { - CERROR ("OBD_MD_FLEASIZE set, but eadatasize 0\n"); - GOTO (out, rc = -EPROTO); - } - lmmsize = body->eadatasize; - lmm = lustre_msg_buf (request->rq_repmsg, offset + 1, lmmsize); - LASSERT (lmm != NULL); - LASSERT_REPSWABBED (request, offset + 1); - - rc = obd_unpackmd (&sbi->ll_osc_conn, - &lsm, lmm, lmmsize); - if (rc < 0) { - CERROR ("Error %d unpacking eadata\n", rc); - LBUG(); - /* XXX don't know if I should do this... 
*/ - GOTO (out, rc); - /* or skip the ll_update_inode but still do - * mdc_lock_set_inode() */ - } - LASSERT (rc >= sizeof (*lsm)); - rc = 0; - } - - ll_update_inode((*de)->d_inode, body, lsm); - - if (lsm != NULL && - ll_i2info((*de)->d_inode)->lli_smd != lsm) - obd_free_memmd (&sbi->ll_osc_conn, &lsm); - - ll_mdc_lock_set_inode((struct lustre_handle *)it->it_lock_handle, - (*de)->d_inode); - out: - RETURN(rc); -} - -int ll_have_md_lock(struct dentry *de) -{ - struct ll_sb_info *sbi = ll_s2sbi(de->d_sb); - struct lustre_handle lockh; - struct ldlm_res_id res_id = { .name = {0} }; - struct obd_device *obddev; - int flags; - ENTRY; - - if (!de->d_inode) - RETURN(0); - - obddev = class_conn2obd(&sbi->ll_mdc_conn); - res_id.name[0] = de->d_inode->i_ino; - res_id.name[1] = de->d_inode->i_generation; - - CDEBUG(D_INFO, "trying to match res "LPU64"\n", res_id.name[0]); - - flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; - if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, - NULL, 0, LCK_PR, de->d_inode, &lockh)) { - ldlm_lock_decref(&lockh, LCK_PR); - RETURN(1); - } - - if (ldlm_lock_match(obddev->obd_namespace, flags, &res_id, LDLM_PLAIN, - NULL, 0, LCK_PW, de->d_inode, &lockh)) { - ldlm_lock_decref(&lockh, LCK_PW); - RETURN(1); - } - RETURN(0); -} - -int ll_revalidate2(struct dentry *de, int flags, struct lookup_intent *it) -{ - int rc; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,intent=%s\n", de->d_name.name, - LL_IT2STR(it)); - - /* We don't want to cache negative dentries, so return 0 immediately. - * We believe that this is safe, that negative dentries cannot be - * pinned by someone else */ - if (de->d_inode == NULL) { - CDEBUG(D_INODE, "negative dentry: ret 0 to force lookup2\n"); - RETURN(0); - } - - if (it == NULL || it->it_op == IT_GETATTR) { - /* We could just return 1 immediately, but since we should only - * be called in revalidate2 if we already have a lock, let's - * verify that. 
*/ - struct inode *inode = de->d_inode; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_device *obddev = class_conn2obd(&sbi->ll_mdc_conn); - struct ldlm_res_id res_id = - { .name = {inode->i_ino, (__u64)inode->i_generation} }; - struct lustre_handle lockh; - int flags; - flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; - rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id, - LDLM_PLAIN, NULL, 0, LCK_PR, inode, - &lockh); - if (rc) { - de->d_flags &= ~DCACHE_LUSTRE_INVALID; - if (it && it->it_op == IT_GETATTR) { - memcpy(it->it_lock_handle, &lockh, - sizeof(lockh)); - it->it_lock_mode = LCK_PR; - LL_SAVE_INTENT(de, it); - } else { - ldlm_lock_decref(&lockh, LCK_PR); - } - RETURN(1); - } - rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id, - LDLM_PLAIN, NULL, 0, LCK_PW, inode, - &lockh); - if (rc) { - de->d_flags &= ~DCACHE_LUSTRE_INVALID; - if (it && it->it_op == IT_GETATTR) { - memcpy(it->it_lock_handle, &lockh, - sizeof(lockh)); - it->it_lock_mode = LCK_PW; - LL_SAVE_INTENT(de, it); - } else { - ldlm_lock_decref(&lockh, LCK_PW); - } - RETURN(1); - } - if (S_ISDIR(de->d_inode->i_mode)) - ll_invalidate_inode_pages(de->d_inode); - d_unhash_aliases(de->d_inode); - RETURN(0); - } - - rc = ll_intent_lock(de->d_parent->d_inode, &de, it, revalidate2_finish); - if (rc < 0) { - if (rc != -ESTALE) { - CERROR("ll_intent_lock: rc %d : it->it_status %d\n", rc, - it->it_status); - } - RETURN(0); - } - /* unfortunately ll_intent_lock may cause a callback and revoke our - dentry */ - spin_lock(&dcache_lock); - list_del_init(&de->d_hash); - __d_rehash(de, 0); - spin_unlock(&dcache_lock); - - RETURN(1); -} - -struct dentry_operations ll_d_ops = { - .d_revalidate2 = ll_revalidate2, - .d_intent_release = ll_intent_release, - .d_release = ll_release, - .d_delete = ll_delete, -}; diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c deleted file mode 100644 index 115ed4e..0000000 --- a/lustre/llite/dir.c +++ /dev/null @@ -1,860 +0,0 @@ -/* -*- mode: c; 
c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/fs/minix/dir.c - * linux/fs/ext2/dir.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * ext2 directory handling functions - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * - * All code that works with directory layout had been switched to pagecache - * and moved here. AV - * - * Adapted for Lustre Light - * Copyright (C) 2002-2003, Cluster File Systems, Inc. - * - */ - -#include <linux/fs.h> -#include <linux/ext2_fs.h> -#include <linux/pagemap.h> -#include <linux/mm.h> -#include <linux/version.h> -#include <linux/smp_lock.h> -#include <asm/uaccess.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/locks.h> // for wait_on_buffer -#else -#include <linux/buffer_head.h> // for wait_on_buffer -#endif - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_idl.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_dlm.h> - -typedef struct ext2_dir_entry_2 ext2_dirent; - -#define PageChecked(page) test_bit(PG_checked, &(page)->flags) -#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) - - -static int ll_dir_prepare_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - return 0; -} - -/* returns the page unlocked, but with a reference */ -static int ll_dir_readpage(struct file *file, struct page *page) -{ - struct inode *inode = page->mapping->host; - struct ll_sb_info *sbi = ll_i2sbi(inode); - __u64 offset; - int rc = 0; - struct ptlrpc_request *request; - struct lustre_handle lockh; - struct 
mds_body *body; - struct lookup_intent it = { .it_op = IT_READDIR }; - struct mdc_op_data data; - struct obd_device *obddev = class_conn2obd(&sbi->ll_mdc_conn); - struct ldlm_res_id res_id = - { .name = {inode->i_ino, (__u64)inode->i_generation} }; - int flags = LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - if ((inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT <= page->index){ - /* XXX why do we need this exactly, and why do we think that - * an all-zero directory page is useful? - */ - CERROR("memsetting dir page %lu to zero (size %lld)\n", - page->index, inode->i_size); - memset(kmap(page), 0, PAGE_CACHE_SIZE); - kunmap(page); - GOTO(readpage_out, rc); - } - - rc = ldlm_lock_match(obddev->obd_namespace, flags, &res_id, - LDLM_PLAIN, NULL, 0, LCK_PR, inode, - &lockh); - if (!rc) { - ll_prepare_mdc_op_data(&data, inode, NULL, NULL, 0, 0); - - rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_PR, - &data, &lockh, NULL, 0, - ldlm_completion_ast, ll_mdc_blocking_ast, - inode); - request = (struct ptlrpc_request *)it.it_data; - if (request) - ptlrpc_req_finished(request); - if (rc < 0) { - CERROR("lock enqueue: err: %d\n", rc); - unlock_page(page); - RETURN(rc); - } - } - ldlm_lock_dump_handle(D_OTHER, &lockh); - - if (PageUptodate(page)) { - CERROR("Explain this please?\n"); - GOTO(readpage_out, rc); - } - - offset = page->index << PAGE_SHIFT; - rc = mdc_readpage(&sbi->ll_mdc_conn, inode->i_ino, - S_IFDIR, offset, page, &request); - if (!rc) { - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body)); - LASSERT (body != NULL); /* checked by mdc_readpage() */ - LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_readpage() */ - - inode->i_size = body->size; - } - ptlrpc_req_finished(request); - EXIT; - - readpage_out: - if (!rc) - SetPageUptodate(page); - - unlock_page(page); - ll_unlock(LCK_PR, &lockh); - if (rc != ELDLM_OK) - CERROR("ll_unlock: err: 
%d\n", rc); - return rc; -} - -struct address_space_operations ll_dir_aops = { - readpage: ll_dir_readpage, - prepare_write: ll_dir_prepare_write -}; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3)) -int waitfor_one_page(struct page *page) -{ - int error = 0; - struct buffer_head *bh, *head = page->buffers; - - bh = head; - do { - wait_on_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) - error = -EIO; - } while ((bh = bh->b_this_page) != head); - return error; -} -#elif (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -int waitfor_one_page(struct page *page) -{ - wait_on_page_locked(page); - return 0; -} -#endif - -/* - * ext2 uses block-sized chunks. Arguably, sector-sized ones would be - * more robust, but we have what we have - */ -static inline unsigned ext2_chunk_size(struct inode *inode) -{ - return inode->i_sb->s_blocksize; -} - -static inline void ext2_put_page(struct page *page) -{ - kunmap(page); - page_cache_release(page); -} - -static inline unsigned long dir_pages(struct inode *inode) -{ - return (inode->i_size+PAGE_CACHE_SIZE-1)>>PAGE_CACHE_SHIFT; -} - -extern void set_page_clean(struct page *page); - -static int ext2_commit_chunk(struct page *page, unsigned from, unsigned to) -{ - struct inode *dir = page->mapping->host; - loff_t new_size = (page->index << PAGE_CACHE_SHIFT) + to; - int err = 0; - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - dir->i_version = ++event; -#endif - if (new_size > dir->i_size) - dir->i_size = new_size; - SetPageUptodate(page); - set_page_clean(page); - - //page->mapping->a_ops->commit_write(NULL, page, from, to); - //if (IS_SYNC(dir)) - // err = waitfor_one_page(page); - return err; -} - -static void ext2_check_page(struct page *page) -{ - struct inode *dir = page->mapping->host; - unsigned chunk_size = ext2_chunk_size(dir); - char *kaddr = page_address(page); - // u32 max_inumber = le32_to_cpu(sb->u.ext2_sb.s_es->s_inodes_count); - unsigned offs, rec_len; - unsigned limit = PAGE_CACHE_SIZE; - ext2_dirent 
*p; - char *error; - - if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_CACHE_MASK; - if (limit & (chunk_size - 1)) { - CERROR("limit %d dir size %lld index %ld\n", - limit, dir->i_size, page->index); - goto Ebadsize; - } - for (offs = limit; offs<PAGE_CACHE_SIZE; offs += chunk_size) { - ext2_dirent *p = (ext2_dirent*)(kaddr + offs); - p->rec_len = cpu_to_le16(chunk_size); - p->name_len = 0; - p->inode = 0; - } - if (!limit) - goto out; - } - for (offs = 0; offs <= limit - EXT2_DIR_REC_LEN(1); offs += rec_len) { - p = (ext2_dirent *)(kaddr + offs); - rec_len = le16_to_cpu(p->rec_len); - - if (rec_len < EXT2_DIR_REC_LEN(1)) - goto Eshort; - if (rec_len & 3) - goto Ealign; - if (rec_len < EXT2_DIR_REC_LEN(p->name_len)) - goto Enamelen; - if (((offs + rec_len - 1) ^ offs) & ~(chunk_size-1)) - goto Espan; - // if (le32_to_cpu(p->inode) > max_inumber) - //goto Einumber; - } - if (offs != limit) - goto Eend; -out: - SetPageChecked(page); - return; - - /* Too bad, we had an error */ - -Ebadsize: - CERROR("ext2_check_page" - "size of directory #%lu is not a multiple of chunk size\n", - dir->i_ino - ); - goto fail; -Eshort: - error = "rec_len is smaller than minimal"; - goto bad_entry; -Ealign: - error = "unaligned directory entry"; - goto bad_entry; -Enamelen: - error = "rec_len is too small for name_len"; - goto bad_entry; -Espan: - error = "directory entry across blocks"; - goto bad_entry; - //Einumber: - // error = "inode out of bounds"; -bad_entry: - CERROR("ext2_check_page: bad entry in directory #%lu: %s - " - "offset=%lu+%u, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT), offs, - (unsigned long) le32_to_cpu(p->inode), - rec_len, p->name_len); - goto fail; -Eend: - p = (ext2_dirent *)(kaddr + offs); - CERROR("ext2_check_page" - "entry in directory #%lu spans the page boundary" - "offset=%lu, inode=%lu", - dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, - (unsigned long) 
le32_to_cpu(p->inode)); -fail: - SetPageChecked(page); - SetPageError(page); - LBUG(); -} - -static struct page *ll_get_dir_page(struct inode *dir, unsigned long n) -{ - struct address_space *mapping = dir->i_mapping; - struct page *page = read_cache_page(mapping, n, - (filler_t*)mapping->a_ops->readpage, NULL); - if (!IS_ERR(page)) { - wait_on_page(page); - kmap(page); - if (!PageUptodate(page)) - goto fail; - if (!PageChecked(page)) - ext2_check_page(page); - if (PageError(page)) - goto fail; - } - return page; - -fail: - ext2_put_page(page); - return ERR_PTR(-EIO); -} - -/* - * NOTE! unlike strncmp, ext2_match returns 1 for success, 0 for failure. - * - * len <= EXT2_NAME_LEN and de != NULL are guaranteed by caller. - */ -static inline int ext2_match (int len, const char * const name, - struct ext2_dir_entry_2 * de) -{ - if (len != de->name_len) - return 0; - if (!de->inode) - return 0; - return !memcmp(name, de->name, len); -} - -/* - * p is at least 6 bytes before the end of page - */ -static inline ext2_dirent *ext2_next_entry(ext2_dirent *p) -{ - return (ext2_dirent *)((char*)p + le16_to_cpu(p->rec_len)); -} - -static inline unsigned -ext2_validate_entry(char *base, unsigned offset, unsigned mask) -{ - ext2_dirent *de = (ext2_dirent*)(base + offset); - ext2_dirent *p = (ext2_dirent*)(base + (offset&mask)); - while ((char*)p < (char*)de) - p = ext2_next_entry(p); - return (char *)p - base; -} - -static unsigned char ext2_filetype_table[EXT2_FT_MAX] = { - [EXT2_FT_UNKNOWN] DT_UNKNOWN, - [EXT2_FT_REG_FILE] DT_REG, - [EXT2_FT_DIR] DT_DIR, - [EXT2_FT_CHRDEV] DT_CHR, - [EXT2_FT_BLKDEV] DT_BLK, - [EXT2_FT_FIFO] DT_FIFO, - [EXT2_FT_SOCK] DT_SOCK, - [EXT2_FT_SYMLINK] DT_LNK, -}; - -static unsigned int ll_dt2fmt[DT_WHT + 1] = { - [EXT2_FT_UNKNOWN] 0, - [EXT2_FT_REG_FILE] S_IFREG, - [EXT2_FT_DIR] S_IFDIR, - [EXT2_FT_CHRDEV] S_IFCHR, - [EXT2_FT_BLKDEV] S_IFBLK, - [EXT2_FT_FIFO] S_IFIFO, - [EXT2_FT_SOCK] S_IFSOCK, - [EXT2_FT_SYMLINK] S_IFLNK -}; - -#define S_SHIFT 12 
-static unsigned char ext2_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] EXT2_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] EXT2_FT_DIR, - [S_IFCHR >> S_SHIFT] EXT2_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] EXT2_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] EXT2_FT_FIFO, - [S_IFSOCK >> S_SHIFT] EXT2_FT_SOCK, - [S_IFLNK >> S_SHIFT] EXT2_FT_SYMLINK, -}; - -static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) -{ - mode_t mode = inode->i_mode; - de->file_type = ext2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - -int ll_readdir(struct file * filp, void * dirent, filldir_t filldir) -{ - loff_t pos = filp->f_pos; - struct inode *inode = filp->f_dentry->d_inode; - // XXX struct super_block *sb = inode->i_sb; - unsigned offset = pos & ~PAGE_CACHE_MASK; - unsigned long n = pos >> PAGE_CACHE_SHIFT; - unsigned long npages = dir_pages(inode); - unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); - unsigned char *types = NULL; - int need_revalidate = (filp->f_version != inode->i_version); - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) - GOTO(done, 0); - - types = ext2_filetype_table; - - for ( ; n < npages; n++, offset = 0) { - char *kaddr, *limit; - ext2_dirent *de; - struct page *page; - - CDEBUG(D_EXT2, "reading %lu of dir %lu page %lu, size %llu\n", - PAGE_CACHE_SIZE, inode->i_ino, n, inode->i_size); - page = ll_get_dir_page(inode, n); - - /* size might have been updated by mdc_readpage */ - npages = dir_pages(inode); - - if (IS_ERR(page)) - continue; - kaddr = page_address(page); - if (need_revalidate) { - offset = ext2_validate_entry(kaddr, offset, chunk_mask); - need_revalidate = 0; - } - de = (ext2_dirent *)(kaddr+offset); - limit = kaddr + PAGE_CACHE_SIZE - EXT2_DIR_REC_LEN(1); - for ( ;(char*)de <= limit; de = ext2_next_entry(de)) - if (de->inode) { - int over; - unsigned char d_type = DT_UNKNOWN; - - if (types && de->file_type < EXT2_FT_MAX) - d_type = 
types[de->file_type]; - - offset = (char *)de - kaddr; - over = filldir(dirent, de->name, de->name_len, - (n<<PAGE_CACHE_SHIFT) | offset, - le32_to_cpu(de->inode), d_type); - if (over) { - ext2_put_page(page); - GOTO(done,0); - } - } - ext2_put_page(page); - } - -done: - filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; - filp->f_version = inode->i_version; - UPDATE_ATIME(inode); - RETURN(0); -} - -/* - * ext2_find_entry() - * - * finds an entry in the specified directory with the wanted name. It - * returns the page in which the entry was found, and the entry itself - * (as a parameter - res_dir). Page is returned mapped and unlocked. - * Entry is guaranteed to be valid. - */ -struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, - struct dentry *dentry, struct page ** res_page) -{ - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - unsigned reclen = EXT2_DIR_REC_LEN(namelen); - unsigned long start, n; - unsigned long npages = dir_pages(dir); - struct page *page = NULL; - ext2_dirent * de; - - /* OFFSET_CACHE */ - *res_page = NULL; - - // start = dir->u.ext2_i.i_dir_start_lookup; - start = 0; - if (start >= npages) - start = 0; - n = start; - do { - char *kaddr; - page = ll_get_dir_page(dir, n); - if (!IS_ERR(page)) { - kaddr = page_address(page); - de = (ext2_dirent *) kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; - while ((char *) de <= kaddr) { - if (ext2_match (namelen, name, de)) - goto found; - de = ext2_next_entry(de); - } - ext2_put_page(page); - } - if (++n >= npages) - n = 0; - } while (n != start); - return NULL; - -found: - *res_page = page; - // dir->u.ext2_i.i_dir_start_lookup = n; - return de; -} - -struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) -{ - struct page *page = ll_get_dir_page(dir, 0); - ext2_dirent *de = NULL; - - if (!IS_ERR(page)) { - de = ext2_next_entry((ext2_dirent *) page_address(page)); - *p = page; - } - return de; -} - -obd_id ll_inode_by_name(struct inode * dir, struct 
dentry *dentry, int *type) -{ - obd_id res = 0; - struct ext2_dir_entry_2 * de; - struct page *page; - - de = ext2_find_entry (dir, dentry, &page); - if (de) { - res = le32_to_cpu(de->inode); - *type = ll_dt2fmt[de->file_type]; - kunmap(page); - page_cache_release(page); - } - return res; -} - -/* Releases the page */ -void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, struct inode *inode) -{ - unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + le16_to_cpu(de->rec_len); - int err; - - lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - LBUG(); - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - err = ext2_commit_chunk(page, from, to); - unlock_page(page); - ext2_put_page(page); -} - -/* - * Parent is locked. - */ -int ll_add_link (struct dentry *dentry, struct inode *inode) -{ - struct inode *dir = dentry->d_parent->d_inode; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; - unsigned reclen = EXT2_DIR_REC_LEN(namelen); - unsigned short rec_len, name_len; - struct page *page = NULL; - ext2_dirent * de; - unsigned long npages = dir_pages(dir); - unsigned long n; - char *kaddr; - unsigned from, to; - int err; - - /* We take care of directory expansion in the same loop */ - for (n = 0; n <= npages; n++) { - page = ll_get_dir_page(dir, n); - err = PTR_ERR(page); - if (IS_ERR(page)) - goto out; - kaddr = page_address(page); - de = (ext2_dirent *)kaddr; - kaddr += PAGE_CACHE_SIZE - reclen; - while ((char *)de <= kaddr) { - err = -EEXIST; - if (ext2_match (namelen, name, de)) - goto out_page; - name_len = EXT2_DIR_REC_LEN(de->name_len); - rec_len = le16_to_cpu(de->rec_len); - if ( n==npages && rec_len == 0) { - CERROR("Fatal dir behaviour\n"); - goto out_page; - } - if (!de->inode && rec_len >= reclen) - goto got_it; - if (rec_len >= name_len + reclen) - goto 
got_it; - de = (ext2_dirent *) ((char *) de + rec_len); - } - ext2_put_page(page); - } - LBUG(); - return -EINVAL; - -got_it: - from = (char*)de - (char*)page_address(page); - to = from + rec_len; - lock_page(page); - err = page->mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - goto out_unlock; - if (de->inode) { - ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len); - de1->rec_len = cpu_to_le16(rec_len - name_len); - de->rec_len = cpu_to_le16(name_len); - de = de1; - } - de->name_len = namelen; - memcpy (de->name, name, namelen); - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - CDEBUG(D_INODE, "type set to %o\n", de->file_type); - dir->i_mtime = dir->i_ctime = CURRENT_TIME; - err = ext2_commit_chunk(page, from, to); - - // change_inode happens with the commit_chunk - /* XXX OFFSET_CACHE */ - -out_unlock: - unlock_page(page); -out_page: - ext2_put_page(page); -out: - return err; -} - -/* - * ext2_delete_entry deletes a directory entry by merging it with the - * previous entry. Page is up-to-date. Releases the page. 
- */ -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) -{ - struct address_space *mapping = page->mapping; - struct inode *inode = mapping->host; - char *kaddr = page_address(page); - unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); - unsigned to = ((char*)dir - kaddr) + le16_to_cpu(dir->rec_len); - ext2_dirent * pde = NULL; - ext2_dirent * de = (ext2_dirent *) (kaddr + from); - int err; - - while ((char*)de < (char*)dir) { - pde = de; - de = ext2_next_entry(de); - } - if (pde) - from = (char*)pde - (char*)page_address(page); - lock_page(page); - err = mapping->a_ops->prepare_write(NULL, page, from, to); - if (err) - LBUG(); - if (pde) - pde->rec_len = cpu_to_le16(to-from); - dir->inode = 0; - inode->i_ctime = inode->i_mtime = CURRENT_TIME; - err = ext2_commit_chunk(page, from, to); - unlock_page(page); - ext2_put_page(page); - return err; -} - -/* - * Set the first fragment of directory. - */ -int ext2_make_empty(struct inode *inode, struct inode *parent) -{ - struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); - unsigned chunk_size = ext2_chunk_size(inode); - struct ext2_dir_entry_2 * de; - char *base; - int err; - ENTRY; - - if (!page) - return -ENOMEM; - base = kmap(page); - if (!base) - return -ENOMEM; - - err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size); - if (err) - goto fail; - - de = (struct ext2_dir_entry_2 *) base; - de->name_len = 1; - de->rec_len = cpu_to_le16(EXT2_DIR_REC_LEN(1)); - memcpy (de->name, ".\0\0", 4); - de->inode = cpu_to_le32(inode->i_ino); - ext2_set_de_type (de, inode); - - de = (struct ext2_dir_entry_2 *) (base + EXT2_DIR_REC_LEN(1)); - de->name_len = 2; - de->rec_len = cpu_to_le16(chunk_size - EXT2_DIR_REC_LEN(1)); - de->inode = cpu_to_le32(parent->i_ino); - memcpy (de->name, "..\0", 4); - ext2_set_de_type (de, inode); - - err = ext2_commit_chunk(page, 0, chunk_size); -fail: - kunmap(page); - unlock_page(page); - 
page_cache_release(page); - ENTRY; - return err; -} - -/* - * routine to check that the specified directory is empty (for rmdir) - */ -int ext2_empty_dir (struct inode * inode) -{ - struct page *page = NULL; - unsigned long i, npages = dir_pages(inode); - - for (i = 0; i < npages; i++) { - char *kaddr; - ext2_dirent * de; - page = ll_get_dir_page(inode, i); - - if (IS_ERR(page)) - continue; - - kaddr = page_address(page); - de = (ext2_dirent *)kaddr; - kaddr += PAGE_CACHE_SIZE-EXT2_DIR_REC_LEN(1); - - while ((char *)de <= kaddr) { - if (de->inode != 0) { - /* check for . and .. */ - if (de->name[0] != '.') - goto not_empty; - if (de->name_len > 2) - goto not_empty; - if (de->name_len < 2) { - if (de->inode != - cpu_to_le32(inode->i_ino)) - goto not_empty; - } else if (de->name[1] != '.') - goto not_empty; - } - de = ext2_next_entry(de); - } - ext2_put_page(page); - } - return 1; - -not_empty: - ext2_put_page(page); - return 0; -} - -static int ll_dir_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct obd_ioctl_data *data; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino, - inode->i_generation, inode, cmd); - - if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */ - return -ENOTTY; - - switch(cmd) { - case IOC_MDC_LOOKUP: { - struct ptlrpc_request *request = NULL; - struct ll_fid fid; - char *buf = NULL; - struct mds_body *body; - char *filename; - int namelen, rc, err, len = 0; - unsigned long valid; - - rc = obd_ioctl_getdata(&buf, &len, (void *)arg); - if (rc) - RETURN(rc); - data = (void *)buf; - - filename = data->ioc_inlbuf1; - namelen = data->ioc_inllen1; - - if (namelen < 1) { - CERROR("IOC_MDC_LOOKUP missing filename\n"); - GOTO(out, rc = -EINVAL); - } - - valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE; - ll_inode2fid(&fid, inode); - rc = mdc_getattr_name(&sbi->ll_mdc_conn, &fid, - filename, namelen, valid, 0, &request); - if (rc < 0) { - 
CERROR("mdc_getattr_name: %d\n", rc); - GOTO(out, rc); - } - - body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*body)); - LASSERT(body != NULL); /* checked by mdc_getattr_name */ - LASSERT_REPSWABBED(request, 0);/* swabbed by mdc_getattr_name */ - - /* surely there's a better way -phik */ - data->ioc_obdo1.o_mode = body->mode; - data->ioc_obdo1.o_uid = body->uid; - data->ioc_obdo1.o_gid = body->gid; - - ptlrpc_req_finished(request); - - err = copy_to_user((void *)arg, buf, len); - if (err) - GOTO(out, rc = -EFAULT); - - EXIT; - out: - obd_ioctl_freedata(buf, len); - return rc; - } - default: - CERROR("unrecognized ioctl %#x\n", cmd); - RETURN(-ENOTTY); - } -} - -int ll_dir_open(struct inode *inode, struct file *file) -{ - return ll_file_open(inode, file); -} - -int ll_dir_release(struct inode *inode, struct file *file) -{ - return ll_file_release(inode, file); -} - -struct file_operations ll_dir_operations = { - open: ll_dir_open, - release: ll_dir_release, - read: generic_read_dir, - readdir: ll_readdir, - ioctl: ll_dir_ioctl -}; - diff --git a/lustre/llite/file.c b/lustre/llite/file.c deleted file mode 100644 index 943ba1b..0000000 --- a/lustre/llite/file.c +++ /dev/null @@ -1,1271 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LLITE -#include <linux/lustre_dlm.h> -#include <linux/lustre_lite.h> -#include <linux/obd_lov.h> /* for lov_mds_md_size() in lov_setstripe() */ -#include <linux/random.h> -#include <linux/pagemap.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/lustre_compat25.h> -#endif - -int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc); -extern int ll_setattr(struct dentry *de, struct iattr *attr); - -static int ll_mdc_close(struct lustre_handle *mdc_conn, struct inode *inode, - struct file *file) -{ - struct ll_file_data *fd = file->private_data; - struct ptlrpc_request *req = NULL; - unsigned long flags; - struct obd_import *imp; - int rc; - ENTRY; - - /* Complete the open request and remove it from replay list */ - rc = mdc_close(&ll_i2sbi(inode)->ll_mdc_conn, inode->i_ino, - inode->i_mode, &fd->fd_mds_och.och_fh, &req); - if (rc) - CERROR("inode %lu close failed: rc = %d\n", inode->i_ino, rc); - - imp = fd->fd_mds_och.och_req->rq_import; - LASSERT(imp != NULL); - spin_lock_irqsave(&imp->imp_lock, flags); - - DEBUG_REQ(D_HA, fd->fd_mds_och.och_req, "matched open req %p", - fd->fd_mds_och.och_req); - - /* We held on to the request for replay until we saw a close for that - * file. Now that we've closed it, it gets replayed on the basis of - * its transno only. */ - spin_lock (&fd->fd_mds_och.och_req->rq_lock); - fd->fd_mds_och.och_req->rq_replay = 0; - spin_unlock (&fd->fd_mds_och.och_req->rq_lock); - - if (fd->fd_mds_och.och_req->rq_transno) { - /* This open created a file, so it needs replay as a - * normal transaction now. 
Our reference to it now - * effectively owned by the imp_replay_list, and it'll - * be committed just like other transno-having - * requests from here on out. */ - - /* We now retain this close request, so that it is - * replayed if the open is replayed. We duplicate the - * transno, so that we get freed at the right time, - * and rely on the difference in xid to keep - * everything ordered correctly. - * - * But! If this close was already given a transno - * (because it caused real unlinking of an - * open-unlinked file, f.e.), then we'll be ordered on - * the basis of that and we don't need to do anything - * magical here. */ - if (!req->rq_transno) { - req->rq_transno = fd->fd_mds_och.och_req->rq_transno; - ptlrpc_retain_replayable_request(req, imp); - } - spin_unlock_irqrestore(&imp->imp_lock, flags); - - /* Should we free_committed now? we always free before - * replay, so it's probably a wash. We could check to - * see if the fd_req should already be committed, in - * which case we can avoid the whole retain_replayable - * dance. */ - } else { - /* No transno means that we can just drop our ref. */ - spin_unlock_irqrestore(&imp->imp_lock, flags); - } - ptlrpc_req_finished(fd->fd_mds_och.och_req); - - /* Do this after the fd_req->rq_transno check, because we don't want - * to bounce off zero references. */ - ptlrpc_req_finished(req); - fd->fd_mds_och.och_fh.cookie = DEAD_HANDLE_MAGIC; - file->private_data = NULL; - OBD_SLAB_FREE(fd, ll_file_data_slab, sizeof *fd); - - RETURN(-abs(rc)); -} - -/* While this returns an error code, fput() the caller does not, so we need - * to make every effort to clean up all of our state here. Also, applications - * rarely check close errors and even if an error is returned they will not - * re-try the close call. 
- */ -int ll_file_release(struct inode *inode, struct file *file) -{ - struct ll_file_data *fd; - struct obdo oa; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - int rc = 0, rc2; - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - /* don't do anything for / */ - if (inode->i_sb->s_root == file->f_dentry) - RETURN(0); - - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_RELEASE); - fd = (struct ll_file_data *)file->private_data; - if (!fd) /* no process opened the file after an mcreate */ - RETURN(rc = 0); - - /* we might not be able to get a valid handle on this file - * again so we really want to flush our write cache.. */ - if (S_ISREG(inode->i_mode)) { - filemap_fdatasync(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); - - if (lsm != NULL) { - memset(&oa, 0, sizeof(oa)); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLTYPE | OBD_MD_FLID; - - memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE); - oa.o_valid |= OBD_MD_FLHANDLE; - - rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (rc) - CERROR("inode %lu object close failed: rc = " - "%d\n", inode->i_ino, rc); - } - } - - rc2 = ll_mdc_close(&sbi->ll_mdc_conn, inode, file); - if (rc2 && !rc) - rc = rc2; - - RETURN(rc); -} - -static int ll_local_open(struct file *file, struct lookup_intent *it) -{ - struct ptlrpc_request *req = it->it_data; - struct ll_file_data *fd; - struct mds_body *body; - ENTRY; - - body = lustre_msg_buf (req->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* reply already checked out */ - LASSERT_REPSWABBED (req, 1); /* and swabbed down */ - - LASSERT(!file->private_data); - - OBD_SLAB_ALLOC(fd, ll_file_data_slab, SLAB_KERNEL, sizeof *fd); - /* We can't handle this well without reorganizing ll_file_open and - * ll_mdc_close, so don't even try right now. 
*/ - LASSERT(fd != NULL); - - memset(fd, 0, sizeof(*fd)); - - memcpy(&fd->fd_mds_och.och_fh, &body->handle, sizeof(body->handle)); - fd->fd_mds_och.och_req = it->it_data; - file->private_data = fd; - - RETURN(0); -} - -static int ll_osc_open(struct lustre_handle *conn, struct inode *inode, - struct file *file, struct lov_stripe_md *lsm) -{ - struct ll_file_data *fd = file->private_data; - struct obdo *oa; - int rc; - ENTRY; - - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); - oa->o_id = lsm->lsm_object_id; - oa->o_mode = S_IFREG; - oa->o_valid = (OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLBLOCKS | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - rc = obd_open(conn, oa, lsm, NULL, &fd->fd_ost_och); - if (rc) - GOTO(out, rc); - - file->f_flags &= ~O_LOV_DELAY_CREATE; - obdo_to_inode(inode, oa, OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | - OBD_MD_FLMTIME | OBD_MD_FLCTIME); - - EXIT; -out: - obdo_free(oa); - return rc; -} - -/* Caller must hold lli_open_sem to protect lli->lli_smd from changing and - * duplicate objects from being created. We only install lsm to lli_smd if - * the mdc open was successful (hence stored stripe MD on MDS), otherwise - * other nodes could try to create different objects for the same file. - */ -static int ll_create_obj(struct lustre_handle *conn, struct inode *inode, - struct file *file, struct lov_stripe_md *lsm) -{ - struct ptlrpc_request *req = NULL; - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_mds_md *lmm = NULL; - struct obdo *oa; - struct iattr iattr; - struct mdc_op_data op_data; - int rc, err, lmm_size = 0;; - ENTRY; - - oa = obdo_alloc(); - if (!oa) - RETURN(-ENOMEM); - - oa->o_mode = S_IFREG | 0600; - oa->o_id = inode->i_ino; - /* Keep these 0 for now, because chown/chgrp does not change the - * ownership on the OST, and we don't want to allow BA OST NFS - * users to access these objects by mistake. 
*/ - oa->o_uid = 0; - oa->o_gid = 0; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLUID | OBD_MD_FLGID; - - rc = obd_create(conn, oa, &lsm, NULL); - if (rc) { - CERROR("error creating objects for inode %lu: rc = %d\n", - inode->i_ino, rc); - if (rc > 0) { - CERROR("obd_create returned invalid rc %d\n", rc); - rc = -EIO; - } - GOTO(out_oa, rc); - } - obdo_to_inode(inode, oa, OBD_MD_FLBLKSZ); - - LASSERT(lsm && lsm->lsm_object_id); - rc = obd_packmd(conn, &lmm, lsm); - if (rc < 0) - GOTO(out_destroy, rc); - - lmm_size = rc; - - /* Save the stripe MD with this file on the MDS */ - memset(&iattr, 0, sizeof(iattr)); - iattr.ia_valid = ATTR_FROM_OPEN; - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - rc = mdc_setattr(&ll_i2sbi(inode)->ll_mdc_conn, &op_data, - &iattr, lmm, lmm_size, &req); - ptlrpc_req_finished(req); - - obd_free_diskmd (conn, &lmm); - - /* If we couldn't complete mdc_open() and store the stripe MD on the - * MDS, we need to destroy the objects now or they will be leaked. - */ - if (rc) { - CERROR("error: storing stripe MD for %lu: rc %d\n", - inode->i_ino, rc); - GOTO(out_destroy, rc); - } - lli->lli_smd = lsm; - lli->lli_maxbytes = lsm->lsm_maxbytes; - - EXIT; -out_oa: - obdo_free(oa); - return rc; - -out_destroy: - obdo_from_inode(oa, inode, OBD_MD_FLTYPE); - oa->o_id = lsm->lsm_object_id; - oa->o_valid |= OBD_MD_FLID; - err = obd_destroy(conn, oa, lsm, NULL); - obd_free_memmd(conn, &lsm); - if (err) - CERROR("error uncreating inode %lu objects: rc %d\n", - inode->i_ino, err); - goto out_oa; -} - -/* Open a file, and (for the very first open) create objects on the OSTs at - * this time. If opened with O_LOV_DELAY_CREATE, then we don't do the object - * creation or open until ll_lov_setstripe() ioctl is called. We grab - * lli_open_sem to ensure no other process will create objects, send the - * stripe MD to the MDS, or try to destroy the objects if that fails. 
- * - * If we already have the stripe MD locally then we don't request it in - * mdc_open(), by passing a lmm_size = 0. - * - * It is up to the application to ensure no other processes open this file - * in the O_LOV_DELAY_CREATE case, or the default striping pattern will be - * used. We might be able to avoid races of that sort by getting lli_open_sem - * before returning in the O_LOV_DELAY_CREATE case and dropping it here - * or in ll_file_release(), but I'm not sure that is desirable/necessary. - */ -extern int ll_it_open_error(int phase, struct lookup_intent *it); - -int ll_file_open(struct inode *inode, struct file *file) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - struct lustre_handle *conn = ll_i2obdconn(inode); - struct lookup_intent *it; - struct lov_stripe_md *lsm; - int rc = 0; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - /* don't do anything for / */ - if (inode->i_sb->s_root == file->f_dentry) - RETURN(0); - - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_OPEN); - LL_GET_INTENT(file->f_dentry, it); - rc = ll_it_open_error(IT_OPEN_OPEN, it); - if (rc) - RETURN(rc); - - rc = ll_local_open(file, it); - if (rc) - LBUG(); - - mdc_set_open_replay_data(&((struct ll_file_data *) - file->private_data)->fd_mds_och); - if (!S_ISREG(inode->i_mode)) - RETURN(0); - - lsm = lli->lli_smd; - if (lsm == NULL) { - if (file->f_flags & O_LOV_DELAY_CREATE) { - CDEBUG(D_INODE, "delaying object creation\n"); - RETURN(0); - } - down(&lli->lli_open_sem); - if (!lli->lli_smd) { - rc = ll_create_obj(conn, inode, file, NULL); - up(&lli->lli_open_sem); - if (rc) - GOTO(out_close, rc); - } else { - CERROR("warning: stripe already set on ino %lu\n", - inode->i_ino); - up(&lli->lli_open_sem); - } - lsm = lli->lli_smd; - } - - rc = ll_osc_open(conn, inode, file, lsm); - if (rc) - GOTO(out_close, rc); - RETURN(0); - - out_close: - 
ll_mdc_close(&sbi->ll_mdc_conn, inode, file); - return rc; -} - -/* - * really does the getattr on the inode and updates its fields - */ -int ll_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm, - void *ostdata) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - struct ptlrpc_request_set *set; - struct obdo oa; - int bef, aft; - unsigned long before, after; - int rc; - ENTRY; - - LASSERT(lsm); - LASSERT(sbi); - LASSERT(lli); - - memset(&oa, 0, sizeof oa); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE | - OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | OBD_MD_FLMTIME | - OBD_MD_FLCTIME; - - if (ostdata != NULL) { - memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE); - oa.o_valid |= OBD_MD_FLHANDLE; - } - - /* getattr can race with writeback. we don't want to trust a getattr - * that doesn't include the writeback of our farthest cached pages - * that it raced with. */ - /* Now that the OSC knows the cached-page status, it can and should be - * adjusting its getattr results to include the maximum cached offset - * for its stripe(s). 
*/ - do { - bef = obd_last_dirty_offset(ll_i2obdconn(inode), lli->lli_smd, - &before); -#if 0 - rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm); -#else - set = ptlrpc_prep_set (); - if (set == NULL) { - CERROR ("ENOMEM allocing request set\n"); - rc = -ENOMEM; - } else { - rc = obd_getattr_async(&sbi->ll_osc_conn, &oa, lsm, set); - if (rc == 0) - rc = ptlrpc_set_wait (set); - ptlrpc_set_destroy (set); - } -#endif - if (rc) - RETURN(rc); - - aft = obd_last_dirty_offset(ll_i2obdconn(inode), lli->lli_smd, - &after); - CDEBUG(D_INODE, " %d,%lu -> %d,%lu\n", bef, before, aft, after); - } while (bef == 0 && - (aft != 0 || after < before) && - oa.o_size < ((u64)before + 1) << PAGE_CACHE_SHIFT); - - obdo_to_inode(inode, &oa, (OBD_MD_FLBLOCKS | OBD_MD_FLBLKSZ | - OBD_MD_FLMTIME | OBD_MD_FLCTIME)); - if (inode->i_blksize < PAGE_CACHE_SIZE) - inode->i_blksize = PAGE_CACHE_SIZE; - - /* make sure getattr doesn't return a size that causes writeback - * to forget about cached writes */ - if ((aft == 0) && oa.o_size < ((u64)after + 1) << PAGE_CACHE_SHIFT) { - CDEBUG(D_INODE, "cached at %lu, keeping %llu i_size instead " - "of oa "LPU64"\n", after, inode->i_size, - oa.o_size); - RETURN(0); - } - - obdo_to_inode(inode, &oa, OBD_MD_FLSIZE); - - CDEBUG(D_INODE, "objid "LPX64" size %Lu/%Lu blksize %lu\n", - lsm->lsm_object_id, inode->i_size, inode->i_size, - inode->i_blksize); - RETURN(0); -} - -/* - * some callers, notably truncate, really don't want i_size set based - * on the the size returned by the getattr, or lock acquisition in - * the future. - */ -int ll_extent_lock_no_validate(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, - int mode, struct ldlm_extent *extent, - struct lustre_handle *lockh) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc, flags = 0; - ENTRY; - - LASSERT(lockh->cookie == 0); - - /* XXX phil: can we do this? won't it screw the file size up? 
*/ - if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || - (sbi->ll_flags & LL_SBI_NOLCK)) - RETURN(0); - - CDEBUG(D_DLMTRACE, "Locking inode %lu, start "LPU64" end "LPU64"\n", - inode->i_ino, extent->start, extent->end); - - rc = obd_enqueue(&sbi->ll_osc_conn, lsm, NULL, LDLM_EXTENT, extent, - sizeof(extent), mode, &flags, ll_extent_lock_callback, - inode, lockh); - - RETURN(rc); -} - -/* - * this grabs a lock and manually implements behaviour that makes it look like - * the OST is returning the file size with each lock acquisition. - */ -int ll_extent_lock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct ldlm_extent *extent, struct lustre_handle *lockh) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ldlm_extent size_lock; - struct lustre_handle match_lockh = {0}; - int flags, rc, matched; - ENTRY; - - rc = ll_extent_lock_no_validate(fd, inode, lsm, mode, extent, lockh); - if (rc != ELDLM_OK) - RETURN(rc); - - if (test_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags)) - RETURN(0); - - rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL); - if (rc) { - ll_extent_unlock(fd, inode, lsm, mode, lockh); - RETURN(rc); - } - - size_lock.start = inode->i_size; - size_lock.end = OBD_OBJECT_EOF; - - /* XXX I bet we should be checking the lock ignore flags.. */ - flags = LDLM_FL_CBPENDING | LDLM_FL_BLOCK_GRANTED | LDLM_FL_MATCH_DATA; - matched = obd_match(&ll_i2sbi(inode)->ll_osc_conn, lsm, LDLM_EXTENT, - &size_lock, sizeof(size_lock), LCK_PR, &flags, - inode, &match_lockh); - - /* hey, alright, we hold a size lock that covers the size we - * just found, its not going to change for a while.. 
*/ - if (matched == 1) { - set_bit(LLI_F_HAVE_SIZE_LOCK, &lli->lli_flags); - obd_cancel(&ll_i2sbi(inode)->ll_osc_conn, lsm, LCK_PR, - &match_lockh); - } - - RETURN(0); -} - -int ll_extent_unlock(struct ll_file_data *fd, struct inode *inode, - struct lov_stripe_md *lsm, int mode, - struct lustre_handle *lockh) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc; - ENTRY; - - /* XXX phil: can we do this? won't it screw the file size up? */ - if ((fd && (fd->fd_flags & LL_FILE_IGNORE_LOCK)) || - (sbi->ll_flags & LL_SBI_NOLCK)) - RETURN(0); - - rc = obd_cancel(&sbi->ll_osc_conn, lsm, mode, lockh); - - RETURN(rc); -} - -static inline void ll_remove_suid(struct inode *inode) -{ - unsigned int mode; - - /* set S_IGID if S_IXGRP is set, and always set S_ISUID */ - mode = (inode->i_mode & S_IXGRP)*(S_ISGID/S_IXGRP) | S_ISUID; - - /* was any of the uid bits set? */ - mode &= inode->i_mode; - if (mode && !capable(CAP_FSETID)) { - inode->i_mode &= ~mode; - // XXX careful here - we cannot change the size - } -} - -#if 0 -static void ll_update_atime(struct inode *inode) -{ -#ifdef USE_ATIME - struct iattr attr; - - attr.ia_atime = LTIME_S(CURRENT_TIME); - attr.ia_valid = ATTR_ATIME; - - if (inode->i_atime == attr.ia_atime) return; - if (IS_RDONLY(inode)) return; - if (IS_NOATIME(inode)) return; - - /* ll_inode_setattr() sets inode->i_atime from attr.ia_atime */ - ll_inode_setattr(inode, &attr, 0); -#else - /* update atime, but don't explicitly write it out just this change */ - inode->i_atime = CURRENT_TIME; -#endif -} -#endif - -/* - * flush the page cache for an extent as its canceled. when we're on an - * lov we get a lock cancelation for each of the obd locks under the lov - * so we have to map the obd's region back onto the stripes in the file - * that it held. - * - * no one can dirty the extent until we've finished our work and they - * can enqueue another lock. 
- * - * XXX this could be asking the inode's dirty tree for info - */ -void ll_pgcache_remove_extent(struct inode *inode, struct lov_stripe_md *lsm, - struct ldlm_lock *lock) -{ - struct ldlm_extent *extent = &lock->l_extent; - unsigned long start, end, count, skip, i, j; - struct page *page; - int ret; - ENTRY; - - CDEBUG(D_INODE, "obdo %lu inode %p ["LPU64"->"LPU64"] size: %llu\n", - inode->i_ino, inode, extent->start, extent->end, inode->i_size); - - start = extent->start >> PAGE_CACHE_SHIFT; - count = ~0; - skip = 0; - end = (extent->end >> PAGE_CACHE_SHIFT) + 1; - if ((end << PAGE_CACHE_SHIFT) < extent->end) - end = ~0; - if (lsm->lsm_stripe_count > 1) { - struct { - char name[16]; - struct ldlm_lock *lock; - struct lov_stripe_md *lsm; - } key = { .name = "lock_to_stripe", .lock = lock, .lsm = lsm }; - __u32 stripe; - __u32 vallen = sizeof(stripe); - int rc; - - /* get our offset in the lov */ - rc = obd_get_info(ll_i2obdconn(inode), sizeof(key), - &key, &vallen, &stripe); - if (rc != 0) { - CERROR("obd_get_info: rc = %d\n", rc); - LBUG(); - } - LASSERT(stripe < lsm->lsm_stripe_count); - - count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT; - skip = (lsm->lsm_stripe_count - 1) * count; - start += (start/count * skip) + (stripe * count); - if (end != ~0) - end += (end/count * skip) + (stripe * count); - } - - i = (inode->i_size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; - if (end >= i) - clear_bit(LLI_F_HAVE_SIZE_LOCK, &(ll_i2info(inode)->lli_flags)); - if (i < end) - end = i; - - CDEBUG(D_INODE, "start: %lu j: %lu count: %lu skip: %lu end: %lu\n", - start, start % count, count, skip, end); - - /* start writeback on dirty pages in the extent when its PW */ - for (i = start, j = start % count; - lock->l_granted_mode == LCK_PW && i < end; j++, i++) { - if (j == count) { - i += skip; - j = 0; - } - /* its unlikely, but give us a chance to bail when we're out */ - PGCACHE_WRLOCK(inode->i_mapping); - if (list_empty(&inode->i_mapping->dirty_pages)) { - CDEBUG(D_INODE, 
"dirty list empty\n"); - PGCACHE_WRUNLOCK(inode->i_mapping); - break; - } - PGCACHE_WRUNLOCK(inode->i_mapping); - - if (need_resched()) - schedule(); - - page = find_get_page(inode->i_mapping, i); - if (page == NULL) - continue; - if (!PageDirty(page) || TryLockPage(page)) { - page_cache_release(page); - continue; - } - if (PageDirty(page)) { - CDEBUG(D_INODE, "writing page %p\n", page); - PGCACHE_WRLOCK(inode->i_mapping); - list_del(&page->list); - list_add(&page->list, &inode->i_mapping->locked_pages); - PGCACHE_WRUNLOCK(inode->i_mapping); - - /* this writepage might write out pages outside - * this extent, but that's ok, the pages are only - * still dirty because a lock still covers them */ - ClearPageDirty(page); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - ret = inode->i_mapping->a_ops->writepage(page); -#else - ret = inode->i_mapping->a_ops->writepage(page, NULL); -#endif - if (ret != 0) - unlock_page(page); - } else { - unlock_page(page); - } - page_cache_release(page); - - } - - /* our locks are page granular thanks to osc_enqueue, we invalidate the - * whole page. 
*/ - LASSERT((extent->start & ~PAGE_CACHE_MASK) == 0); - LASSERT(((extent->end+1) & ~PAGE_CACHE_MASK) == 0); - for (i = start, j = start % count ; i < end ; j++, i++) { - if ( j == count ) { - i += skip; - j = 0; - } - PGCACHE_WRLOCK(inode->i_mapping); - if (list_empty(&inode->i_mapping->dirty_pages) && - list_empty(&inode->i_mapping->clean_pages) && - list_empty(&inode->i_mapping->locked_pages)) { - CDEBUG(D_INODE, "nothing left\n"); - PGCACHE_WRUNLOCK(inode->i_mapping); - break; - } - PGCACHE_WRUNLOCK(inode->i_mapping); - if (need_resched()) - schedule(); - page = find_get_page(inode->i_mapping, i); - if (page == NULL) - continue; - CDEBUG(D_INODE, "dropping page %p at %lu\n", page, page->index); - lock_page(page); - if (page->mapping) /* might have raced */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - truncate_complete_page(page); -#else - truncate_complete_page(page->mapping, page); -#endif - unlock_page(page); - page_cache_release(page); - } - EXIT; -} - -int ll_extent_lock_callback(struct ldlm_lock *lock, struct ldlm_lock_desc *new, - void *data, int flag) -{ - struct inode *inode = data; - struct ll_inode_info *lli = ll_i2info(inode); - struct lustre_handle lockh = { 0 }; - int rc; - ENTRY; - - LASSERT(inode != NULL); - - switch (flag) { - case LDLM_CB_BLOCKING: - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc != ELDLM_OK) - CERROR("ldlm_cli_cancel failed: %d\n", rc); - break; - case LDLM_CB_CANCELING: - /* FIXME: we could be given 'canceling intents' so that we - * could know to write-back or simply throw away the pages - * based on if the cancel comes from a desire to, say, - * read or truncate.. 
*/ - LASSERT((unsigned long)inode > 0x1000); - LASSERT((unsigned long)lli > 0x1000); - LASSERT((unsigned long)lli->lli_smd > 0x1000); - ll_pgcache_remove_extent(inode, lli->lli_smd, lock); - break; - default: - LBUG(); - } - - RETURN(0); -} - -static ssize_t ll_file_read(struct file *filp, char *buf, size_t count, - loff_t *ppos) -{ - struct ll_file_data *fd = filp->private_data; - struct inode *inode = filp->f_dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct lustre_handle lockh = { 0 }; - struct ll_read_extent rextent; - ldlm_error_t err; - ssize_t retval; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", - inode->i_ino, inode->i_generation, inode, count, *ppos); - - /* "If nbyte is 0, read() will return 0 and have no other results." - * -- Single Unix Spec */ - if (count == 0) - RETURN(0); - - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_READ_BYTES, - count); - /* grab a -> eof extent to push extending writes out of node's caches - * so we can see them at the getattr after lock acquisition. this will - * turn into a seperate [*ppos + count, EOF] 'size intent' lock attempt - * in the future. */ - rextent.re_extent.start = *ppos; - rextent.re_extent.end = OBD_OBJECT_EOF; - - err = ll_extent_lock(fd, inode, lsm, LCK_PR, &rextent.re_extent,&lockh); - if (err != ELDLM_OK) - RETURN(-ENOLCK); - - /* XXX tell ll_readpage what pages have a PR lock.. */ - rextent.re_task = current; - spin_lock(&lli->lli_read_extent_lock); - list_add(&rextent.re_lli_item, &lli->lli_read_extents); - spin_unlock(&lli->lli_read_extent_lock); - - CDEBUG(D_INFO, "Reading inode %lu, "LPSZ" bytes, offset %Ld\n", - inode->i_ino, count, *ppos); - retval = generic_file_read(filp, buf, count, ppos); - - spin_lock(&lli->lli_read_extent_lock); - list_del(&rextent.re_lli_item); - spin_unlock(&lli->lli_read_extent_lock); - - /* XXX errors? 
*/ - ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); - RETURN(retval); -} - -/* - * Write to a file (through the page cache). - */ -static ssize_t -ll_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) -{ - struct ll_file_data *fd = file->private_data; - struct inode *inode = file->f_dentry->d_inode; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct lustre_handle lockh = { 0 }; - struct ldlm_extent extent; - loff_t maxbytes = ll_file_maxbytes(inode); - ldlm_error_t err; - ssize_t retval; - char should_validate = 1; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),size="LPSZ",offset=%Ld\n", - inode->i_ino, inode->i_generation, inode, count, *ppos); - - /* - * sleep doing some writeback work of this mount's dirty data - * if the VM thinks we're low on memory.. other dirtying code - * paths should think about doing this, too, but they should be - * careful not to hold locked pages while they do so. like - * ll_prepare_write. *cough* - */ - LL_CHECK_DIRTY(inode->i_sb); - - /* POSIX, but surprised the VFS doesn't check this already */ - if (count == 0) - RETURN(0); - - if (file->f_flags & O_APPEND) { - extent.start = 0; - extent.end = OBD_OBJECT_EOF; - } else { - extent.start = *ppos; - extent.end = *ppos + count - 1; - /* we really don't care what i_size is if we're doing - * fully page aligned writes */ - if ((*ppos & ~PAGE_CACHE_MASK) == 0 && - (count & ~PAGE_CACHE_MASK) == 0) - should_validate = 0; - } - - if (should_validate) - err = ll_extent_lock(fd, inode, lsm, LCK_PW, &extent, &lockh); - else - err = ll_extent_lock_no_validate(fd, inode, lsm, LCK_PW, - &extent, &lockh); - if (err != ELDLM_OK) - RETURN(-ENOLCK); - - /* this is ok, g_f_w will overwrite this under i_sem if it races - * with a local truncate, it just makes our maxbyte checking easier */ - if (file->f_flags & O_APPEND) - *ppos = inode->i_size; - - if (*ppos >= maxbytes) { - if (count || *ppos > maxbytes) { - send_sig(SIGXFSZ, current, 0); - GOTO(out, 
retval = -EFBIG); - } - } - if (*ppos + count > maxbytes) - count = maxbytes - *ppos; - - CDEBUG(D_INFO, "Writing inode %lu, "LPSZ" bytes, offset %Lu\n", - inode->i_ino, count, *ppos); - - /* generic_file_write handles O_APPEND after getting i_sem */ - retval = generic_file_write(file, buf, count, ppos); - -out: - /* XXX errors? */ - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, LPROC_LL_WRITE_BYTES, - retval); - ll_extent_unlock(fd, inode, lsm, LCK_PW, &lockh); - RETURN(retval); -} - -static int ll_lov_setstripe(struct inode *inode, struct file *file, - unsigned long arg) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct lustre_handle *conn = ll_i2obdconn(inode); - struct lov_stripe_md *lsm; - int rc; - ENTRY; - - down(&lli->lli_open_sem); - lsm = lli->lli_smd; - if (lsm) { - up(&lli->lli_open_sem); - CERROR("stripe already exists for ino %lu\n", inode->i_ino); - /* If we haven't already done the open, do so now */ - if (file->f_flags & O_LOV_DELAY_CREATE) { - int rc2 = ll_osc_open(conn, inode, file, lsm); - if (rc2) - RETURN(rc2); - } - - RETURN(-EEXIST); - } - - rc = obd_iocontrol(LL_IOC_LOV_SETSTRIPE, conn, 0, &lsm, (void *)arg); - if (rc) { - up(&lli->lli_open_sem); - RETURN(rc); - } - rc = ll_create_obj(conn, inode, file, lsm); - up(&lli->lli_open_sem); - - if (rc) { - obd_free_memmd(conn, &lsm); - RETURN(rc); - } - rc = ll_osc_open(conn, inode, file, lli->lli_smd); - RETURN(rc); -} - -static int ll_lov_getstripe(struct inode *inode, unsigned long arg) -{ - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct lustre_handle *conn = ll_i2obdconn(inode); - - if (!lsm) - RETURN(-ENODATA); - - return obd_iocontrol(LL_IOC_LOV_GETSTRIPE, conn, 0, lsm, (void *)arg); -} - -int ll_file_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) -{ - struct ll_file_data *fd = file->private_data; - struct lustre_handle *conn; - int flags; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),cmd=%u\n", inode->i_ino, - 
inode->i_generation, inode, cmd); - - if (_IOC_TYPE(cmd) == 'T') /* tty ioctls */ - return -ENOTTY; - - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_IOCTL); - switch(cmd) { - case LL_IOC_GETFLAGS: - /* Get the current value of the file flags */ - return put_user(fd->fd_flags, (int *)arg); - case LL_IOC_SETFLAGS: - case LL_IOC_CLRFLAGS: - /* Set or clear specific file flags */ - /* XXX This probably needs checks to ensure the flags are - * not abused, and to handle any flag side effects. - */ - if (get_user(flags, (int *) arg)) - return -EFAULT; - - if (cmd == LL_IOC_SETFLAGS) - fd->fd_flags |= flags; - else - fd->fd_flags &= ~flags; - return 0; - case LL_IOC_LOV_SETSTRIPE: - return ll_lov_setstripe(inode, file, arg); - case LL_IOC_LOV_GETSTRIPE: - return ll_lov_getstripe(inode, arg); - - /* We need to special case any other ioctls we want to handle, - * to send them to the MDS/OST as appropriate and to properly - * network encode the arg field. - case EXT2_IOC_GETFLAGS: - case EXT2_IOC_SETFLAGS: - case EXT2_IOC_GETVERSION_OLD: - case EXT2_IOC_GETVERSION_NEW: - case EXT2_IOC_SETVERSION_OLD: - case EXT2_IOC_SETVERSION_NEW: - */ - default: - conn = ll_i2obdconn(inode); - return obd_iocontrol(cmd, conn, 0, NULL, (void *)arg); - } -} - -loff_t ll_file_seek(struct file *file, loff_t offset, int origin) -{ - struct inode *inode = file->f_dentry->d_inode; - struct ll_file_data *fd = file->private_data; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct lustre_handle lockh = {0}; - loff_t retval; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),to=%llu\n", inode->i_ino, - inode->i_generation, inode, - offset + ((origin==2) ? 
inode->i_size : file->f_pos)); - - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_LLSEEK); - if (origin == 2) { /* SEEK_END */ - ldlm_error_t err; - struct ldlm_extent extent = {0, OBD_OBJECT_EOF}; - err = ll_extent_lock(fd, inode, lsm, LCK_PR, &extent, &lockh); - if (err != ELDLM_OK) - RETURN(-ENOLCK); - - offset += inode->i_size; - } else if (origin == 1) { /* SEEK_CUR */ - offset += file->f_pos; - } - - retval = -EINVAL; - if (offset >= 0 && offset <= ll_file_maxbytes(inode)) { - if (offset != file->f_pos) { - file->f_pos = offset; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - file->f_reada = 0; - file->f_version = ++event; -#endif - } - retval = offset; - } - - if (origin == 2) - ll_extent_unlock(fd, inode, lsm, LCK_PR, &lockh); - RETURN(retval); -} - -int ll_fsync(struct file *file, struct dentry *dentry, int data) -{ - int ret; - struct inode *inode = dentry->d_inode; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_FSYNC); - /* - * filemap_fdata{sync,wait} are also called at PW lock cancelation so - * we know that they can only find data to writeback here if we are - * still holding the PW lock that covered the dirty pages. XXX we - * should probably get a reference on it, though, just to be clear. 
- */ - ret = filemap_fdatasync(dentry->d_inode->i_mapping); - if ( ret == 0 ) - ret = filemap_fdatawait(dentry->d_inode->i_mapping); - - RETURN(ret); -} - -int ll_inode_revalidate(struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - struct lov_stripe_md *lsm = NULL; - ENTRY; - - if (!inode) { - CERROR("REPORT THIS LINE TO PETER\n"); - RETURN(0); - } - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),name=%s\n", - inode->i_ino, inode->i_generation, inode, dentry->d_name.name); -#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)) - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_REVALIDATE); -#endif - - /* this is very tricky. it is unsafe to call ll_have_md_lock - when we have a referenced lock: because it may cause an RPC - below when the lock is marked CB_PENDING. That RPC may not - go out because someone else may be in another RPC waiting for - that lock*/ - if (!(dentry->d_it && dentry->d_it->it_lock_mode) && - !ll_have_md_lock(dentry)) { - struct ptlrpc_request *req = NULL; - struct ll_sb_info *sbi = ll_i2sbi(dentry->d_inode); - struct ll_fid fid; - struct mds_body *body; - struct lov_mds_md *lmm; - unsigned long valid = 0; - int eadatalen = 0, rc; - - /* Why don't we update all valid MDS fields here, if we're - * doing an RPC anyways? -phil */ - if (S_ISREG(inode->i_mode)) { - eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL); - valid |= OBD_MD_FLEASIZE; - } - ll_inode2fid(&fid, inode); - rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, - valid, eadatalen, &req); - if (rc) { - CERROR("failure %d inode %lu\n", rc, inode->i_ino); - RETURN(-abs(rc)); - } - - body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); - LASSERT (body != NULL); /* checked by mdc_getattr() */ - LASSERT_REPSWABBED (req, 0); /* swabbed by mdc_getattr() */ - - if (S_ISREG(inode->i_mode) && - (body->valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS))) { - CERROR("MDS sent back size for regular file\n"); - body->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); - } - - /* XXX Too paranoid? 
*/ - if ((body->valid ^ valid) & OBD_MD_FLEASIZE) - CERROR("Asked for %s eadata but got %s\n", - (valid & OBD_MD_FLEASIZE) ? "some" : "no", - (body->valid & OBD_MD_FLEASIZE) ? "some":"none"); - - if (S_ISREG(inode->i_mode) && - (body->valid & OBD_MD_FLEASIZE)) { - if (body->eadatasize == 0) { /* no EA data */ - CERROR("OBD_MD_FLEASIZE set but no data\n"); - RETURN(-EPROTO); - } - /* Only bother with this if inode's lsm not set? */ - lmm = lustre_msg_buf(req->rq_repmsg,1,body->eadatasize); - LASSERT(lmm != NULL); /* mdc_getattr() checked */ - LASSERT_REPSWABBED(req, 1); /* mdc_getattr() swabbed */ - - rc = obd_unpackmd (&sbi->ll_osc_conn, - &lsm, lmm, body->eadatasize); - if (rc < 0) { - CERROR("Error %d unpacking eadata\n", rc); - ptlrpc_req_finished(req); - RETURN(rc); - } - LASSERT(rc >= sizeof(*lsm)); - } - - ll_update_inode(inode, body, lsm); - if (lsm != NULL && ll_i2info(inode)->lli_smd != lsm) - obd_free_memmd(&sbi->ll_osc_conn, &lsm); - - ptlrpc_req_finished(req); - } - - lsm = ll_i2info(inode)->lli_smd; - if (!lsm) /* object not yet allocated, don't validate size */ - RETURN(0); - - /* - * unfortunately stat comes in through revalidate and we don't - * differentiate this use from initial instantiation. we're - * also being wildly conservative and flushing write caches - * so that stat really returns the proper size. 
- */ - { - struct ldlm_extent extent = {0, OBD_OBJECT_EOF}; - struct lustre_handle lockh = {0}; - ldlm_error_t err; - - err = ll_extent_lock(NULL, inode, lsm, LCK_PR, &extent, &lockh); - if (err != ELDLM_OK) - RETURN(err); - - ll_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh); - } - RETURN(0); -} - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -static int ll_getattr(struct vfsmount *mnt, struct dentry *de, - struct kstat *stat) -{ - int res = 0; - struct inode *inode = de->d_inode; - - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, LPROC_LL_GETATTR); - res = ll_inode_revalidate(de); - if (res) - return res; -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - stat->dev = inode->i_dev; -#endif - stat->ino = inode->i_ino; - stat->mode = inode->i_mode; - stat->nlink = inode->i_nlink; - stat->uid = inode->i_uid; - stat->gid = inode->i_gid; - stat->rdev = kdev_t_to_nr(inode->i_rdev); - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; - stat->size = inode->i_size; - return 0; -} -#endif - -struct file_operations ll_file_operations = { - read: ll_file_read, - write: ll_file_write, - ioctl: ll_file_ioctl, - open: ll_file_open, - release: ll_file_release, - mmap: generic_file_mmap, - llseek: ll_file_seek, - fsync: ll_fsync, -}; - -struct inode_operations ll_file_inode_operations = { - setattr_raw: ll_setattr_raw, - setattr: ll_setattr, - truncate: ll_truncate, -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - getattr: ll_getattr, -#else - revalidate: ll_inode_revalidate, -#endif -}; - -struct inode_operations ll_special_inode_operations = { - setattr_raw: ll_setattr_raw, - setattr: ll_setattr, -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - getattr: ll_getattr, -#else - revalidate: ll_inode_revalidate, -#endif -}; diff --git a/lustre/llite/iod.c b/lustre/llite/iod.c deleted file mode 100644 index e3fabe6..0000000 --- a/lustre/llite/iod.c +++ /dev/null @@ -1,411 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; 
indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc - * - * this started as an implementation of an io daemon that woke regularly - * to force writeback.. the throttling in prepare_write and kupdate's usual - * writeback pressure got rid of our thread, but the file name remains. - */ - -#include <linux/version.h> -#include <linux/config.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/sched.h> -#include <linux/smp_lock.h> -#include <linux/kmod.h> -#include <linux/pagemap.h> -#include <linux/mm.h> -#include <linux/rbtree.h> -#include <linux/seq_file.h> -#include <linux/time.h> -#include "llite_internal.h" - -/* PG_inactive_clean is shorthand for rmap, we want free_high/low here.. 
*/ -#ifdef PG_inactive_clean -#include <linux/mm_inline.h> -#endif - -#define DEBUG_SUBSYSTEM S_LLITE -#include <linux/lustre_lite.h> - -#ifndef list_for_each_prev_safe -#define list_for_each_prev_safe(pos, n, head) \ - for (pos = (head)->prev, n = pos->prev; pos != (head); \ - pos = n, n = pos->prev ) -#endif - -extern spinlock_t inode_lock; - -struct ll_writeback_pages { - obd_count npgs, max; - struct brw_page *pga; -}; - -/* - * check to see if we're racing with truncate and put the page in - * the brw_page array. returns 0 if there is more room and 1 - * if the array is full. - */ -static int llwp_consume_page(struct ll_writeback_pages *llwp, - struct inode *inode, struct page *page) -{ - obd_off off = ((obd_off)page->index) << PAGE_SHIFT; - struct brw_page *pg; - - /* we raced with truncate? */ - if ( off >= inode->i_size ) { - int rc; - rc = ll_clear_dirty_pages(ll_i2obdconn(inode), - ll_i2info(inode)->lli_smd, - page->index, page->index); - - LASSERT(rc == 0); - CDEBUG(D_CACHE, "offset "LPU64" (index %lu) > i_size %llu\n", - off, page->index, inode->i_size); - unlock_page(page); - return 0; - } - - page_cache_get(page); - pg = &llwp->pga[llwp->npgs]; - llwp->npgs++; - LASSERT(llwp->npgs <= llwp->max); - - pg->pg = page; - pg->off = off; - pg->flag = OBD_BRW_CREATE|OBD_BRW_FROM_GRANT; - pg->count = PAGE_CACHE_SIZE; - - /* catch partial writes for files that end mid-page */ - if (pg->off + pg->count > inode->i_size) - pg->count = inode->i_size & ~PAGE_CACHE_MASK; - - /* - * matches ptlrpc_bulk_get assert that trickles down - * from a 0 page length going through niobuf and into - * the buffer regions being posted - */ - LASSERT(pg->count >= 0); - - CDEBUG(D_CACHE, "brw_page %p: off "LPU64" cnt %d, page %p: ind %ld" - " i_size: %llu\n", pg, pg->off, pg->count, page, - page->index, inode->i_size); - - return llwp->npgs == llwp->max; -} - -/* - * returns the number of pages that it added to the pgs array - * - * this duplicates filemap_fdatasync and gives us an 
opportunity to grab lots - * of dirty pages.. - */ -static void ll_get_dirty_pages(struct inode *inode, - struct ll_writeback_pages *llwp) -{ - struct address_space *mapping = inode->i_mapping; - struct page *page; - struct list_head *pos, *n; - ENTRY; - - PGCACHE_WRLOCK(mapping); - - list_for_each_prev_safe(pos, n, &mapping->dirty_pages) { - page = list_entry(pos, struct page, list); - - if (TryLockPage(page)) - continue; - - list_del(&page->list); - list_add(&page->list, &mapping->locked_pages); - - if ( ! PageDirty(page) ) { - unlock_page(page); - continue; - } - ClearPageDirty(page); - - if ( llwp_consume_page(llwp, inode, page) != 0) - break; - } - - PGCACHE_WRUNLOCK(mapping); - EXIT; -} - -static void ll_writeback(struct inode *inode, struct ll_writeback_pages *llwp) -{ - int rc, i; - struct ptlrpc_request_set *set; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),bytes=%u\n", - inode->i_ino, inode->i_generation, inode, - ((llwp->npgs-1) << PAGE_SHIFT) + llwp->pga[llwp->npgs-1].count); - - set = ptlrpc_prep_set(); - if (set == NULL) { - CERROR ("Can't create request set\n"); - rc = -ENOMEM; - } else { - rc = obd_brw_async(OBD_BRW_WRITE, ll_i2obdconn(inode), - ll_i2info(inode)->lli_smd, llwp->npgs, - llwp->pga, set, NULL); - if (rc == 0) - rc = ptlrpc_set_wait (set); - ptlrpc_set_destroy (set); - } - /* - * b=1038, we need to pass _brw errors up so that writeback - * doesn't get stuck in recovery leaving processes stuck in - * D waiting for pages - */ - if (rc) { - CERROR("error from obd_brw_async: rc = %d\n", rc); - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_WB_FAIL, llwp->npgs); - } else { - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_WB_OK, (llwp->npgs)); - } - - for (i = 0 ; i < llwp->npgs ; i++) { - struct page *page = llwp->pga[i].pg; - - CDEBUG(D_CACHE, "finished page %p at index %lu\n", page, - page->index); - LASSERT(PageLocked(page)); - - rc = ll_clear_dirty_pages(ll_i2obdconn(inode), - ll_i2info(inode)->lli_smd, 
- page->index, page->index); - LASSERT(rc == 0); - unlock_page(page); - page_cache_release(page); - } - - EXIT; -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -#ifndef PG_inactive_clean -#ifdef CONFIG_DISCONTIGMEM -#error "sorry, we don't support DISCONTIGMEM yet" -#endif - -/* - * __alloc_pages marks a zone as needing balancing if an allocation is - * performed when the zone has fewer free pages than its 'low' water - * mark. its cleared when try_to_free_pages makes progress. - */ -static int zones_need_balancing(void) -{ - pg_data_t * pgdat; - zone_t *zone; - int i; - - for ( pgdat = pgdat_list ; pgdat != NULL ; pgdat = pgdat->node_next ) { - for ( i = pgdat->nr_zones-1 ; i >= 0 ; i-- ) { - zone = &pgdat->node_zones[i]; - - if ( zone->need_balance ) - return 1; - } - } - return 0; -} -#endif -/* 2.4 doesn't give us a way to find out how many pages we have - * cached 'cause we're not using buffer_heads. we are very - * conservative here and flush the superblock of all dirty data - * when the vm (rmap or stock) thinks that it is running low - * and kswapd would have done work. kupdated isn't good enough - * because writers (dbench) can dirty _very quickly_, and we - * allocate under writepage.. - * - * 2.5 gets this right, see the {inc,dec}_page_state(nr_dirty, ) - */ -static int should_writeback(void) -{ -#ifdef PG_inactive_clean - if (free_high(ALL_ZONES) > 0 || free_low(ANY_ZONE) > 0) -#else - if (zones_need_balancing()) -#endif - return 1; - return 0; -} - -static int ll_alloc_brw(struct inode *inode, struct ll_writeback_pages *llwp) -{ - memset(llwp, 0, sizeof(struct ll_writeback_pages)); - - llwp->max = inode->i_blksize >> PAGE_CACHE_SHIFT; - if (llwp->max == 0) { - CERROR("forcing llwp->max to 1. 
blksize: %lu\n", - inode->i_blksize); - llwp->max = 1; - } - llwp->pga = kmalloc(llwp->max * sizeof(*llwp->pga), GFP_ATOMIC); - if (llwp->pga == NULL) - RETURN(-ENOMEM); - RETURN(0); -} - -int ll_check_dirty(struct super_block *sb) -{ - unsigned long old_flags; /* hack? */ - int making_progress; - struct inode *inode; - int rc = 0; - ENTRY; - - if (!should_writeback()) - return 0; - - old_flags = current->flags; - current->flags |= PF_MEMALLOC; - - spin_lock(&inode_lock); - - /* - * first we try and write back dirty pages from dirty inodes - * until the VM thinkgs we're ok again.. - */ - do { - struct ll_writeback_pages llwp; - struct list_head *pos; - inode = NULL; - making_progress = 0; - - list_for_each_prev(pos, &sb->s_dirty) { - inode = list_entry(pos, struct inode, i_list); - - if (!(inode->i_state & I_DIRTY_PAGES)) { - inode = NULL; - continue; - } - break; - } - - if (inode == NULL) - break; - - /* duplicate __sync_one, *sigh* */ - list_del(&inode->i_list); - list_add(&inode->i_list, &inode->i_sb->s_locked_inodes); - inode->i_state |= I_LOCK; - inode->i_state &= ~I_DIRTY_PAGES; - - spin_unlock(&inode_lock); - - rc = ll_alloc_brw(inode, &llwp); - if (rc != 0) - GOTO(cleanup, rc); - - do { - llwp.npgs = 0; - ll_get_dirty_pages(inode, &llwp); - if (llwp.npgs) { - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_WB_PRESSURE, - llwp.npgs); - ll_writeback(inode, &llwp); - rc += llwp.npgs; - making_progress = 1; - } - } while (llwp.npgs && should_writeback()); - - spin_lock(&inode_lock); - - if (!list_empty(&inode->i_mapping->dirty_pages)) - inode->i_state |= I_DIRTY_PAGES; - - inode->i_state &= ~I_LOCK; - /* - * we are sneaky and leave the inode on the dirty list, - * even though it might not still be.. 
- */ - if (!(inode->i_state & I_FREEING)) { - list_del(&inode->i_list); - list_add(&inode->i_list, &inode->i_sb->s_dirty); - } - wake_up(&inode->i_wait); - kfree(llwp.pga); - } while (making_progress && should_writeback()); - - /* - * and if that didn't work, we sleep on any data that might - * be under writeback.. - */ - while (should_writeback()) { - if (list_empty(&sb->s_locked_inodes)) - break; - - inode = list_entry(sb->s_locked_inodes.next, struct inode, - i_list); - - atomic_inc(&inode->i_count); /* XXX hack? */ - spin_unlock(&inode_lock); - wait_event(inode->i_wait, !(inode->i_state & I_LOCK)); - iput(inode); - spin_lock(&inode_lock); - } - - spin_unlock(&inode_lock); - -cleanup: - current->flags = old_flags; - - RETURN(rc); -} -#endif /* linux 2.5 */ - -int ll_batch_writepage(struct inode *inode, struct page *page) -{ - unsigned long old_flags; /* hack? */ - struct ll_writeback_pages llwp; - int rc = 0; - ENTRY; - - old_flags = current->flags; - current->flags |= PF_MEMALLOC; - rc = ll_alloc_brw(inode, &llwp); - if (rc != 0) - GOTO(restore_flags, rc); - - if (llwp_consume_page(&llwp, inode, page) == 0) - ll_get_dirty_pages(inode, &llwp); - - if (llwp.npgs) { - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_WB_WRITEPAGE, llwp.npgs); - ll_writeback(inode, &llwp); - } - kfree(llwp.pga); - -restore_flags: - current->flags = old_flags; - RETURN(rc); -} diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h deleted file mode 100644 index 4684383..0000000 --- a/lustre/llite/llite_internal.h +++ /dev/null @@ -1,29 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2003 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. 
- * See the file COPYING in this distribution - */ - -#ifndef LLITE_INTERNAL_H -#define LLITE_INTERNAL_H - -struct lustre_handle; -struct lov_stripe_md; - -int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode, - int flags, void *opaque); -int ll_rd_dirty_pages(char *page, char **start, off_t off, int count, - int *eof, void *data); -int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count, - int *eof, void *data); -int ll_wr_max_dirty_pages(struct file *file, const char *buffer, - unsigned long count, void *data); -int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm, - unsigned long start, unsigned long end); -int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm, - unsigned long index); - -#endif /* LLITE_INTERNAL_H */ diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c deleted file mode 100644 index 42fea4b..0000000 --- a/lustre/llite/lproc_llite.c +++ /dev/null @@ -1,285 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#include <linux/lustre_lite.h> -#include <linux/lprocfs_status.h> - -#include "llite_internal.h" - -/* /proc/lustre/llite mount point registration */ - -#ifndef LPROCFS -int lprocfs_register_mountpoint(struct proc_dir_entry *parent, - struct super_block *sb, char *osc, char *mdc) -{ - return 0; -} -void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi){} -#else - -#define LPROC_LLITE_STAT_FCT(fct_name, get_statfs_fct) \ -int fct_name(char *page, char **start, off_t off, \ - int count, int *eof, void *data) \ -{ \ - struct statfs sfs; \ - int rc; \ - LASSERT(data != NULL); \ - rc = get_statfs_fct((struct super_block*)data, &sfs); \ - return (rc==0 \ - ? lprocfs_##fct_name (page, start, off, count, eof, &sfs) \ - : rc); \ -} - -long long mnt_instance; - -LPROC_LLITE_STAT_FCT(rd_blksize, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_kbytestotal, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_kbytesfree, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_filestotal, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_filesfree, vfs_statfs); -LPROC_LLITE_STAT_FCT(rd_filegroups, vfs_statfs); - -int rd_path(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - return 0; -} - -int rd_fstype(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct super_block *sb = (struct super_block*)data; - - LASSERT(sb != NULL); - *eof = 1; - return snprintf(page, count, "%s\n", sb->s_type->name); -} - -int rd_sb_uuid(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct super_block *sb = (struct super_block *)data; - - LASSERT(sb != NULL); - *eof = 1; - return snprintf(page, count, "%s\n", ll_s2sbi(sb)->ll_sb_uuid.uuid); -} - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", rd_sb_uuid, 0, 0 }, - { "mntpt_path", rd_path, 0, 0 }, - { "fstype", rd_fstype, 0, 0 }, - { "blocksize", 
rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "dirty_pages", ll_rd_dirty_pages, 0, 0}, - { "max_dirty_pages", ll_rd_max_dirty_pages, ll_wr_max_dirty_pages, 0}, - { 0 } -}; - -#define MAX_STRING_SIZE 128 - -struct llite_file_opcode { - __u32 opcode; - __u32 type; - const char *opname; -} llite_opcode_table[LPROC_LL_FILE_OPCODES] = { - /* file operation */ - { LPROC_LL_DIRTY_HITS, LPROCFS_TYPE_REGS, "dirty_pages_hits" }, - { LPROC_LL_DIRTY_MISSES, LPROCFS_TYPE_REGS, "dirty_pages_misses" }, - { LPROC_LL_WB_WRITEPAGE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "writeback_from_writepage" }, - { LPROC_LL_WB_PRESSURE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "writeback_from_pressure" }, - { LPROC_LL_WB_OK, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "writeback_ok_pages" }, - { LPROC_LL_WB_FAIL, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "writeback_failed_pages" }, - { LPROC_LL_READ_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES, - "read_bytes" }, - { LPROC_LL_WRITE_BYTES, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_BYTES, - "write_bytes" }, - { LPROC_LL_BRW_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "brw_read" }, - { LPROC_LL_BRW_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "brw_write" }, - - { LPROC_LL_IOCTL, LPROCFS_TYPE_REGS, "ioctl" }, - { LPROC_LL_OPEN, LPROCFS_TYPE_REGS, "open" }, - { LPROC_LL_RELEASE, LPROCFS_TYPE_REGS, "close" }, - { LPROC_LL_MAP, LPROCFS_TYPE_REGS, "mmap" }, - { LPROC_LL_LLSEEK, LPROCFS_TYPE_REGS, "seek" }, - { LPROC_LL_FSYNC, LPROCFS_TYPE_REGS, "fsync" }, - /* inode operation */ - { LPROC_LL_SETATTR_RAW, LPROCFS_TYPE_REGS, "setattr_raw" }, - { LPROC_LL_SETATTR, LPROCFS_TYPE_REGS, "setattr" }, - { LPROC_LL_TRUNC, LPROCFS_TYPE_REGS, "punch" }, -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - { LPROC_LL_GETATTR, LPROCFS_TYPE_REGS, "getattr" }, 
-#else - { LPROC_LL_REVALIDATE, LPROCFS_TYPE_REGS, "getattr" }, -#endif - /* special inode operation */ - { LPROC_LL_STAFS, LPROCFS_TYPE_REGS, "statfs" }, - { LPROC_LL_ALLOC_INODE, LPROCFS_TYPE_REGS, "alloc_inode" }, - { LPROC_LL_DIRECT_READ, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "direct_read" }, - { LPROC_LL_DIRECT_WRITE, LPROCFS_CNTR_AVGMINMAX|LPROCFS_TYPE_PAGES, - "direct_write" }, - -}; - -int lprocfs_register_mountpoint(struct proc_dir_entry *parent, - struct super_block *sb, char *osc, char *mdc) -{ - struct lprocfs_vars lvars[2]; - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - char name[MAX_STRING_SIZE + 1]; - int err, id; - struct lprocfs_stats *svc_stats = NULL; - ENTRY; - - memset(lvars, 0, sizeof(lvars)); - - name[MAX_STRING_SIZE] = '\0'; - lvars[0].name = name; - - LASSERT(sbi != NULL); - LASSERT(mdc != NULL); - LASSERT(osc != NULL); - - /* Mount info */ - snprintf(name, MAX_STRING_SIZE, "fs%llu", mnt_instance); - - mnt_instance++; - sbi->ll_proc_root = lprocfs_register(name, parent, NULL, NULL); - if (IS_ERR(sbi->ll_proc_root)) { - err = PTR_ERR(sbi->ll_proc_root); - sbi->ll_proc_root = NULL; - RETURN(err); - } - - svc_stats = lprocfs_alloc_stats(LPROC_LL_FILE_OPCODES); - if (svc_stats == NULL) { - err = -ENOMEM; - goto out; - } - /* do counter init */ - for (id = 0; id < LPROC_LL_FILE_OPCODES; id++) { - __u32 type = llite_opcode_table[id].type; - void *ptr = NULL; - if (type & LPROCFS_TYPE_REGS) - ptr = "regs"; - else { - if (type & LPROCFS_TYPE_BYTES) - ptr = "bytes"; - else { - if (type & LPROCFS_TYPE_PAGES) - ptr = "pages"; - } - } - lprocfs_counter_init(svc_stats, llite_opcode_table[id].opcode, - (type & LPROCFS_CNTR_AVGMINMAX), - llite_opcode_table[id].opname, ptr); - } - err = lprocfs_register_stats(sbi->ll_proc_root, "stats", svc_stats); - if (err) - goto out; - else - sbi->ll_stats = svc_stats; - /* need place to keep svc_stats */ - - /* Static configuration info */ - err = lprocfs_add_vars(sbi->ll_proc_root, 
lprocfs_obd_vars, sb); - if (err) - goto out; - - /* MDC info */ - obd = class_name2obd(mdc); - - LASSERT(obd != NULL); - LASSERT(obd->obd_type != NULL); - LASSERT(obd->obd_type->typ_name != NULL); - - snprintf(name, MAX_STRING_SIZE, "%s/common_name", - obd->obd_type->typ_name); - lvars[0].read_fptr = lprocfs_rd_name; - err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); - if (err) - goto out; - - snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name); - lvars[0].read_fptr = lprocfs_rd_uuid; - err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); - if (err) - goto out; - - /* OSC */ - obd = class_name2obd(osc); - - LASSERT(obd != NULL); - LASSERT(obd->obd_type != NULL); - LASSERT(obd->obd_type->typ_name != NULL); - - snprintf(name, MAX_STRING_SIZE, "%s/common_name", - obd->obd_type->typ_name); - lvars[0].read_fptr = lprocfs_rd_name; - err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); - if (err) - goto out; - - snprintf(name, MAX_STRING_SIZE, "%s/uuid", obd->obd_type->typ_name); - lvars[0].read_fptr = lprocfs_rd_uuid; - err = lprocfs_add_vars(sbi->ll_proc_root, lvars, obd); -out: - if (err) { - if (svc_stats) - lprocfs_free_stats(svc_stats); - if (sbi->ll_proc_root) - lprocfs_remove(sbi->ll_proc_root); - } - RETURN(err); -} - -void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi) -{ - if (sbi->ll_proc_root) { - struct proc_dir_entry *file_stats = - lprocfs_srch(sbi->ll_proc_root, "stats"); - - if (file_stats) { - lprocfs_free_stats(sbi->ll_stats); - lprocfs_remove(file_stats); - } - } -} -#undef MAX_STRING_SIZE -#endif /* LPROCFS */ diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c deleted file mode 100644 index da6e670..0000000 --- a/lustre/llite/namei.c +++ /dev/null @@ -1,1160 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * derived in small part from linux/fs/ext2/namei.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - * - * Big-endian to little-endian byte-swapping/bitmaps by - * David S. Miller (davem@caip.rutgers.edu), 1995 - * Directory entry file type support and forward compatibility hooks - * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 - */ - -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/smp_lock.h> -#include <linux/quotaops.h> -#include <linux/highmem.h> -#include <linux/pagemap.h> - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/obd_support.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_dlm.h> - -/* from dcache.c */ -extern void ll_set_dd(struct dentry *de); - -/* from super.c */ -extern void ll_change_inode(struct inode *inode); -extern int ll_setattr(struct dentry *de, struct iattr *attr); - -/* from dir.c */ -extern int ll_add_link (struct dentry *dentry, struct inode *inode); -obd_id ll_inode_by_name(struct inode * dir, struct dentry *dentry, int *typ); -int ext2_make_empty(struct inode *inode, struct inode *parent); -struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, - struct dentry *dentry, struct page ** res_page); -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ); -int ext2_empty_dir (struct inode * inode); -struct 
ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p); -void ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de, - struct page *page, struct inode *inode); - -/* - * Couple of helper functions - make the code slightly cleaner. - */ -static inline void ext2_inc_count(struct inode *inode) -{ - inode->i_nlink++; -} - -/* postpone the disk update until the inode really goes away */ -static inline void ext2_dec_count(struct inode *inode) -{ - inode->i_nlink--; -} -static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) -{ - int err; - err = ll_add_link(dentry, inode); - if (!err) { - d_instantiate(dentry, inode); - return 0; - } - ext2_dec_count(inode); - iput(inode); - return err; -} - -/* methods */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int ll_find_inode(struct inode *inode, unsigned long ino, void *opaque) -#else -static int ll_test_inode(struct inode *inode, void *opaque) -#endif -{ - struct ll_read_inode2_cookie *lic = opaque; - struct mds_body *body = lic->lic_body; - - if (!(lic->lic_body->valid & (OBD_MD_FLGENER | OBD_MD_FLID))) - CERROR("invalid generation\n"); - CDEBUG(D_VFSTRACE, "comparing inode %p ino %lu/%u to body %lu/%u\n", - inode, inode->i_ino, inode->i_generation, ino, - lic->lic_body->generation); - - if (inode->i_generation != lic->lic_body->generation) - return 0; - - /* Apply the attributes in 'opaque' to this inode */ - ll_update_inode(inode, body, lic->lic_lsm); - return 1; -} - -extern struct dentry_operations ll_d_ops; - -int ll_unlock(__u32 mode, struct lustre_handle *lockh) -{ - ENTRY; - - ldlm_lock_decref(lockh, mode); - - RETURN(0); -} - -/* Get an inode by inode number (already instantiated by the intent lookup). 
- * Returns inode or NULL - */ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -extern int ll_read_inode2(struct inode *inode, void *opaque); -struct inode *ll_iget(struct super_block *sb, ino_t hash, - struct ll_read_inode2_cookie *lic) -{ - struct inode *inode; - - LASSERT(hash != 0); - inode = iget5_locked(sb, hash, ll_test_inode, ll_read_inode2, lic); - if (inode == NULL) - return NULL; /* removed ERR_PTR(-ENOMEM) -eeb */ - - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - - // XXX Coda always fills inodes, should Lustre? - return inode; -} -#else -struct inode *ll_iget(struct super_block *sb, ino_t hash, - struct ll_read_inode2_cookie *lic) -{ - struct inode *inode; - LASSERT(hash != 0); - inode = iget4(sb, hash, ll_find_inode, lic); - CDEBUG(D_VFSTRACE, "inode: %lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - return inode; -} -#endif - -static int ll_intent_to_lock_mode(struct lookup_intent *it) -{ - /* CREAT needs to be tested before open (both could be set) */ - if (it->it_op & IT_CREAT) - return LCK_PW; - else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP)) - return LCK_PR; - - LBUG(); - RETURN(-EINVAL); -} - -int ll_it_open_error(int phase, struct lookup_intent *it) -{ - if (it->it_disposition & IT_OPEN_OPEN) { - if (phase == IT_OPEN_OPEN) - return it->it_status; - else - return 0; - } - - if (it->it_disposition & IT_OPEN_CREATE) { - if (phase == IT_OPEN_CREATE) - return it->it_status; - else - return 0; - } - - if (it->it_disposition & IT_OPEN_LOOKUP) { - if (phase == IT_OPEN_LOOKUP) - return it->it_status; - else - return 0; - } - LBUG(); - return 0; -} - -int ll_mdc_blocking_ast(struct ldlm_lock *lock, - struct ldlm_lock_desc *desc, - void *data, int flag) -{ - int rc; - struct lustre_handle lockh; - ENTRY; - - switch (flag) { - case LDLM_CB_BLOCKING: - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc < 0) { - CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); - RETURN(rc); - } - break; - 
case LDLM_CB_CANCELING: { - /* Invalidate all dentries associated with this inode */ - struct inode *inode = lock->l_data; - LASSERT(inode != NULL); - - if (S_ISDIR(inode->i_mode)) { - CDEBUG(D_INODE, "invalidating inode %lu\n", - inode->i_ino); - - ll_invalidate_inode_pages(inode); - } - -#warning FIXME: we should probably free this inode if there are no aliases - if (inode->i_sb->s_root && - inode != inode->i_sb->s_root->d_inode) - d_unhash_aliases(inode); - break; - } - default: - LBUG(); - } - - RETURN(0); -} - -void ll_mdc_lock_set_inode(struct lustre_handle *lockh, struct inode *inode) -{ - struct ldlm_lock *lock = ldlm_handle2lock(lockh); - ENTRY; - - LASSERT(lock != NULL); - lock->l_data = inode; - LDLM_LOCK_PUT(lock); - EXIT; -} - -int ll_mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode, - int flags, void *opaque) -{ - struct ldlm_res_id res_id = - { .name = {inode->i_ino, inode->i_generation} }; - struct obd_device *obddev = class_conn2obd(conn); - ENTRY; - RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags, - opaque)); -} - -void ll_prepare_mdc_op_data(struct mdc_op_data *data, - struct inode *i1, - struct inode *i2, - const char *name, - int namelen, - int mode) -{ - LASSERT(i1); - - data->ino1 = i1->i_ino; - data->gen1 = i1->i_generation; - data->typ1 = i1->i_mode & S_IFMT; - data->gid1 = i1->i_gid; - - if (i2) { - data->ino2 = i2->i_ino; - data->gen2 = i2->i_generation; - data->typ2 = i2->i_mode & S_IFMT; - data->gid2 = i2->i_gid; - } else { - data->ino2 = 0; - } - - data->name = name; - data->namelen = namelen; - data->mode = mode; -} - -#define IT_ENQ_COMPLETE (1<<16) - -int ll_intent_lock(struct inode *parent, struct dentry **de, - struct lookup_intent *it, intent_finish_cb intent_finish) -{ - struct dentry *dentry = *de; - struct inode *inode = dentry->d_inode; - struct ll_sb_info *sbi = ll_i2sbi(parent); - struct lustre_handle lockh; - struct lookup_intent lookup_it = { .it_op = IT_LOOKUP }; - struct 
ptlrpc_request *request = NULL; - int rc = 0, offset, flag = 0; - obd_id ino = 0; - ENTRY; - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) - if (it && it->it_op == 0) - *it = lookup_it; -#endif - if (it == NULL) - it = &lookup_it; - - CDEBUG(D_DLMTRACE, "name: %*s, intent: %s\n", dentry->d_name.len, - dentry->d_name.name, ldlm_it2str(it->it_op)); - - if (dentry->d_name.len > EXT2_NAME_LEN) - RETURN(-ENAMETOOLONG); - - if (!(it->it_disposition & IT_ENQ_COMPLETE)) { - struct mdc_op_data op_data; - - ll_prepare_mdc_op_data(&op_data, parent, dentry->d_inode, - dentry->d_name.name, dentry->d_name.len, - 0); - - rc = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, it, - ll_intent_to_lock_mode(it), &op_data, - &lockh, NULL, 0, ldlm_completion_ast, - ll_mdc_blocking_ast, parent); - if (rc < 0) - RETURN(rc); - memcpy(it->it_lock_handle, &lockh, sizeof(lockh)); - } - - request = (struct ptlrpc_request *)it->it_data; - - /* non-zero it_disposition indicates that the server performed the - * intent on our behalf. */ - if (it->it_disposition) { - struct mds_body *mds_body; - int mode; - - /* This long block is all about fixing up the local - * state so that it is correct as of the moment - * _before_ the operation was applied; that way, the - * VFS will think that everything is normal and call - * Lustre's regular FS function. - * - * If we're performing a creation, that means that unless the - * creation failed with EEXIST, we should fake up a negative - * dentry. Likewise for the target of a hard link. - * - * For everything else, we want to lookup to succeed. */ - - /* One additional note: if CREATE/MKDIR/etc succeeded, - * we add an extra reference to the request because we - * need to keep it around until ll_create gets called. - * For anything else which results in - * LL_LOOKUP_POSITIVE, we can do the iget() - * immediately with the contents of the reply (in the - * intent_finish callback). 
In the create case, - * however, we need to wait until ll_create_node to do - * the iget() or the VFS will abort with -EEXISTS. - */ - - offset = 1; - mds_body = lustre_msg_buf(request->rq_repmsg, offset, - sizeof(*mds_body)); - LASSERT (mds_body != NULL); /* mdc_enqueue checked */ - LASSERT_REPSWABBED (request, offset); /* mdc_enqueue swabbed */ - - ino = mds_body->fid1.id; - mode = mds_body->mode; - - /*We were called from revalidate2: did we find the same inode?*/ - if (inode && (ino != inode->i_ino || - mds_body->fid1.generation != inode->i_generation)) { - it->it_disposition |= IT_ENQ_COMPLETE; - RETURN(-ESTALE); - } - - /* If we're doing an IT_OPEN which did not result in an actual - * successful open, then we need to remove the bit which saves - * this request for unconditional replay. */ - if (it->it_op & IT_OPEN && - (!(it->it_disposition & IT_OPEN_OPEN) || - it->it_status != 0)) { - unsigned long flags; - - spin_lock_irqsave (&request->rq_lock, flags); - request->rq_replay = 0; - spin_unlock_irqrestore (&request->rq_lock, flags); - } - - if (it->it_op & IT_CREAT) { - mdc_store_inode_generation(request, 2, 1); - /* The server will return to us, in it_disposition, an - * indication of exactly what it_status refers to. - * - * If IT_OPEN_OPEN is set, then it_status refers to the - * open() call, otherwise if IT_OPEN_CREATE is set, then - * it status is the creation failure mode. In either - * case, one of IT_OPEN_NEG or IT_OPEN_POS will be set, - * indicating whether the child lookup was successful. - * - * Else, if IT_OPEN_LOOKUP then it_status is the rc - * of the child lookup. - * - * Finally, if none of the bits are set, then the - * failure occurred while looking up the parent. 
*/ - rc = ll_it_open_error(IT_OPEN_LOOKUP, it); - if (rc) - GOTO(drop_req, rc); - - if (it->it_disposition & IT_OPEN_CREATE) - ptlrpc_request_addref(request); - if (it->it_disposition & IT_OPEN_OPEN) - ptlrpc_request_addref(request); - - if (it->it_disposition & IT_OPEN_NEG) - flag = LL_LOOKUP_NEGATIVE; - else - flag = LL_LOOKUP_POSITIVE; - } else if (it->it_op == IT_OPEN) { - LASSERT(!(it->it_disposition & IT_OPEN_CREATE)); - - rc = ll_it_open_error(IT_OPEN_LOOKUP, it); - if (rc) - GOTO(drop_req, rc); - - if (it->it_disposition & IT_OPEN_OPEN) - ptlrpc_request_addref(request); - - if (it->it_disposition & IT_OPEN_NEG) - flag = LL_LOOKUP_NEGATIVE; - else - flag = LL_LOOKUP_POSITIVE; - } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { - /* For check ops, we want the lookup to succeed */ - it->it_data = NULL; - if (it->it_status) - flag = LL_LOOKUP_NEGATIVE; - else - flag = LL_LOOKUP_POSITIVE; - } else - LBUG(); - } else { - struct ll_fid fid; - obd_flag valid; - int eadatalen; - int mode; - - LBUG(); /* For the moment, no non-intent locks */ - - /* it_disposition == 0 indicates that it just did a simple lock - * request, for which we are very thankful. move along with - * the local lookup then. 
*/ - - //memcpy(&lli->lli_intent_lock_handle, &lockh, sizeof(lockh)); - offset = 0; - - ino = ll_inode_by_name(parent, dentry, &mode); - if (!ino) { - CERROR("inode %*s not found by name\n", - dentry->d_name.len, dentry->d_name.name); - GOTO(drop_lock, rc = -ENOENT); - } - - valid = OBD_MD_FLNOTOBD; - - if (S_ISREG(mode)) { - eadatalen = obd_size_diskmd(&sbi->ll_osc_conn, NULL), - valid |= OBD_MD_FLEASIZE; - } else { - eadatalen = 0; - valid |= OBD_MD_FLBLOCKS; - } - - fid.id = ino; - fid.generation = 0; - fid.f_type = mode; - rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, valid, - eadatalen, &request); - if (rc) { - CERROR("failure %d inode "LPX64"\n", rc, ino); - GOTO(drop_lock, rc = -abs(rc)); - } - } - - LASSERT (request != NULL); - - if (intent_finish != NULL) { - struct lustre_handle old_lock; - struct ldlm_lock *lock; - - rc = intent_finish(flag, request, parent, de, it, offset, ino); - dentry = *de; /* intent_finish may change *de */ - inode = dentry->d_inode; - if (rc != 0) - GOTO(drop_lock, rc); - - /* The intent processing may well have given us a lock different - * from the one we requested. If we already have a matching - * lock, then cancel the new one. (We have to do this here, - * instead of in mdc_enqueue, because we need to use the child's - * inode as the l_data to match, and that's not available until - * intent_finish has performed the iget().) 
*/ - lock = ldlm_handle2lock(&lockh); - if (lock) { - LDLM_DEBUG(lock, "matching against this"); - LDLM_LOCK_PUT(lock); - memcpy(&old_lock, &lockh, sizeof(lockh)); - if (ldlm_lock_match(NULL, - LDLM_FL_BLOCK_GRANTED | - LDLM_FL_MATCH_DATA, - NULL, LDLM_PLAIN, NULL, 0, LCK_NL, - inode, &old_lock)) { - ldlm_lock_decref_and_cancel(&lockh, - it->it_lock_mode); - memcpy(&lockh, &old_lock, sizeof(old_lock)); - memcpy(it->it_lock_handle, &lockh, - sizeof(lockh)); - } - } - - } - ptlrpc_req_finished(request); - - /* This places the intent in the dentry so that the vfs_xxx - * operation can lay its hands on it; but that is not always - * needed... (we need to save it in the GETATTR case for the - * benefit of ll_inode_revalidate -phil) */ - /* Ignore trying to save the intent for "special" inodes as - * they have special semantics that can cause deadlocks on - * the intent semaphore. -mmex */ - if ((!inode || S_ISDIR(inode->i_mode) || S_ISREG(inode->i_mode) || - S_ISLNK(inode->i_mode)) && (it->it_op & (IT_OPEN | IT_GETATTR))) - LL_SAVE_INTENT(dentry, it); - else - CDEBUG(D_DENTRY, - "D_IT dentry %p fsdata %p intent: %s status %d\n", - dentry, ll_d2d(dentry), ldlm_it2str(it->it_op), - it->it_status); - - if (it->it_op == IT_LOOKUP) - ll_intent_release(dentry, it); - - RETURN(rc); - - drop_lock: - ll_intent_release(dentry, it); - drop_req: - ptlrpc_req_finished(request); - RETURN(rc); -} - -/* Search "inode"'s alias list for a dentry that has the same name and parent as - * de. If found, return it. If not found, return de. */ -struct dentry *ll_find_alias(struct inode *inode, struct dentry *de) -{ - struct list_head *tmp; - - spin_lock(&dcache_lock); - list_for_each(tmp, &inode->i_dentry) { - struct dentry *dentry = list_entry(tmp, struct dentry, d_alias); - - /* We are called here with 'de' already on the aliases list. 
*/ - if (dentry == de) { - CERROR("whoops\n"); - continue; - } - - if (dentry->d_parent != de->d_parent) - continue; - - if (dentry->d_name.len != de->d_name.len) - continue; - - if (memcmp(dentry->d_name.name, de->d_name.name, - de->d_name.len) != 0) - continue; - - if (!list_empty(&dentry->d_lru)) - list_del_init(&dentry->d_lru); - - list_del_init(&dentry->d_hash); - __d_rehash(dentry, 0); /* avoid taking dcache_lock inside */ - spin_unlock(&dcache_lock); - atomic_inc(&dentry->d_count); - iput(inode); - dentry->d_flags &= ~DCACHE_LUSTRE_INVALID; - return dentry; - } - - spin_unlock(&dcache_lock); - - return de; -} - -static int -lookup2_finish(int flag, struct ptlrpc_request *request, - struct inode *parent, struct dentry **de, - struct lookup_intent *it, int offset, obd_id ino) -{ - struct ll_sb_info *sbi = ll_i2sbi(parent); - struct dentry *dentry = *de, *saved = *de; - struct inode *inode = NULL; - struct ll_read_inode2_cookie lic = {.lic_body = NULL, .lic_lsm = NULL}; - - /* NB 1 request reference will be taken away by ll_intent_lock() - * when I return */ - - if (!(flag & LL_LOOKUP_NEGATIVE)) { - ENTRY; - - /* We only get called if the mdc_enqueue() called from - * ll_intent_lock() was successful. 
Therefore the mds_body - * is present and correct, and the eadata is present if - * body->eadatasize != 0 (but still opaque, so only - * obd_unpackmd() can check the size) */ - lic.lic_body = lustre_msg_buf(request->rq_repmsg, offset, - sizeof (*lic.lic_body)); - LASSERT(lic.lic_body != NULL); - LASSERT_REPSWABBED(request, offset); - - if (S_ISREG(lic.lic_body->mode) && - (lic.lic_body->valid & OBD_MD_FLEASIZE)) { - struct lov_mds_md *lmm; - int lmm_size; - int rc; - - lmm_size = lic.lic_body->eadatasize; - if (lmm_size == 0) { - CERROR("OBD_MD_FLEASIZE set but " - "eadatasize 0\n"); - RETURN(-EPROTO); - } - lmm = lustre_msg_buf(request->rq_repmsg, offset + 1, - lmm_size); - LASSERT(lmm != NULL); - LASSERT_REPSWABBED(request, offset + 1); - - rc = obd_unpackmd(&sbi->ll_osc_conn, - &lic.lic_lsm, lmm, lmm_size); - if (rc < 0) { - CERROR("Error %d unpacking eadata\n", rc); - RETURN(rc); - } - LASSERT(rc >= sizeof(*lic.lic_lsm)); - } - - /* Both ENOMEM and an RPC timeout are possible in ll_iget; which - * to pick? A more generic EIO? 
-phik */ - inode = ll_iget(dentry->d_sb, ino, &lic); - if (!inode) { - /* free the lsm if we allocated one above */ - if (lic.lic_lsm != NULL) - obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm); - RETURN(-ENOMEM); - } else if (lic.lic_lsm != NULL && - ll_i2info(inode)->lli_smd != lic.lic_lsm) { - obd_free_memmd(&sbi->ll_osc_conn, &lic.lic_lsm); - } - - /* If this is a stat, get the authoritative file size */ - if (it->it_op == IT_GETATTR && S_ISREG(inode->i_mode) && - ll_i2info(inode)->lli_smd != NULL) { - struct ldlm_extent extent = {0, OBD_OBJECT_EOF}; - struct lustre_handle lockh = {0}; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - ldlm_error_t rc; - - LASSERT(lsm->lsm_object_id != 0); - - rc = ll_extent_lock(NULL, inode, lsm, LCK_PR, &extent, - &lockh); - if (rc != ELDLM_OK) { - iput(inode); - RETURN(-EIO); - } - ll_extent_unlock(NULL, inode, lsm, LCK_PR, &lockh); - } - - dentry = *de = ll_find_alias(inode, dentry); - - /* We asked for a lock on the directory, and may have been - * granted a lock on the inode. Just in case, fixup the data - * pointer. 
*/ - ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle, - inode); - } else { - ENTRY; - } - - dentry->d_op = &ll_d_ops; - ll_set_dd(dentry); - - if (dentry == saved) - d_add(dentry, inode); - - RETURN(0); -} - -static struct dentry *ll_lookup2(struct inode *parent, struct dentry *dentry, - struct lookup_intent *it) -{ - struct dentry *save = dentry, *retval; - int rc; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n", - dentry->d_name.name, parent->i_ino, parent->i_generation, - parent, LL_IT2STR(it)); - - rc = ll_intent_lock(parent, &dentry, it, lookup2_finish); - if (rc < 0) { - CDEBUG(D_INFO, "ll_intent_lock: %d\n", rc); - GOTO(out, retval = ERR_PTR(rc)); - } - - if (dentry == save) - GOTO(out, retval = NULL); - else - GOTO(out, retval = dentry); - out: - return retval; -} - -/* We depend on "mode" being set with the proper file type/umask by now */ -static struct inode *ll_create_node(struct inode *dir, const char *name, - int namelen, const void *data, int datalen, - int mode, __u64 extra, - struct lookup_intent *it) -{ - struct inode *inode; - struct ptlrpc_request *request = NULL; - struct mds_body *body; - time_t time = LTIME_S(CURRENT_TIME); - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct ll_read_inode2_cookie lic; - ENTRY; - - if (it && it->it_disposition) { - ll_invalidate_inode_pages(dir); - request = it->it_data; - body = lustre_msg_buf(request->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* checked already */ - LASSERT_REPSWABBED (request, 1); /* swabbed already */ - } else { - struct mdc_op_data op_data; - int gid = current->fsgid; - int rc; - - if (dir->i_mode & S_ISGID) { - gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } - - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, namelen, 0); - rc = mdc_create(&sbi->ll_mdc_conn, &op_data, - data, datalen, mode, current->fsuid, gid, - time, extra, &request); - if (rc) { - inode = ERR_PTR(rc); - GOTO(out, rc); - } - body = 
lustre_swab_repbuf(request, 0, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't unpack mds_body\n"); - GOTO (out, inode = ERR_PTR(-EPROTO)); - } - } - - lic.lic_body = body; - lic.lic_lsm = NULL; - - inode = ll_iget(dir->i_sb, body->ino, &lic); - if (!inode || is_bad_inode(inode)) { - /* XXX might need iput() for bad inode */ - int rc = -EIO; - CERROR("new_inode -fatal: rc %d\n", rc); - LBUG(); - GOTO(out, rc); - } - - if (!list_empty(&inode->i_dentry)) { - CERROR("new_inode -fatal: inode %d, ct %d lnk %d\n", - body->ino, atomic_read(&inode->i_count), - inode->i_nlink); - iput(inode); - LBUG(); - inode = ERR_PTR(-EIO); - GOTO(out, -EIO); - } - - if (it && it->it_disposition) { - /* We asked for a lock on the directory, but were - * granted a lock on the inode. Since we finally have - * an inode pointer, stuff it in the lock. */ - ll_mdc_lock_set_inode((struct lustre_handle*)it->it_lock_handle, - inode); - } - - EXIT; - out: - ptlrpc_req_finished(request); - return inode; -} - -static int ll_mdc_unlink(struct inode *dir, struct inode *child, __u32 mode, - const char *name, int len) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct mds_body *body; - struct lov_mds_md *eadata; - struct lov_stripe_md *lsm = NULL; - struct lustre_handle lockh; - struct lookup_intent it = { .it_op = IT_UNLINK }; - struct obdo *oa; - int err; - struct mdc_op_data op_data; - ENTRY; - - ll_prepare_mdc_op_data(&op_data, dir, child, name, len, mode); - - err = mdc_enqueue(&sbi->ll_mdc_conn, LDLM_PLAIN, &it, LCK_EX, - &op_data, &lockh, NULL, 0, - ldlm_completion_ast, ll_mdc_blocking_ast, - dir); - request = (struct ptlrpc_request *)it.it_data; - if (err < 0) - GOTO(out, err); - if (it.it_status) - GOTO(out, err = it.it_status); - err = 0; - - body = lustre_msg_buf (request->rq_repmsg, 1, sizeof (*body)); - LASSERT (body != NULL); /* checked by mdc_enqueue() */ - LASSERT_REPSWABBED (request, 1); /* swabbed by 
mdc_enqueue() */ - - if (!(body->valid & OBD_MD_FLEASIZE)) - GOTO(out, 0); - - if (body->eadatasize == 0) { - CERROR ("OBD_MD_FLEASIZE set but eadatasize zero\n"); - GOTO (out, err = -EPROTO); - } - - /* The MDS sent back the EA because we unlinked the last reference - * to this file. Use this EA to unlink the objects on the OST. - * Note that mdc_enqueue() has already checked there _is_ some EA - * data, but this data is opaque to both mdc_enqueue() and the MDS. - * We have to leave it to obd_unpackmd() to check it is complete - * and sensible. */ - eadata = lustre_msg_buf (request->rq_repmsg, 2, body->eadatasize); - LASSERT (eadata != NULL); - LASSERT_REPSWABBED (request, 2); - - err = obd_unpackmd(ll_i2obdconn(dir), &lsm, eadata, - body->eadatasize); - if (err < 0) { - CERROR("obd_unpackmd: %d\n", err); - GOTO (out_unlock, err); - } - LASSERT (err >= sizeof (*lsm)); - - oa = obdo_alloc(); - if (oa == NULL) - GOTO(out_free_memmd, err = -ENOMEM); - - oa->o_id = lsm->lsm_object_id; - oa->o_mode = body->mode & S_IFMT; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; - - err = obd_destroy(ll_i2obdconn(dir), oa, lsm, NULL); - obdo_free(oa); - if (err) - CERROR("obd destroy objid 0x"LPX64" error %d\n", - lsm->lsm_object_id, err); - out_free_memmd: - obd_free_memmd(ll_i2obdconn(dir), &lsm); - out_unlock: - ldlm_lock_decref_and_cancel(&lockh, LCK_EX); - out: - ptlrpc_req_finished(request); - return err; -} - -/* - * By the time this is called, we already have created the directory cache - * entry for the new file, but it is so far negative - it has no inode. - * - * We defer creating the OBD object(s) until open, to keep the intent and - * non-intent code paths similar, and also because we do not have the MDS - * inode number before calling ll_create_node() (which is needed for LOV), - * so we would need to do yet another RPC to the MDS to store the LOV EA - * data on the MDS. 
If needed, we would pass the PACKED lmm as data and - * lmm_size in datalen (the MDS still has code which will handle that). - * - * If the create succeeds, we fill in the inode information - * with d_instantiate(). - */ -static int ll_create(struct inode *dir, struct dentry *dentry, int mode) -{ - struct lookup_intent *it; - struct inode *inode; - int rc = 0; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),intent=%s\n", - dentry->d_name.name, dir->i_ino, dir->i_generation, dir, - LL_IT2STR(dentry->d_it)); - - it = dentry->d_it; - - rc = ll_it_open_error(IT_OPEN_CREATE, it); - if (rc) { - LL_GET_INTENT(dentry, it); - ptlrpc_req_finished(it->it_data); - RETURN(rc); - } - - inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, - NULL, 0, mode, 0, it); - - if (IS_ERR(inode)) { - LL_GET_INTENT(dentry, it); - RETURN(PTR_ERR(inode)); - } - - /* no directory data updates when intents rule */ - if (it && it->it_disposition) { - d_instantiate(dentry, inode); - RETURN(0); - } - - rc = ext2_add_nondir(dentry, inode); - RETURN(rc); -} - -static int ll_mknod(struct inode *dir, struct dentry *dentry, int mode, - int rdev) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_mknod2(struct inode *dir, const char *name, int len, int mode, - int rdev) -{ - struct ptlrpc_request *request = NULL; - time_t time = LTIME_S(CURRENT_TIME); - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct mdc_op_data op_data; - int err = -EMLINK; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); - - if (dir->i_nlink >= EXT2_LINK_MAX) - RETURN(err); - - mode &= ~current->fs->umask; - - switch (mode & S_IFMT) { - case 0: case S_IFREG: - mode |= S_IFREG; /* for mode = 0 case, fallthrough */ - case S_IFCHR: case S_IFBLK: - case S_IFIFO: case S_IFSOCK: - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode, - current->fsuid, current->fsgid, time, - rdev, 
&request); - ptlrpc_req_finished(request); - break; - case S_IFDIR: - err = -EPERM; - break; - default: - err = -EINVAL; - } - RETURN(err); -} - -static int ll_symlink(struct inode *dir, struct dentry *dentry, - const char *symname) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_symlink2(struct inode *dir, const char *name, int len, - const char *tgt) -{ - struct ptlrpc_request *request = NULL; - time_t time = LTIME_S(CURRENT_TIME); - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct mdc_op_data op_data; - int err = -EMLINK; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p),target=%s\n", - name, dir->i_ino, dir->i_generation, dir, tgt); - - if (dir->i_nlink >= EXT2_LINK_MAX) - RETURN(err); - - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(&sbi->ll_mdc_conn, &op_data, - tgt, strlen(tgt) + 1, S_IFLNK | S_IRWXUGO, - current->fsuid, current->fsgid, time, 0, &request); - ptlrpc_req_finished(request); - RETURN(err); -} - -static int ll_link(struct dentry *old_dentry, struct inode * dir, - struct dentry *dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_link2(struct inode *src, struct inode *dir, - const char *name, int len) -{ - struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; - int err; - struct ll_sb_info *sbi = ll_i2sbi(dir); - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),dir=%lu/%u(%p),target=%s\n", - src->i_ino, src->i_generation, src, - dir->i_ino, dir->i_generation, dir, name); - - ll_prepare_mdc_op_data(&op_data, src, dir, name, len, 0); - err = mdc_link(&sbi->ll_mdc_conn, &op_data, &request); - ptlrpc_req_finished(request); - - RETURN(err); -} - -static int ll_mkdir(struct inode *dir, struct dentry *dentry, int mode) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_mkdir2(struct inode *dir, const char *name, int len, int mode) -{ - struct ptlrpc_request *request = NULL; - time_t time = LTIME_S(CURRENT_TIME); - struct ll_sb_info *sbi = ll_i2sbi(dir); - struct mdc_op_data 
op_data; - int err = -EMLINK; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); - - if (dir->i_nlink >= EXT2_LINK_MAX) - RETURN(err); - - mode = (mode & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR; - ll_prepare_mdc_op_data(&op_data, dir, NULL, name, len, 0); - err = mdc_create(&sbi->ll_mdc_conn, &op_data, NULL, 0, mode, - current->fsuid, current->fsgid, - time, 0, &request); - ptlrpc_req_finished(request); - RETURN(err); -} - -static int ll_rmdir2(struct inode *dir, const char *name, int len) -{ - int rc; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); - - rc = ll_mdc_unlink(dir, NULL, S_IFDIR, name, len); - RETURN(rc); -} - -static int ll_unlink2(struct inode *dir, const char *name, int len) -{ - int rc; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:name=%s,dir=%lu/%u(%p)\n", - name, dir->i_ino, dir->i_generation, dir); - - rc = ll_mdc_unlink(dir, NULL, S_IFREG, name, len); - RETURN(rc); -} - -static int ll_unlink(struct inode *dir, struct dentry *dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_rmdir(struct inode *dir, struct dentry *dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_rename(struct inode * old_dir, struct dentry * old_dentry, - struct inode * new_dir, struct dentry * new_dentry) -{ - LBUG(); - return -ENOSYS; -} - -static int ll_rename2(struct inode *src, struct inode *tgt, - const char *oldname, int oldlen, - const char *newname, int newlen) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(src); - struct mdc_op_data op_data; - int err; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:oldname=%s,src_dir=%lu/%u(%p),newname=%s," - "tgt_dir=%lu/%u(%p)\n", oldname, src->i_ino, src->i_generation, - src, newname, tgt->i_ino, tgt->i_generation, tgt); - - ll_prepare_mdc_op_data(&op_data, src, tgt, NULL, 0, 0); - err = mdc_rename(&sbi->ll_mdc_conn, &op_data, - oldname, oldlen, newname, newlen, 
&request); - ptlrpc_req_finished(request); - - RETURN(err); -} - -extern int ll_inode_revalidate(struct dentry *dentry); -struct inode_operations ll_dir_inode_operations = { - create: ll_create, - lookup2: ll_lookup2, - link: ll_link, /* LBUG() */ - link2: ll_link2, - unlink: ll_unlink, /* LBUG() */ - unlink2: ll_unlink2, - symlink: ll_symlink, /* LBUG() */ - symlink2: ll_symlink2, - mkdir: ll_mkdir, /* LBUG() */ - mkdir2: ll_mkdir2, - rmdir: ll_rmdir, /* LBUG() */ - rmdir2: ll_rmdir2, - mknod: ll_mknod, /* LBUG() */ - mknod2: ll_mknod2, - rename: ll_rename, /* LBUG() */ - rename2: ll_rename2, - setattr: ll_setattr, - setattr_raw: ll_setattr_raw, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - revalidate: ll_inode_revalidate, -#endif -}; diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c deleted file mode 100644 index 98f6086..0000000 --- a/lustre/llite/rw.c +++ /dev/null @@ -1,705 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Lite I/O Page Cache - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/config.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/version.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include "llite_internal.h" - -#include <linux/fs.h> -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include <linux/buffer_head.h> -#else -#include <linux/iobuf.h> -#endif -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <linux/mm.h> -#include <linux/pagemap.h> -#include <linux/smp_lock.h> - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/lustre_mds.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_compat25.h> - -/* - * Remove page from dirty list - */ -static void __set_page_clean(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct inode *inode; - - if (!mapping) - return; - - PGCACHE_WRLOCK(mapping); - - list_del(&page->list); - list_add(&page->list, &mapping->clean_pages); - - /* XXX doesn't inode_lock protect i_state ? 
*/ - inode = mapping->host; - if (list_empty(&mapping->dirty_pages)) { - CDEBUG(D_INODE, "inode clean\n"); - inode->i_state &= ~I_DIRTY_PAGES; - } - - PGCACHE_WRUNLOCK(mapping); - EXIT; -} - -void set_page_clean(struct page *page) -{ - if (PageDirty(page)) { - ClearPageDirty(page); - __set_page_clean(page); - } -} - -/* SYNCHRONOUS I/O to object storage for an inode */ -static int ll_brw(int cmd, struct inode *inode, struct page *page, int flags) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct brw_page pg; - int rc; - ENTRY; - - pg.pg = page; - pg.off = ((obd_off)page->index) << PAGE_SHIFT; - - if (cmd == OBD_BRW_WRITE && (pg.off + PAGE_SIZE > inode->i_size)) - pg.count = inode->i_size % PAGE_SIZE; - else - pg.count = PAGE_SIZE; - - CDEBUG(D_PAGE, "%s %d bytes ino %lu at "LPU64"/"LPX64"\n", - cmd & OBD_BRW_WRITE ? "write" : "read", pg.count, inode->i_ino, - pg.off, pg.off); - if (pg.count == 0) { - CERROR("ZERO COUNT: ino %lu: size %p:%Lu(%p:%Lu) idx %lu off " - LPU64"\n", - inode->i_ino, inode, inode->i_size, page->mapping->host, - page->mapping->host->i_size, page->index, pg.off); - } - - pg.flag = flags; - - if (cmd == OBD_BRW_WRITE) - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_BRW_WRITE, pg.count); - else - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_BRW_READ, pg.count); - rc = obd_brw(cmd, ll_i2obdconn(inode), lsm, 1, &pg, NULL); - if (rc) - CERROR("error from obd_brw: rc = %d\n", rc); - - RETURN(rc); -} - -/* - * we were asked to read a single page but we're going to try and read a batch - * of pages all at once. this vaguely simulates 2.5's readpages. 
- */ -static int ll_readpage(struct file *file, struct page *first_page) -{ - struct inode *inode = first_page->mapping->host; - struct ll_inode_info *lli = ll_i2info(inode); - struct page *page = first_page; - struct list_head *pos; - struct brw_page *pgs; - unsigned long end_index, extent_end = 0; - struct ptlrpc_request_set *set; - int npgs = 0, rc = 0, max_pages; - ENTRY; - - LASSERT(PageLocked(page)); - LASSERT(!PageUptodate(page)); - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p),offset="LPX64"\n", - inode->i_ino, inode->i_generation, inode, - (((obd_off)page->index) << PAGE_SHIFT)); - LASSERT(atomic_read(&file->f_dentry->d_inode->i_count) > 0); - - if (inode->i_size <= ((obd_off)page->index) << PAGE_SHIFT) { - CERROR("reading beyond EOF\n"); - memset(kmap(page), 0, PAGE_SIZE); - kunmap(page); - SetPageUptodate(page); - unlock_page(page); - RETURN(rc); - } - - /* try to read the file's preferred block size in a one-er */ - end_index = first_page->index + - (inode->i_blksize >> PAGE_CACHE_SHIFT); - if (end_index > (inode->i_size >> PAGE_CACHE_SHIFT)) - end_index = inode->i_size >> PAGE_CACHE_SHIFT; - - max_pages = ((end_index - first_page->index) << PAGE_CACHE_SHIFT) >> - PAGE_SHIFT; - pgs = kmalloc(max_pages * sizeof(*pgs), GFP_USER); - if (pgs == NULL) - RETURN(-ENOMEM); - - /* - * find how far we're allowed to read under the extent ll_file_read - * is passing us.. - */ - spin_lock(&lli->lli_read_extent_lock); - list_for_each(pos, &lli->lli_read_extents) { - struct ll_read_extent *rextent; - rextent = list_entry(pos, struct ll_read_extent, re_lli_item); - if (rextent->re_task != current) - continue; - - if (rextent->re_extent.end + PAGE_SIZE < rextent->re_extent.end) - /* extent wrapping */ - extent_end = ~0; - else { - extent_end = (rextent->re_extent.end + PAGE_SIZE) - << PAGE_CACHE_SHIFT; - /* 32bit indexes, 64bit extents.. 
*/ - if (((u64)extent_end >> PAGE_CACHE_SHIFT) < - rextent->re_extent.end) - extent_end = ~0; - } - break; - } - spin_unlock(&lli->lli_read_extent_lock); - - if (extent_end == 0) { - static long next_print; - if (time_after(jiffies, next_print)) { - next_print = jiffies + 30 * HZ; - CDEBUG(D_INODE, "mmap readpage - check locks\n"); - } - end_index = page->index + 1; - } else if (extent_end < end_index) - end_index = extent_end; - - /* to balance the find_get_page ref the other pages get that is - * decrefed on teardown.. */ - page_cache_get(page); - do { - unsigned long index ; - - pgs[npgs].pg = page; - pgs[npgs].off = ((obd_off)page->index) << PAGE_CACHE_SHIFT; - pgs[npgs].flag = 0; - pgs[npgs].count = PAGE_SIZE; - /* XXX Workaround for BA OSTs returning short reads at EOF. - * The linux OST will return the full page, zero-filled at the - * end, which will just overwrite the data we set here. Bug - * 593 relates to fixing this properly. - */ - if (inode->i_size < pgs[npgs].off + PAGE_SIZE) { - int count = inode->i_size - pgs[npgs].off; - void *addr = kmap(page); - pgs[npgs].count = count; - //POISON(addr, 0x7c, count); - memset(addr + count, 0, PAGE_SIZE - count); - kunmap(page); - } - - npgs++; - if (npgs == max_pages) - break; - - /* - * find pages ahead of us that we can read in. - * grab_cache_page waits on pages that are locked so - * we first try find_get_page, which doesn't. this stops - * the worst case behaviour of racing threads waiting on - * each other, but doesn't remove it entirely. - */ - for (index = page->index + 1, page = NULL; - page == NULL && index < end_index; index++) { - - /* see if the page already exists and needs updating */ - page = find_get_page(inode->i_mapping, index); - if (page) { - if (Page_Uptodate(page) || TryLockPage(page)) - goto out_release; - if (!page->mapping || Page_Uptodate(page)) - goto out_unlock; - } else { - /* ok, we have to create it.. 
*/ - page = grab_cache_page(inode->i_mapping, index); - if (page == NULL) - continue; - if (Page_Uptodate(page)) - goto out_unlock; - } - - break; - - out_unlock: - unlock_page(page); - out_release: - page_cache_release(page); - page = NULL; - } - - } while (page); - - set = ptlrpc_prep_set(); - if (set == NULL) { - CERROR("ENOMEM allocing request set\n"); - rc = -ENOMEM; - } else { - rc = obd_brw_async(OBD_BRW_READ, ll_i2obdconn(inode), - ll_i2info(inode)->lli_smd, npgs, pgs, - set, NULL); - if (rc == 0) - rc = ptlrpc_set_wait(set); - ptlrpc_set_destroy(set); - if (rc && rc != -EIO) - CERROR("error from obd_brw_async: rc = %d\n", rc); - } - - while (npgs-- > 0) { - page = pgs[npgs].pg; - - if (rc == 0) - SetPageUptodate(page); - unlock_page(page); - page_cache_release(page); - } - - kfree(pgs); - RETURN(rc); -} /* ll_readpage */ - -/* this isn't where truncate starts. roughly: - * sys_truncate->ll_setattr_raw->vmtruncate->ll_truncate - * we grab the lock back in setattr_raw to avoid races. 
*/ -void ll_truncate(struct inode *inode) -{ - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa = {0}; - int err; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - if (!lsm) { - /* object not yet allocated */ - inode->i_mtime = inode->i_ctime = CURRENT_TIME; - EXIT; - return; - } - - /* vmtruncate will just throw away our dirty pages, make sure - * we don't think they're still dirty, being careful to round - * i_size to the first whole page that was tossed */ - err = ll_clear_dirty_pages(ll_i2obdconn(inode), lsm, - (inode->i_size + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT, - ~0); - - oa.o_id = lsm->lsm_object_id; - oa.o_mode = inode->i_mode; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLMODE | OBD_MD_FLTYPE; - - CDEBUG(D_INFO, "calling punch for "LPX64" (all bytes after %Lu)\n", - oa.o_id, inode->i_size); - - /* truncate == punch from new size to absolute end of file */ - err = obd_punch(ll_i2obdconn(inode), &oa, lsm, inode->i_size, - OBD_OBJECT_EOF, NULL); - if (err) - CERROR("obd_truncate fails (%d) ino %lu\n", err, inode->i_ino); - else - obdo_to_inode(inode, &oa, oa.o_valid); - - EXIT; - return; -} /* ll_truncate */ - -//#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -static int ll_prepare_write(struct file *file, struct page *page, unsigned from, - unsigned to) -{ - struct inode *inode = page->mapping->host; - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - obd_off offset = ((obd_off)page->index) << PAGE_SHIFT; - struct brw_page pg; - int rc = 0; - ENTRY; - - if (!PageLocked(page)) - LBUG(); - - if (PageUptodate(page)) - RETURN(0); - - //POISON(addr + from, 0xca, to - from); - - /* Check to see if we should return -EIO right away */ - pg.pg = page; - pg.off = offset; - pg.count = PAGE_SIZE; - pg.flag = 0; - rc = obd_brw(OBD_BRW_CHECK, ll_i2obdconn(inode), lsm, 1, &pg, NULL); - if (rc) - RETURN(rc); - - /* We're completely overwriting an 
existing page, so _don't_ set it up - * to date until commit_write */ - if (from == 0 && to == PAGE_SIZE) - RETURN(0); - - /* If are writing to a new page, no need to read old data. - * the extent locking and getattr procedures in ll_file_write have - * guaranteed that i_size is stable enough for our zeroing needs */ - if (inode->i_size <= offset) { - memset(kmap(page), 0, PAGE_SIZE); - kunmap(page); - GOTO(prepare_done, rc = 0); - } - - rc = ll_brw(OBD_BRW_READ, inode, page, 0); - - EXIT; - prepare_done: - if (rc == 0) - SetPageUptodate(page); - - return rc; -} - -/* - * background file writeback. This is called regularly from kupdated to write - * dirty data, from kswapd when memory is low, and from filemap_fdatasync when - * super blocks or inodes are synced.. - * - * obd_brw errors down in _batch_writepage are ignored, so pages are always - * unlocked. Also, there is nobody to return an error code to from here - the - * application may not even be running anymore. - * - * this should be async so that things like kswapd can have a chance to - * free some more pages that our allocating writeback may need, but it isn't - * yet. 
- */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static unsigned long ll_local_cache_dirty_pages; -static unsigned long ll_max_dirty_pages = 20 * 1024 * 1024 / PAGE_SIZE; - -static spinlock_t ll_local_cache_page_count_lock = SPIN_LOCK_UNLOCKED; - -int ll_rd_dirty_pages(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - unsigned long dirty_count; - spin_lock(&ll_local_cache_page_count_lock); - dirty_count = ll_local_cache_dirty_pages; - spin_unlock(&ll_local_cache_page_count_lock); - return snprintf(page, count, "%lu\n", dirty_count); -} - -int ll_rd_max_dirty_pages(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - unsigned long max_dirty; - spin_lock(&ll_local_cache_page_count_lock); - max_dirty = ll_max_dirty_pages; - spin_unlock(&ll_local_cache_page_count_lock); - return snprintf(page, count, "%lu\n", max_dirty); -} - -int ll_wr_max_dirty_pages(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - unsigned long max_dirty; - signed long max_dirty_signed; - char kernbuf[20], *end; - - if (count > (sizeof(kernbuf) - 1)) - return -EINVAL; - - if (copy_from_user(kernbuf, buffer, count)) - return -EFAULT; - - kernbuf[count] = '\0'; - - max_dirty_signed = simple_strtol(kernbuf, &end, 0); - if (kernbuf == end) - return -EINVAL; - max_dirty = (unsigned long)max_dirty_signed; - -#if 0 - if (max_dirty < ll_local_cache_dirty_pages) - flush_to_new_max_dirty(); -#endif - - spin_lock(&ll_local_cache_page_count_lock); - CDEBUG(D_CACHE, "changing max_dirty from %lu to %lu\n", - ll_max_dirty_pages, max_dirty); - ll_max_dirty_pages = max_dirty; - spin_unlock(&ll_local_cache_page_count_lock); - return count; -} - -static int ll_local_cache_full(void) -{ - int full = 0; - spin_lock(&ll_local_cache_page_count_lock); - if (ll_max_dirty_pages && - ll_local_cache_dirty_pages >= ll_max_dirty_pages) { - full = 1; - } - spin_unlock(&ll_local_cache_page_count_lock); - /* XXX instrument? 
*/ - /* XXX trigger async writeback when full, or 75% of full? */ - return full; -} - -static void ll_local_cache_flushed_pages(unsigned long pgcount) -{ - unsigned long dirty_count; - spin_lock(&ll_local_cache_page_count_lock); - dirty_count = ll_local_cache_dirty_pages; - ll_local_cache_dirty_pages -= pgcount; - CDEBUG(D_CACHE, "dirty pages: %lu->%lu)\n", - dirty_count, ll_local_cache_dirty_pages); - spin_unlock(&ll_local_cache_page_count_lock); - LASSERT(dirty_count >= pgcount); -} - -static void ll_local_cache_dirtied_pages(unsigned long pgcount) -{ - unsigned long dirty_count; - spin_lock(&ll_local_cache_page_count_lock); - dirty_count = ll_local_cache_dirty_pages; - ll_local_cache_dirty_pages += pgcount; - CDEBUG(D_CACHE, "dirty pages: %lu->%lu\n", - dirty_count, ll_local_cache_dirty_pages); - spin_unlock(&ll_local_cache_page_count_lock); - /* XXX track maximum cached, report to lprocfs */ -} - -int ll_clear_dirty_pages(struct lustre_handle *conn, struct lov_stripe_md *lsm, - unsigned long start, unsigned long end) -{ - unsigned long cleared; - int rc; - - ENTRY; - rc = obd_clear_dirty_pages(conn, lsm, start, end, &cleared); - if (!rc) - ll_local_cache_flushed_pages(cleared); - RETURN(rc); -} - -int ll_mark_dirty_page(struct lustre_handle *conn, struct lov_stripe_md *lsm, - unsigned long index) -{ - int rc; - - ENTRY; - if (ll_local_cache_full()) - RETURN(-EDQUOT); - - rc = obd_mark_page_dirty(conn, lsm, index); - if (!rc) - ll_local_cache_dirtied_pages(1); - RETURN(rc); -} - -static int ll_writepage(struct page *page) -{ - struct inode *inode = page->mapping->host; - ENTRY; - - CDEBUG(D_CACHE, "page %p [lau %d] inode %p\n", page, - PageLaunder(page), inode); - LASSERT(PageLocked(page)); - - /* XXX should obd_brw errors trickle up? */ - ll_batch_writepage(inode, page); - RETURN(0); -} - -/* - * we really don't want to start writeback here, we want to give callers some - * time to further dirty the pages before we write them out. 
- */ -static int ll_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - struct inode *inode = page->mapping->host; - loff_t size; - int rc = 0; - ENTRY; - - LASSERT(inode == file->f_dentry->d_inode); - LASSERT(PageLocked(page)); - - CDEBUG(D_INODE, "inode %p is writing page %p from %d to %d at %lu\n", - inode, page, from, to, page->index); - if (!PageDirty(page)) { - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRTY_MISSES); - rc = ll_mark_dirty_page(ll_i2obdconn(inode), - ll_i2info(inode)->lli_smd, - page->index); - if (rc < 0 && rc != -EDQUOT) - RETURN(rc); /* XXX lproc counter here? */ - } else { - lprocfs_counter_incr(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRTY_HITS); - } - - size = (((obd_off)page->index) << PAGE_SHIFT) + to; - if (size > inode->i_size) - inode->i_size = size; - - SetPageUptodate(page); - set_page_dirty(page); - - /* This means that we've hit either the local cache limit or the limit - * of the OST's grant. */ - if (rc == -EDQUOT) { - int rc = ll_batch_writepage(inode, page); - lock_page(page); /* caller expects to unlock */ - RETURN(rc); - } - - RETURN(0); -} /* ll_commit_write */ -#else -static int ll_writepage(struct page *page, - struct writeback_control *wbc) -{ - - return 0; -} -static int ll_commit_write(struct file *file, struct page *page, - unsigned from, unsigned to) -{ - return 0; -} -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, - unsigned long blocknr, int blocksize) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct lov_stripe_md *lsm = lli->lli_smd; - struct brw_page *pga; - struct ptlrpc_request_set *set; - int length, i, flags, rc = 0; - loff_t offset; - ENTRY; - - if (!lsm || !lsm->lsm_object_id) - RETURN(-ENOMEM); - - if ((iobuf->offset & (blocksize - 1)) || - (iobuf->length & (blocksize - 1))) - RETURN(-EINVAL); - - set = ptlrpc_prep_set(); - if (set == NULL) - RETURN(-ENOMEM); 
- - OBD_ALLOC(pga, sizeof(*pga) * iobuf->nr_pages); - if (!pga) { - ptlrpc_set_destroy(set); - RETURN(-ENOMEM); - } - - flags = (rw == WRITE ? OBD_BRW_CREATE : 0) /* | OBD_BRW_DIRECTIO */; - offset = ((obd_off)blocknr << inode->i_blkbits); - length = iobuf->length; - - for (i = 0, length = iobuf->length; length > 0; - length -= pga[i].count, offset += pga[i].count, i++) { /*i last!*/ - pga[i].pg = iobuf->maplist[i]; - pga[i].off = offset; - /* To the end of the page, or the length, whatever is less */ - pga[i].count = min_t(int, PAGE_SIZE - (offset & ~PAGE_MASK), - length); - pga[i].flag = flags; - if (rw == READ) { - //POISON(kmap(iobuf->maplist[i]), 0xc5, PAGE_SIZE); - //kunmap(iobuf->maplist[i]); - } - } - - if (rw == WRITE) - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRECT_WRITE, iobuf->length); - else - lprocfs_counter_add(ll_i2sbi(inode)->ll_stats, - LPROC_LL_DIRECT_READ, iobuf->length); - rc = obd_brw_async(rw == WRITE ? OBD_BRW_WRITE : OBD_BRW_READ, - ll_i2obdconn(inode), lsm, iobuf->nr_pages, pga, set, - NULL); - if (rc) { - CDEBUG(rc == -ENOSPC ? 
D_INODE : D_ERROR, - "error from obd_brw_async: rc = %d\n", rc); - } else { - rc = ptlrpc_set_wait(set); - if (rc) - CERROR("error from callback: rc = %d\n", rc); - } - ptlrpc_set_destroy(set); - if (rc == 0) - rc = iobuf->length; - - OBD_FREE(pga, sizeof(*pga) * iobuf->nr_pages); - RETURN(rc); -} -#endif - -//#endif - -struct address_space_operations ll_aops = { - readpage: ll_readpage, -#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)) - direct_IO: ll_direct_IO, -#endif - writepage: ll_writepage, - sync_page: block_sync_page, - prepare_write: ll_prepare_write, - commit_write: ll_commit_write, - bmap: NULL -//#endif -}; diff --git a/lustre/llite/super.c b/lustre/llite/super.c deleted file mode 100644 index 85532f0..0000000 --- a/lustre/llite/super.c +++ /dev/null @@ -1,854 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/module.h> -#include <linux/random.h> -#include <linux/version.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_ha.h> -#include <linux/lustre_dlm.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/lprocfs_status.h> -#include "llite_internal.h" - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -kmem_cache_t *ll_file_data_slab; -extern struct address_space_operations ll_aops; -extern struct address_space_operations ll_dir_aops; -struct super_operations ll_super_operations; - -/* /proc/lustre/llite root that tracks llite mount points */ -struct proc_dir_entry *proc_lustre_fs_root = NULL; -/* lproc_llite.c */ -extern void lprocfs_unregister_mountpoint(struct ll_sb_info *sbi); -extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent, - struct super_block *sb, - char *osc, char *mdc); - -extern int ll_recover(struct recovd_data *, int); -extern int ll_commitcbd_setup(struct ll_sb_info *); -extern int ll_commitcbd_cleanup(struct ll_sb_info *); - -static char *ll_read_opt(const char *opt, char *data) -{ - char *value; - char *retval; - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(NULL); - if ((value = strchr(data, '=')) == NULL) - RETURN(NULL); - - value++; - OBD_ALLOC(retval, strlen(value) + 1); - if (!retval) { - CERROR("out of memory!\n"); - RETURN(NULL); - } - - memcpy(retval, value, strlen(value)+1); - CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); - RETURN(retval); -} - -static int ll_set_opt(const char *opt, char *data, int fl) -{ - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(0); - else - RETURN(fl); -} - -static void ll_options(char *options, char **ost, char **mds, int *flags) -{ - char *this_char; - ENTRY; - - if (!options) { - EXIT; - return; - } - - for (this_char = strtok (options, ","); - this_char != NULL; - 
this_char = strtok (NULL, ",")) { - CDEBUG(D_SUPER, "this_char %s\n", this_char); - if ((!*ost && (*ost = ll_read_opt("osc", this_char)))|| - (!*mds && (*mds = ll_read_opt("mdc", this_char)))|| - (!(*flags & LL_SBI_NOLCK) && - ((*flags) = (*flags) | - ll_set_opt("nolock", this_char, LL_SBI_NOLCK)))) - continue; - } - EXIT; -} - -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif - -static struct super_block *ll_read_super(struct super_block *sb, - void *data, int silent) -{ - struct inode *root = 0; - struct obd_device *obd; - struct ll_sb_info *sbi; - struct obd_export *mdc_export; - char *osc = NULL; - char *mdc = NULL; - int err; - struct ll_fid rootfid; - struct obd_statfs osfs; - struct ptlrpc_request *request = NULL; - struct ptlrpc_connection *mdc_conn; - struct ll_read_inode2_cookie lic; - class_uuid_t uuid; - - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); - OBD_ALLOC(sbi, sizeof(*sbi)); - if (!sbi) - RETURN(NULL); - - INIT_LIST_HEAD(&sbi->ll_conn_chain); - INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); - generate_random_uuid(uuid); - class_uuid_unparse(uuid, &sbi->ll_sb_uuid); - - sb->u.generic_sbp = sbi; - - ll_options(data, &osc, &mdc, &sbi->ll_flags); - - if (!osc) { - CERROR("no osc\n"); - GOTO(out_free, sb = NULL); - } - - if (!mdc) { - CERROR("no mdc\n"); - GOTO(out_free, sb = NULL); - } - - obd = class_name2obd(mdc); - if (!obd) { - CERROR("MDC %s: not setup or attached\n", mdc); - GOTO(out_free, sb = NULL); - } - - err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", mdc, err); - GOTO(out_free, sb = NULL); - } - - mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection; - - obd = class_name2obd(osc); - if (!obd) { - CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, sb = NULL); - } - - err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, sb = NULL); - } - - err = 
mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - if (err) { - CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; - - memset(&osfs, 0, sizeof(osfs)); - mdc_export = class_conn2export(&sbi->ll_mdc_conn); - if (mdc_export == NULL) { - CERROR("null mdc_export\n"); - GOTO(out_osc, sb = NULL); - } - err = obd_statfs(mdc_export, &osfs); - class_export_put(mdc_export); - sb->s_blocksize = osfs.os_bsize; - sb->s_blocksize_bits = log2(osfs.os_bsize); - sb->s_magic = LL_SUPER_MAGIC; - sb->s_maxbytes = PAGE_CACHE_MAXBYTES; - - sb->s_op = &ll_super_operations; - - /* make root inode - * XXX: move this to after cbd setup? */ - err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid, - OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); - if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - - /* initialize committed transaction callback daemon */ - spin_lock_init(&sbi->ll_commitcbd_lock); - init_waitqueue_head(&sbi->ll_commitcbd_waitq); - init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq); - sbi->ll_commitcbd_flags = 0; - err = ll_commitcbd_setup(sbi); - if (err) { - CERROR("failed to start commit callback daemon: rc = %d\n",err); - ptlrpc_req_finished (request); - GOTO(out_osc, sb = NULL); - } - - lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, - sizeof(*lic.lic_body)); - LASSERT (lic.lic_body != NULL); /* checked by mdc_getattr() */ - LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr() */ - - lic.lic_lsm = NULL; - - LASSERT(sbi->ll_rootino != 0); - root = iget4(sb, sbi->ll_rootino, NULL, &lic); - - ptlrpc_req_finished(request); - - if (root == NULL || is_bad_inode(root)) { - /* XXX might need iput() for bad inode */ - CERROR("lustre_lite: bad iget4 for root\n"); - GOTO(out_cbd, sb = NULL); - } - - sb->s_root = d_alloc_root(root); - - if (proc_lustre_fs_root) { - err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, - 
osc, mdc); - if (err < 0) - CERROR("could not register mount in /proc/lustre"); - } - -out_dev: - if (mdc) - OBD_FREE(mdc, strlen(mdc) + 1); - if (osc) - OBD_FREE(osc, strlen(osc) + 1); - - RETURN(sb); - -out_cbd: - ll_commitcbd_cleanup(sbi); -out_osc: - obd_disconnect(&sbi->ll_osc_conn, 0); -out_mdc: - obd_disconnect(&sbi->ll_mdc_conn, 0); -out_free: - lprocfs_unregister_mountpoint(sbi); - OBD_FREE(sbi, sizeof(*sbi)); - - goto out_dev; -} /* ll_read_super */ - -static void ll_put_super(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct list_head *tmp, *next; - struct ll_fid rootfid; - struct obd_device *obd = class_conn2obd(&sbi->ll_mdc_conn); - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op: sb %p\n", sb); - list_del(&sbi->ll_conn_chain); - ll_commitcbd_cleanup(sbi); - obd_disconnect(&sbi->ll_osc_conn, 0); - - /* NULL request to force sync on the MDS, and get the last_committed - * value to flush remaining RPCs from the sending queue on client. - * - * XXX This should be an mdc_sync() call to sync the whole MDS fs, - * which we can call for other reasons as well. 
- */ - if (!obd->obd_no_recov) - mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - - lprocfs_unregister_mountpoint(sbi); - if (sbi->ll_proc_root) { - lprocfs_remove(sbi->ll_proc_root); - sbi->ll_proc_root = NULL; - } - - obd_disconnect(&sbi->ll_mdc_conn, 0); - - spin_lock(&dcache_lock); - list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list) { - struct dentry *dentry = list_entry(tmp, struct dentry, d_hash); - shrink_dcache_parent(dentry); - } - spin_unlock(&dcache_lock); - - OBD_FREE(sbi, sizeof(*sbi)); - - EXIT; -} /* ll_put_super */ - -static void ll_clear_inode(struct inode *inode) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, - LDLM_FL_NO_CALLBACK, inode); - if (rc < 0) { - CERROR("ll_mdc_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - - if (atomic_read(&inode->i_count) != 0) - CERROR("clearing in-use inode %lu: count = %d\n", - inode->i_ino, atomic_read(&inode->i_count)); - - if (lli->lli_smd) { - rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, - LDLM_FL_WARN, inode); - if (rc < 0) { - CERROR("obd_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd); - lli->lli_smd = NULL; - } - - if (lli->lli_symlink_name) { - OBD_FREE(lli->lli_symlink_name, - strlen(lli->lli_symlink_name) + 1); - lli->lli_symlink_name = NULL; - } - - EXIT; -} - -#if 0 -static void ll_delete_inode(struct inode *inode) -{ - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - if (S_ISREG(inode->i_mode)) { - int err; - struct obdo *oa; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - - /* mcreate with no open */ - if (!lsm) - GOTO(out, 0); - - if (lsm->lsm_object_id == 0) { - CERROR("This really happens\n"); - /* No 
obdo was ever created */ - GOTO(out, 0); - } - - oa = obdo_alloc(); - if (oa == NULL) - GOTO(out, -ENOMEM); - - oa->o_id = lsm->lsm_object_id; - obdo_from_inode(oa, inode, OBD_MD_FLID | OBD_MD_FLTYPE); - - err = obd_destroy(ll_i2obdconn(inode), oa, lsm, NULL); - obdo_free(oa); - if (err) - CDEBUG(D_INODE, - "inode %lu obd_destroy objid "LPX64" error %d\n", - inode->i_ino, lsm->lsm_object_id, err); - } -out: - clear_inode(inode); - EXIT; -} -#endif - -/* like inode_setattr, but doesn't mark the inode dirty */ -static int ll_attr2inode(struct inode *inode, struct iattr *attr, int trunc) -{ - unsigned int ia_valid = attr->ia_valid; - int error = 0; - - if ((ia_valid & ATTR_SIZE) && trunc) { - if (attr->ia_size > ll_file_maxbytes(inode)) { - error = -EFBIG; - goto out; - } - error = vmtruncate(inode, attr->ia_size); - if (error) - goto out; - } else if (ia_valid & ATTR_SIZE) - inode->i_size = attr->ia_size; - - if (ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; - if (ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; - if (ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; - if (ia_valid & ATTR_MODE) { - inode->i_mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - inode->i_mode &= ~S_ISGID; - } -out: - return error; -} - -int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(inode); - int err = 0; - ENTRY; - - /* change incore inode */ - err = ll_attr2inode(inode, attr, do_trunc); - if (err) - RETURN(err); - - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). 
- */ - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid) { - struct mdc_op_data op_data; - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - inode->i_ino, attr->ia_mtime); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME; - oa.o_mtime = attr->ia_mtime; - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - } - - RETURN(err); -} - -int ll_setattr_raw(struct inode *inode, struct iattr *attr) -{ - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ptlrpc_request *request = NULL; - struct mdc_op_data op_data; - int rc = 0, err; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - if ((attr->ia_valid & ATTR_SIZE)) { - struct ldlm_extent extent = {attr->ia_size, OBD_OBJECT_EOF}; - struct lustre_handle lockh = { 0 }; - - if (attr->ia_size > ll_file_maxbytes(inode)) - RETURN(-EFBIG); - - /* writeback uses inode->i_size to determine how far out - * its cached pages go. ll_truncate gets a PW lock, canceling - * our lock, _after_ it has updated i_size. this can confuse - * - * If this file doesn't have stripes yet, it is already, - * by definition, truncated. */ - if ((attr->ia_valid & ATTR_FROM_OPEN) && lsm == NULL) { - LASSERT(attr->ia_size == 0); - GOTO(skip_extent_lock, rc = 0); - } - - /* we really need to get our PW lock before we change - * inode->i_size. 
if we don't we can race with other - * i_size updaters on our node, like ll_file_read. we - * can also race with i_size propogation to other - * nodes through dirtying and writeback of final cached - * pages. this last one is especially bad for racing - * o_append users on other nodes. */ - rc = ll_extent_lock_no_validate(NULL, inode, lsm, LCK_PW, - &extent, &lockh); - if (rc != ELDLM_OK) { - if (rc > 0) - RETURN(-ENOLCK); - RETURN(rc); - } - - rc = vmtruncate(inode, attr->ia_size); - if (rc == 0) - set_bit(LLI_F_HAVE_SIZE_LOCK, - &ll_i2info(inode)->lli_flags); - - /* unlock now as we don't mind others file lockers racing with - * the mds updates below? */ - err = ll_extent_unlock(NULL, inode, lsm, LCK_PW, &lockh); - if (err) - CERROR("ll_extent_unlock failed: %d\n", err); - if (rc) - RETURN(rc); - } - -skip_extent_lock: - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). - */ - attr->ia_valid &= ~ATTR_SIZE; - if (!attr->ia_valid) - RETURN(0); - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - - if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_MTIME_SET)) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - if (lsm == NULL) { - CDEBUG(D_INODE, "no lsm: not setting mtime on OSTs\n"); - RETURN(err); - } - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - inode->i_ino, attr->ia_mtime); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME; - oa.o_mtime = attr->ia_mtime; - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - RETURN(err); -} - -int ll_setattr(struct dentry *de, struct iattr *attr) 
-{ - int rc = inode_change_ok(de->d_inode, attr); - CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name); - if (rc) - return rc; - lprocfs_counter_incr(ll_i2sbi(de->d_inode)->ll_stats, LPROC_LL_SETATTR); - - return ll_inode_setattr(de->d_inode, attr, 1); -} - -static int ll_statfs(struct super_block *sb, struct statfs *sfs) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_export *mdc_exp = class_conn2export(&sbi->ll_mdc_conn); - struct obd_export *osc_exp; - struct obd_statfs osfs; - int rc; - ENTRY; - - if (mdc_exp == NULL) - RETURN(-EINVAL); - - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - lprocfs_counter_incr(sbi->ll_stats, LPROC_LL_STAFS); - memset(sfs, 0, sizeof(*sfs)); - rc = obd_statfs(mdc_exp, &osfs); - statfs_unpack(sfs, &osfs); - if (rc) - CERROR("mdc_statfs fails: rc = %d\n", rc); - else - CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64 - " objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - /* temporary until mds_statfs returns statfs info for all OSTs */ - if (!rc) { - osc_exp = class_conn2export(&sbi->ll_osc_conn); - if (osc_exp == NULL) - GOTO(out, rc = -EINVAL); - rc = obd_statfs(osc_exp, &osfs); - class_export_put(osc_exp); - if (rc) { - CERROR("obd_statfs fails: rc = %d\n", rc); - GOTO(out, rc); - } - CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64 - " objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - while (osfs.os_blocks > ~0UL) { - sfs->f_bsize <<= 1; - - osfs.os_blocks >>= 1; - osfs.os_bfree >>= 1; - osfs.os_bavail >>= 1; - } - - sfs->f_blocks = osfs.os_blocks; - sfs->f_bfree = osfs.os_bfree; - sfs->f_bavail = osfs.os_bavail; - - /* If we don't have as many objects free on the OST as inodes - * on the MDS, we reduce the total number of inodes to - * compensate, so that the "inodes in use" number is correct. 
- */ - if (osfs.os_ffree < (__u64)sfs->f_ffree) { - sfs->f_files = (sfs->f_files - sfs->f_ffree) + - osfs.os_ffree; - sfs->f_ffree = osfs.os_ffree; - } - } - -out: - class_export_put(mdc_exp); - RETURN(rc); -} - -void dump_lsm(int level, struct lov_stripe_md *lsm) -{ - CDEBUG(level, "objid "LPX64", maxbytes "LPX64", magic %#08x, " - "stripe_size %#08x, offset %u, stripe_count %u\n", - lsm->lsm_object_id, lsm->lsm_maxbytes, lsm->lsm_magic, - lsm->lsm_stripe_size, lsm->lsm_stripe_offset, - lsm->lsm_stripe_count); -} - -void ll_update_inode(struct inode *inode, struct mds_body *body, - struct lov_stripe_md *lsm) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); - if (lsm != NULL) { - if (lli->lli_smd == NULL) { - lli->lli_maxbytes = lsm->lsm_maxbytes; - if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) - lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; - lli->lli_smd = lsm; - } else { - if (memcmp(lli->lli_smd, lsm, sizeof(*lsm))) { - CERROR("lsm mismatch for inode %ld\n", - inode->i_ino); - CERROR("lli_smd:\n"); - dump_lsm(D_ERROR, lli->lli_smd); - CERROR("lsm:\n"); - dump_lsm(D_ERROR, lsm); - LBUG(); - } - } - } - - if (body->valid & OBD_MD_FLID) - inode->i_ino = body->ino; - if (body->valid & OBD_MD_FLATIME) - LTIME_S(inode->i_atime) = body->atime; - if (body->valid & OBD_MD_FLMTIME) - LTIME_S(inode->i_mtime) = body->mtime; - if (body->valid & OBD_MD_FLCTIME) - LTIME_S(inode->i_ctime) = body->ctime; - if (body->valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); - if (body->valid & OBD_MD_FLUID) - inode->i_uid = body->uid; - if (body->valid & OBD_MD_FLGID) - inode->i_gid = body->gid; - if (body->valid & OBD_MD_FLFLAGS) - inode->i_flags = body->flags; - if (body->valid & OBD_MD_FLNLINK) - inode->i_nlink = body->nlink; - if (body->valid & OBD_MD_FLGENER) - 
inode->i_generation = body->generation; - if (body->valid & OBD_MD_FLRDEV) - inode->i_rdev = body->rdev; - if (body->valid & OBD_MD_FLSIZE) - inode->i_size = body->size; - if (body->valid & OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; -} - -static void ll_read_inode2(struct inode *inode, void *opaque) -{ - struct ll_read_inode2_cookie *lic = opaque; - struct mds_body *body = lic->lic_body; - struct ll_inode_info *lli = ll_i2info(inode); - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p)\n", inode->i_ino, - inode->i_generation, inode); - - sema_init(&lli->lli_open_sem, 1); - spin_lock_init(&lli->lli_read_extent_lock); - INIT_LIST_HEAD(&lli->lli_read_extents); - lli->lli_flags = 0; - /* We default to 2T-4k until the LSM is created/read, at which point - * it'll be updated. */ - lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES; - - LASSERT(!lli->lli_smd); - - /* core attributes from the MDS first */ - ll_update_inode(inode, body, lic->lic_lsm); - - /* OIDEBUG(inode); */ - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ll_file_inode_operations; - inode->i_fop = &ll_file_operations; - inode->i_mapping->a_ops = &ll_aops; - EXIT; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ll_dir_inode_operations; - inode->i_fop = &ll_dir_operations; - inode->i_mapping->a_ops = &ll_dir_aops; - EXIT; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &ll_fast_symlink_inode_operations; - EXIT; - } else { - inode->i_op = &ll_special_inode_operations; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - EXIT; - } -} - -void ll_umount_begin(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct obd_ioctl_data ioc_data = { 0 }; - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - obd = class_conn2obd(&sbi->ll_mdc_conn); - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data, - &ioc_data, NULL); - - obd = class_conn2obd(&sbi->ll_osc_conn); - obd->obd_no_recov = 1; - 
obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data, - &ioc_data, NULL); - - /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. For now, we just invalidate the requests, - * schedule, and hope. - */ - schedule(); - - EXIT; -} - -/* exported operations */ -struct super_operations ll_super_operations = -{ - read_inode2: ll_read_inode2, - clear_inode: ll_clear_inode, - // delete_inode: ll_delete_inode, - put_super: ll_put_super, - statfs: ll_statfs, - umount_begin: ll_umount_begin -}; - -static struct file_system_type lustre_lite_fs_type = { - name: "lustre_lite", - fs_flags: 0, - read_super: ll_read_super, - owner: THIS_MODULE, -}; - -static int __init init_lustre_lite(void) -{ - printk(KERN_INFO "Lustre Lite Client File System; " - "info@clusterfs.com\n"); - ll_file_data_slab = kmem_cache_create("ll_file_data", - sizeof(struct ll_file_data), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ll_file_data_slab == NULL) - return -ENOMEM; - - proc_lustre_fs_root = proc_lustre_root ? proc_mkdir("llite", proc_lustre_root) : NULL; - - return register_filesystem(&lustre_lite_fs_type); -} - -static void __exit exit_lustre_lite(void) -{ - unregister_filesystem(&lustre_lite_fs_type); - kmem_cache_destroy(ll_file_data_slab); - - if (proc_lustre_fs_root) { - lprocfs_remove(proc_lustre_fs_root); - proc_lustre_fs_root = NULL; - } -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Lite Client File System"); -MODULE_LICENSE("GPL"); - -module_init(init_lustre_lite); -module_exit(exit_lustre_lite); -#endif diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c deleted file mode 100644 index 980bfcd..0000000 --- a/lustre/llite/super25.c +++ /dev/null @@ -1,856 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Lustre Light Super operations - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/module.h> -#include <linux/random.h> -#include <linux/version.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_ha.h> -#include <linux/lustre_dlm.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/lprocfs_status.h> -#include "llite_internal.h" - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -kmem_cache_t *ll_file_data_slab; -extern struct address_space_operations ll_aops; -extern struct address_space_operations ll_dir_aops; -struct super_operations ll_super_operations; - -/* /proc/lustre/llite root that tracks llite mount points */ -struct proc_dir_entry *proc_lustre_fs_root = NULL; -/* lproc_llite.c */ -extern int lprocfs_register_mountpoint(struct proc_dir_entry *parent, - struct super_block *sb, - char *osc, char *mdc); - -extern int ll_init_inodecache(void); -extern void ll_destroy_inodecache(void); -extern int ll_recover(struct recovd_data *, int); -extern int ll_commitcbd_setup(struct ll_sb_info *); -extern int ll_commitcbd_cleanup(struct ll_sb_info *); -int ll_read_inode2(struct inode *inode, void *opaque); - -extern int ll_proc_namespace(struct super_block* sb, char* osc, char* mdc); - -static char *ll_read_opt(const char *opt, char *data) -{ - char *value; - char 
*retval; - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(NULL); - if ((value = strchr(data, '=')) == NULL) - RETURN(NULL); - - value++; - OBD_ALLOC(retval, strlen(value) + 1); - if (!retval) { - CERROR("out of memory!\n"); - RETURN(NULL); - } - - memcpy(retval, value, strlen(value)+1); - CDEBUG(D_SUPER, "Assigned option: %s, value %s\n", opt, retval); - RETURN(retval); -} - -static int ll_set_opt(const char *opt, char *data, int fl) -{ - ENTRY; - - CDEBUG(D_SUPER, "option: %s, data %s\n", opt, data); - if (strncmp(opt, data, strlen(opt))) - RETURN(0); - else - RETURN(fl); -} - -static void ll_options(char *options, char **ost, char **mds, int *flags) -{ - char *opt_ptr = options; - char *this_char; - ENTRY; - - if (!options) { - EXIT; - return; - } - - while ((this_char = strsep (&opt_ptr, ",")) != NULL) { - CDEBUG(D_SUPER, "this_char %s\n", this_char); - if ((!*ost && (*ost = ll_read_opt("osc", this_char)))|| - (!*mds && (*mds = ll_read_opt("mdc", this_char)))|| - (!(*flags & LL_SBI_NOLCK) && - ((*flags) = (*flags) | - ll_set_opt("nolock", this_char, LL_SBI_NOLCK)))) - continue; - } - EXIT; -} - -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif - - -static int ll_fill_super(struct super_block *sb, void *data, int silent) -{ - struct inode *root = 0; - struct obd_device *obd; - struct ll_sb_info *sbi; - char *osc = NULL; - char *mdc = NULL; - int err; - struct ll_fid rootfid; - struct obd_statfs osfs; - struct ptlrpc_request *request = NULL; - struct ptlrpc_connection *mdc_conn; - struct ll_read_inode2_cookie lic; - class_uuid_t uuid; - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - OBD_ALLOC(sbi, sizeof(*sbi)); - if (!sbi) - RETURN(-ENOMEM); - - INIT_LIST_HEAD(&sbi->ll_conn_chain); - INIT_LIST_HEAD(&sbi->ll_orphan_dentry_list); - generate_random_uuid(uuid); - class_uuid_unparse(uuid, &sbi->ll_sb_uuid); - - sb->s_fs_info = sbi; - - ll_options(data, &osc, &mdc, &sbi->ll_flags); - - if (!osc) { - 
CERROR("no osc\n"); - GOTO(out_free, sb = NULL); - } - - if (!mdc) { - CERROR("no mdc\n"); - GOTO(out_free, sb = NULL); - } - - obd = class_name2obd(mdc); - if (!obd) { - CERROR("MDC %s: not setup or attached\n", mdc); - GOTO(out_free, sb = NULL); - } - - err = obd_connect(&sbi->ll_mdc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", mdc, err); - GOTO(out_free, sb = NULL); - } - - mdc_conn = sbi2mdc(sbi)->cl_import->imp_connection; - - obd = class_name2obd(osc); - if (!obd) { - CERROR("OSC %s: not setup or attached\n", osc); - GOTO(out_mdc, sb = NULL); - } - - err = obd_connect(&sbi->ll_osc_conn, obd, &sbi->ll_sb_uuid); - if (err) { - CERROR("cannot connect to %s: rc = %d\n", osc, err); - GOTO(out_mdc, sb = NULL); - } - - err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - if (err) { - CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - CDEBUG(D_SUPER, "rootfid "LPU64"\n", rootfid.id); - sbi->ll_rootino = rootfid.id; - - memset(&osfs, 0, sizeof(osfs)); - err = obd_statfs(&sbi->ll_mdc_conn, &osfs); - sb->s_blocksize = osfs.os_bsize; - sb->s_blocksize_bits = log2(osfs.os_bsize); - sb->s_magic = LL_SUPER_MAGIC; - sb->s_maxbytes = PAGE_CACHE_MAXBYTES; - - sb->s_op = &ll_super_operations; - - /* make root inode - * XXX: move this to after cbd setup? 
*/ - err = mdc_getattr(&sbi->ll_mdc_conn, &rootfid, - OBD_MD_FLNOTOBD|OBD_MD_FLBLOCKS, 0, &request); - if (err) { - CERROR("mdc_getattr failed for root: rc = %d\n", err); - GOTO(out_osc, sb = NULL); - } - - /* initialize committed transaction callback daemon */ - spin_lock_init(&sbi->ll_commitcbd_lock); - init_waitqueue_head(&sbi->ll_commitcbd_waitq); - init_waitqueue_head(&sbi->ll_commitcbd_ctl_waitq); - sbi->ll_commitcbd_flags = 0; - err = ll_commitcbd_setup(sbi); - if (err) { - CERROR("failed to start commit callback daemon: rc = %d\n",err); - ptlrpc_req_finished (request); - GOTO(out_osc, sb = NULL); - } - - lic.lic_body = lustre_msg_buf(request->rq_repmsg, 0, sizeof (*lic.lic_body)); - LASSERT (lic.lic_body != NULL); /* checked by mdc_getattr() */ - LASSERT_REPSWABBED (request, 0); /* swabbed by mdc_getattr() */ - - lic.lic_lsm = NULL; - - root = iget5_locked(sb, sbi->ll_rootino, NULL, - ll_read_inode2, &lic); - - ptlrpc_req_finished(request); - - if (root == NULL || is_bad_inode(root)) { - /* XXX might need iput() for bad inode */ - CERROR("lustre_lite: bad iget5 for root\n"); - GOTO(out_cbd, sb = NULL); - } - - sb->s_root = d_alloc_root(root); - root->i_state &= ~(I_LOCK | I_NEW); - printk("AMRUT 1\n"); - if (proc_lustre_fs_root) { - err = lprocfs_register_mountpoint(proc_lustre_fs_root, sb, - osc, mdc); - if (err < 0) - CERROR("could not register mount in /proc/lustre"); - } - -out_dev: - if (mdc) - OBD_FREE(mdc, strlen(mdc) + 1); - if (osc) - OBD_FREE(osc, strlen(osc) + 1); - printk("AMRUT 2\n"); - - RETURN(0); - -out_cbd: - ll_commitcbd_cleanup(sbi); -out_osc: - obd_disconnect(&sbi->ll_osc_conn, 0); -out_mdc: - obd_disconnect(&sbi->ll_mdc_conn, 0); -out_free: - lprocfs_unregister_mountpoint(sbi); - OBD_FREE(sbi, sizeof(*sbi)); - - goto out_dev; -} /* ll_fill_super */ - - -int ll_setattr_raw(struct inode *inode, struct iattr *attr) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct mdc_op_data op_data; - int 
err = 0; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - - LPROC_COUNTER_INODE_INCBY1(inode, LPROC_LL_SETATTR); - if ((attr->ia_valid & ATTR_SIZE)) { - /* writeback uses inode->i_size to determine how far out - * its cached pages go. ll_truncate gets a PW lock, canceling - * our lock, _after_ it has updated i_size. this can confuse - * us into zero extending the file to the newly truncated - * size, and this has bad implications for a racing o_append. - * if we're extending our size we need to flush the pages - * with the correct i_size before vmtruncate stomps on - * the new i_size. again, this can only find pages to - * purge if the PW lock that generated them is still held. - */ - if ( attr->ia_size > inode->i_size ) { - filemap_fdatasync(inode->i_mapping); - filemap_fdatawait(inode->i_mapping); - } - err = vmtruncate(inode, attr->ia_size); - if (err) - RETURN(err); - } - - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). 
- */ - attr->ia_valid &= ~ATTR_SIZE; - if (!attr->ia_valid) - RETURN(0); - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - CDEBUG(D_INODE, "set mtime on OST inode %lu to %lu\n", - inode->i_ino, attr->ia_mtime); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMTIME; - oa.o_mtime = LTIME_S(attr->ia_mtime); - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - RETURN(err); -} -struct super_block * ll_get_sb(struct file_system_type *fs_type, - int flags, char *devname, void * data) -{ - return get_sb_nodev(fs_type, flags, data, ll_fill_super); -} - -static void ll_put_super(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct list_head *tmp, *next; - struct ll_fid rootfid; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - list_del(&sbi->ll_conn_chain); - ll_commitcbd_cleanup(sbi); - obd_disconnect(&sbi->ll_osc_conn, 0); - - /* NULL request to force sync on the MDS, and get the last_committed - * value to flush remaining RPCs from the pending queue on client. - * - * XXX This should be an mdc_sync() call to sync the whole MDS fs, - * which we can call for other reasons as well. 
- */ - mdc_getstatus(&sbi->ll_mdc_conn, &rootfid); - - lprocfs_unregister_mountpoint(sbi); - if (sbi->ll_proc_root) { - lprocfs_remove(sbi->ll_proc_root); - sbi->ll_proc_root = NULL; - } - - obd_disconnect(&sbi->ll_mdc_conn, 0); - - spin_lock(&dcache_lock); - list_for_each_safe(tmp, next, &sbi->ll_orphan_dentry_list){ - struct dentry *dentry = list_entry(tmp, struct dentry, d_hash); - shrink_dcache_parent(dentry); - } - spin_unlock(&dcache_lock); - - OBD_FREE(sbi, sizeof(*sbi)); - - EXIT; -} /* ll_put_super */ - -static void ll_clear_inode(struct inode *inode) -{ - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_inode_info *lli = ll_i2info(inode); - int rc; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - -#warning "Is there a reason we don't do this in 2.5, but we do in 2.4?" -#if 0 - rc = ll_mdc_cancel_unused(&sbi->ll_mdc_conn, inode, LDLM_FL_NO_CALLBACK); - if (rc < 0) { - CERROR("ll_mdc_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - - if (lli->lli_smd) { - rc = obd_cancel_unused(&sbi->ll_osc_conn, lli->lli_smd, 0); - if (rc < 0) { - CERROR("obd_cancel_unused: %d\n", rc); - /* XXX FIXME do something dramatic */ - } - } -#endif - - if (atomic_read(&inode->i_count) != 0) - CERROR("clearing in-use inode %lu: count = %d\n", - inode->i_ino, atomic_read(&inode->i_count)); - - if (lli->lli_smd) { - obd_free_memmd(&sbi->ll_osc_conn, &lli->lli_smd); - lli->lli_smd = NULL; - } - - if (lli->lli_symlink_name) { - OBD_FREE(lli->lli_symlink_name,strlen(lli->lli_symlink_name)+1); - lli->lli_symlink_name = NULL; - } - - EXIT; -} - -#if 0 -static void ll_delete_inode(struct inode *inode) -{ - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - if (S_ISREG(inode->i_mode)) { - int err; - struct obdo *oa; - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - - /* mcreate with no open */ - if (!lsm) - GOTO(out, 0); - - if (lsm->lsm_object_id == 0) { - CERROR("This really happens\n"); - /* No obdo was ever 
created */ - GOTO(out, 0); - } - - oa = obdo_alloc(); - if (oa == NULL) - GOTO(out, -ENOMEM); - - oa->o_id = lsm->lsm_object_id; - oa->o_mode = inode->i_mode; - oa->o_valid = OBD_MD_FLID | OBD_MD_FLTYPE; - - err = obd_destroy(ll_i2obdconn(inode), oa, lsm); - obdo_free(oa); - if (err) - CDEBUG(D_SUPER, "obd destroy objid "LPX64" error %d\n", - lsm->lsm_object_id, err); - } -out: - clear_inode(inode); - EXIT; -} -#endif - -/* like inode_setattr, but doesn't mark the inode dirty */ -static int ll_attr2inode(struct inode * inode, struct iattr * attr, int trunc) -{ - unsigned int ia_valid = attr->ia_valid; - int error = 0; - - if ((ia_valid & ATTR_SIZE) && trunc) { - if (attr->ia_size > ll_file_maxbytes(inode)) { - error = -EFBIG; - goto out; - } - error = vmtruncate(inode, attr->ia_size); - if (error) - goto out; - } else if (ia_valid & ATTR_SIZE) - inode->i_size = attr->ia_size; - - if (ia_valid & ATTR_UID) - inode->i_uid = attr->ia_uid; - if (ia_valid & ATTR_GID) - inode->i_gid = attr->ia_gid; - if (ia_valid & ATTR_ATIME) - inode->i_atime = attr->ia_atime; - if (ia_valid & ATTR_MTIME) - inode->i_mtime = attr->ia_mtime; - if (ia_valid & ATTR_CTIME) - inode->i_ctime = attr->ia_ctime; - if (ia_valid & ATTR_MODE) { - inode->i_mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - inode->i_mode &= ~S_ISGID; - } -out: - return error; -} - -int ll_inode_setattr(struct inode *inode, struct iattr *attr, int do_trunc) -{ - struct ptlrpc_request *request = NULL; - struct ll_sb_info *sbi = ll_i2sbi(inode); - int err = 0; - - ENTRY; - - /* change incore inode */ - err = ll_attr2inode(inode, attr, do_trunc); - if (err) - RETURN(err); - - /* Don't send size changes to MDS to avoid "fast EA" problems, and - * also avoid a pointless RPC (we get file size from OST anyways). 
- */ - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid) { - struct mdc_op_data op_data; - - ll_prepare_mdc_op_data(&op_data, inode, NULL, NULL, 0, 0); - - err = mdc_setattr(&sbi->ll_mdc_conn, &op_data, - attr, NULL, 0, &request); - if (err) - CERROR("mdc_setattr fails: err = %d\n", err); - - ptlrpc_req_finished(request); - if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_MTIME_SET) { - struct lov_stripe_md *lsm = ll_i2info(inode)->lli_smd; - struct obdo oa; - int err2; - - CDEBUG(D_ERROR, "setting mtime on OST\n"); - oa.o_id = lsm->lsm_object_id; - oa.o_mode = S_IFREG; - oa.o_valid = OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME; - oa.o_mtime = LTIME_S(attr->ia_mtime); - err2 = obd_setattr(&sbi->ll_osc_conn, &oa, lsm, NULL); - if (err2) { - CERROR("obd_setattr fails: rc=%d\n", err); - if (!err) - err = err2; - } - } - } - - RETURN(err); -} - -int ll_setattr(struct dentry *de, struct iattr *attr) -{ - int rc = inode_change_ok(de->d_inode, attr); - CDEBUG(D_VFSTRACE, "VFS Op:name=%s\n", de->d_name.name); - if (rc) - return rc; - - LPROC_COUNTER_INODE_INCBY1((de->d_inode), LPROC_LL_SETATTR); - return ll_inode_setattr(de->d_inode, attr, 1); -} - -static int ll_statfs(struct super_block *sb, struct statfs *sfs) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_statfs osfs; - int rc; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - LPROC_COUNTER_SBI_INCBY1(sbi, LPROC_LL_STAFS); - memset(sfs, 0, sizeof(*sfs)); - rc = obd_statfs(&sbi->ll_mdc_conn, &osfs); - statfs_unpack(sfs, &osfs); - if (rc) - CERROR("mdc_statfs fails: rc = %d\n", rc); - else - CDEBUG(D_SUPER, "mdc_statfs shows blocks "LPU64"/"LPU64 - " objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - /* temporary until mds_statfs returns statfs info for all OSTs */ - if (!rc) { - rc = obd_statfs(&sbi->ll_osc_conn, &osfs); - if (rc) { - CERROR("obd_statfs fails: rc = %d\n", rc); - GOTO(out, rc); - } - CDEBUG(D_SUPER, "obd_statfs shows blocks "LPU64"/"LPU64 - " 
objects "LPU64"/"LPU64"\n", - osfs.os_bavail, osfs.os_blocks, - osfs.os_ffree, osfs.os_files); - - while (osfs.os_blocks > ~0UL) { - sfs->f_bsize <<= 1; - - osfs.os_blocks >>= 1; - osfs.os_bfree >>= 1; - osfs.os_bavail >>= 1; - } - sfs->f_blocks = osfs.os_blocks; - sfs->f_bfree = osfs.os_bfree; - sfs->f_bavail = osfs.os_bavail; - if (osfs.os_ffree < (__u64)sfs->f_ffree) { - sfs->f_files = (sfs->f_files - sfs->f_ffree) + - osfs.os_ffree; - sfs->f_ffree = osfs.os_ffree; - } - } - -out: - RETURN(rc); -} - -void ll_update_inode(struct inode *inode, struct mds_body *body, - struct lov_stripe_md *lsm) -{ - struct ll_inode_info *lli = ll_i2info(inode); - - LASSERT ((lsm != NULL) == ((body->valid & OBD_MD_FLEASIZE) != 0)); - if (lsm != NULL) { - if (lli->lli_smd == NULL) { - lli->lli_smd = lsm; - lli->lli_maxbytes = lsm->lsm_maxbytes; - if (lli->lli_maxbytes > PAGE_CACHE_MAXBYTES) - lli->lli_maxbytes = PAGE_CACHE_MAXBYTES; - } else { - LASSERT (!memcmp (lli->lli_smd, lsm, sizeof (*lsm))); - } - } - - if (body->valid & OBD_MD_FLID) - inode->i_ino = body->ino; - if (body->valid & OBD_MD_FLATIME) - LTIME_S(inode->i_atime) = body->atime; - if (body->valid & OBD_MD_FLMTIME) - LTIME_S(inode->i_mtime) = body->mtime; - if (body->valid & OBD_MD_FLCTIME) - LTIME_S(inode->i_ctime) = body->ctime; - if (body->valid & OBD_MD_FLMODE) - inode->i_mode = (inode->i_mode & S_IFMT)|(body->mode & ~S_IFMT); - if (body->valid & OBD_MD_FLTYPE) - inode->i_mode = (inode->i_mode & ~S_IFMT)|(body->mode & S_IFMT); - if (body->valid & OBD_MD_FLUID) - inode->i_uid = body->uid; - if (body->valid & OBD_MD_FLGID) - inode->i_gid = body->gid; - if (body->valid & OBD_MD_FLFLAGS) - inode->i_flags = body->flags; - if (body->valid & OBD_MD_FLNLINK) - inode->i_nlink = body->nlink; - if (body->valid & OBD_MD_FLGENER) - inode->i_generation = body->generation; - if (body->valid & OBD_MD_FLRDEV) - inode->i_rdev = to_kdev_t(body->rdev); - if (body->valid & OBD_MD_FLSIZE) - inode->i_size = body->size; - if (body->valid 
& OBD_MD_FLBLOCKS) - inode->i_blocks = body->blocks; -} - -int ll_read_inode2(struct inode *inode, void *opaque) -{ - struct ll_read_inode2_cookie *lic = opaque; - struct mds_body *body = lic->lic_body; - struct ll_inode_info *lli = ll_i2info(inode); - int rc = 0; - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu\n", inode->i_ino); - - sema_init(&lli->lli_open_sem, 1); - /* these are 2.4 only, but putting them here for consistency.. */ - spin_lock_init(&lli->lli_read_extent_lock); - INIT_LIST_HEAD(&lli->lli_read_extents); - ll_lldo_init(&lli->lli_dirty); - lli->lli_flags = 0; - lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES; - - LASSERT(!lli->lli_smd); - - /* core attributes first */ - ll_update_inode(inode, body, lic ? lic->lic_lsm : NULL); - - /* OIDEBUG(inode); */ - - if (S_ISREG(inode->i_mode)) { - inode->i_op = &ll_file_inode_operations; - inode->i_fop = &ll_file_operations; - inode->i_mapping->a_ops = &ll_aops; - EXIT; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ll_dir_inode_operations; - inode->i_fop = &ll_dir_operations; - inode->i_mapping->a_ops = &ll_dir_aops; - EXIT; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &ll_fast_symlink_inode_operations; - EXIT; - } else { - inode->i_op = &ll_special_inode_operations; - init_special_inode(inode, inode->i_mode, - kdev_t_to_nr(inode->i_rdev)); - EXIT; - } - - return rc; -} - - -void ll_umount_begin(struct super_block *sb) -{ - struct ll_sb_info *sbi = ll_s2sbi(sb); - struct obd_device *obd; - struct obd_ioctl_data ioc_data = { 0 }; - - ENTRY; - CDEBUG(D_VFSTRACE, "VFS Op:\n"); - - obd = class_conn2obd(&sbi->ll_mdc_conn); - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_mdc_conn, sizeof ioc_data, - &ioc_data, NULL); - - obd = class_conn2obd(&sbi->ll_osc_conn); - obd->obd_no_recov = 1; - obd_iocontrol(IOC_OSC_SET_ACTIVE, &sbi->ll_osc_conn, sizeof ioc_data, - &ioc_data, NULL); - - /* Really, we'd like to wait until there are no requests outstanding, - * and then continue. 
For now, we just invalidate the requests, - * schedule, and hope. - */ - schedule(); - - EXIT; -} - -static kmem_cache_t *ll_inode_cachep; - -static struct inode *ll_alloc_inode(struct super_block *sb) -{ - struct ll_inode_info *lli; - LPROC_COUNTER_SBI_INCBY1((ll_s2sbi(sb)), LL_ALLOC_INODE); - OBD_SLAB_ALLOC(lli, ll_inode_cachep, SLAB_KERNEL, sizeof *lli); - if (lli == NULL) - return NULL; - - memset(lli, 0, (char *)&lli->lli_vfs_inode - (char *)lli); - sema_init(&lli->lli_open_sem, 1); - init_MUTEX(&lli->lli_size_valid_sem); - lli->lli_maxbytes = LUSTRE_STRIPE_MAXBYTES; - - return &lli->lli_vfs_inode; -} - -static void ll_destroy_inode(struct inode *inode) -{ - OBD_SLAB_FREE(ll_inode_cachep, ll_i2info(inode), - sizeof(struct ll_inode_info)); -} - -static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) -{ - struct ll_inode_info *lli = foo; - - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == - SLAB_CTOR_CONSTRUCTOR) - inode_init_once(&lli->lli_vfs_inode); -} - -int ll_init_inodecache(void) -{ - ll_inode_cachep = kmem_cache_create("lustre_inode_cache", - sizeof(struct ll_inode_info), - 0, SLAB_HWCACHE_ALIGN, - init_once, NULL); - if (ll_inode_cachep == NULL) - return -ENOMEM; - return 0; -} - -void ll_destroy_inodecache(void) -{ - if (kmem_cache_destroy(ll_inode_cachep)) - CERROR("ll_inode_cache: not all structures were freed\n"); -} - - - -/* exported operations */ -struct super_operations ll_super_operations = -{ - alloc_inode: ll_alloc_inode, - destroy_inode: ll_destroy_inode, - clear_inode: ll_clear_inode, -// delete_inode: ll_delete_inode, - put_super: ll_put_super, - statfs: ll_statfs, - umount_begin: ll_umount_begin -}; - - -struct file_system_type lustre_lite_fs_type = { - .owner = THIS_MODULE, - .name = "lustre_lite", - .get_sb = ll_get_sb, - .kill_sb = kill_anon_super, -}; - -static int __init init_lustre_lite(void) -{ - int rc; - printk(KERN_INFO "Lustre Lite Client File System; " - "info@clusterfs.com\n"); - rc = 
ll_init_inodecache(); - if (rc) - return -ENOMEM; - ll_file_data_slab = kmem_cache_create("ll_file_data", - sizeof(struct ll_file_data), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ll_file_data_slab == NULL) { - ll_destroy_inodecache(); - return -ENOMEM; - } - - proc_lustre_fs_root = proc_lustre_root ? - proc_mkdir("llite", proc_lustre_root) : NULL; - - return register_filesystem(&lustre_lite_fs_type); -} - -static void __exit exit_lustre_lite(void) -{ - unregister_filesystem(&lustre_lite_fs_type); - ll_destroy_inodecache(); - kmem_cache_destroy(ll_file_data_slab); - if (proc_lustre_fs_root) { - lprocfs_remove(proc_lustre_fs_root); - proc_lustre_fs_root = NULL; - } -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Lite Client File System"); -MODULE_LICENSE("GPL"); - -module_init(init_lustre_lite); -module_exit(exit_lustre_lite); -#endif diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c deleted file mode 100644 index 19d234e..0000000 --- a/lustre/llite/symlink.c +++ /dev/null @@ -1,198 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/fs.h> -#include <linux/mm.h> -#include <linux/stat.h> -#include <linux/smp_lock.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#define DEBUG_SUBSYSTEM S_LLITE - -#include <linux/lustre_lite.h> - -static int ll_readlink_internal(struct inode *inode, - struct ptlrpc_request **request, char **symname) -{ - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - struct ll_fid fid; - struct mds_body *body; - int rc, symlen = inode->i_size + 1; - ENTRY; - - *request = NULL; - - if (lli->lli_symlink_name) { - *symname = lli->lli_symlink_name; - CDEBUG(D_INODE, "using cached symlink %s\n", *symname); - RETURN(0); - } - - ll_inode2fid(&fid, inode); - rc = mdc_getattr(&sbi->ll_mdc_conn, &fid, - OBD_MD_LINKNAME, symlen, request); - if (rc) { - CERROR("inode %lu: rc = %d\n", inode->i_ino, rc); - RETURN(rc); - } - - body = lustre_msg_buf ((*request)->rq_repmsg, 0, sizeof (*body)); - LASSERT (body != NULL); - LASSERT_REPSWABBED (*request, 0); - - if ((body->valid & OBD_MD_LINKNAME) == 0) { - CERROR ("OBD_MD_LINKNAME not set on reply\n"); - GOTO (failed, rc = -EPROTO); - } - - LASSERT (symlen != 0); - if (body->eadatasize != symlen) { - CERROR ("inode %lu: symlink length %d not expected %d\n", - inode->i_ino, body->eadatasize - 1, symlen - 1); - GOTO (failed, rc = -EPROTO); - } - - *symname = lustre_msg_buf ((*request)->rq_repmsg, 1, symlen); - if (*symname == NULL || - strnlen (*symname, symlen) != symlen - 1) { - /* not full/NULL terminated */ - CERROR ("inode %lu: symlink not NULL terminated string" - "of length %d\n", inode->i_ino, symlen - 1); - GOTO (failed, rc = -EPROTO); - } - - OBD_ALLOC(lli->lli_symlink_name, symlen); - /* do not return an error if we cannot cache the symlink locally */ - if (lli->lli_symlink_name) - memcpy(lli->lli_symlink_name, *symname, symlen); - - RETURN(0); - - failed: - ptlrpc_req_finished (*request); - RETURN 
(-EPROTO); -} - -static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) -{ - struct inode *inode = dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct ptlrpc_request *request; - char *symname; - int rc; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op\n"); - /* on symlinks lli_open_sem protects lli_symlink_name allocation/data */ - down(&lli->lli_open_sem); - rc = ll_readlink_internal(inode, &request, &symname); - if (rc) - GOTO(out, rc); - - rc = vfs_readlink(dentry, buffer, buflen, symname); - ptlrpc_req_finished(request); - out: - up(&lli->lli_open_sem); - RETURN(rc); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int ll_follow_link(struct dentry *dentry, struct nameidata *nd, - struct lookup_intent *it) -{ - struct inode *inode = dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct ptlrpc_request *request; - int op = 0, mode = 0, rc; - char *symname; - ENTRY; - - CDEBUG(D_VFSTRACE, "VFS Op\n"); - if (it != NULL) { - op = it->it_op; - mode = it->it_mode; - - ll_intent_release(dentry, it); - } - - down(&lli->lli_open_sem); - rc = ll_readlink_internal(inode, &request, &symname); - up(&lli->lli_open_sem); - if (rc) - GOTO(out, rc); - - if (it != NULL) { - it->it_op = op; - it->it_mode = mode; - } - - rc = vfs_follow_link_it(nd, symname, it); - ptlrpc_req_finished(request); - out: - RETURN(rc); -} -#else -static int ll_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct inode *inode = dentry->d_inode; - struct ll_inode_info *lli = ll_i2info(inode); - struct ptlrpc_request *request; - int op = 0, mode = 0, rc; - char *symname; - ENTRY; - - op = nd->it.it_op; - mode = nd->it.it_mode; - - ll_intent_release(dentry, &nd->it); - - down(&lli->lli_open_sem); - - rc = ll_readlink_internal(inode, &request, &symname); - if (rc) - GOTO(out, rc); - - nd->it.it_op = op; - nd->it.it_mode = mode; - - rc = vfs_follow_link(nd, symname); - ptlrpc_req_finished(request); - out: - up(&lli->lli_open_sem); 
- - RETURN(rc); -} -#endif - -extern int ll_inode_revalidate(struct dentry *dentry); -extern int ll_setattr(struct dentry *de, struct iattr *attr); -struct inode_operations ll_fast_symlink_inode_operations = { - readlink: ll_readlink, - setattr: ll_setattr, - setattr_raw: ll_setattr_raw, - follow_link2: ll_follow_link, -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - revalidate: ll_inode_revalidate -#endif -}; diff --git a/lustre/llite/sysctl.c b/lustre/llite/sysctl.c deleted file mode 100644 index b626046..0000000 --- a/lustre/llite/sysctl.c +++ /dev/null @@ -1,70 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/swapctl.h> -#endif -#include <linux/proc_fs.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/ctype.h> -#include <asm/bitops.h> -#include <asm/segment.h> -#include <asm/uaccess.h> -#include <linux/utsname.h> - -struct ctl_table_header *ll_table_header = NULL; - -int ll_debug_level = 0; -int ll_print_entry = 1; - - -#define LL_SYSCTL 1 - -#define LL_DEBUG 1 /* control debugging */ -#define LL_ENTRY 2 /* control enter/leave pattern */ -#define LL_TIMEOUT 3 /* timeout on upcalls to become intrble */ -#define LL_HARD 4 /* mount type "hard" or "soft" */ -#define LL_VARS 5 -#define LL_INDEX 6 -#define LL_RESET 7 - -#define LL_VARS_SLOT 2 - -static ctl_table ll_table[] = { - {LL_DEBUG, "debug", &ll_debug_level, sizeof(int), 0644, NULL, &proc_dointvec}, - {LL_ENTRY, "trace", &ll_print_entry, sizeof(int), 0644, NULL, &proc_dointvec}, - { 0 } -}; - -static ctl_table top_table[] = { - {LL_SYSCTL, "lustre_light", NULL, 0, 0555, ll_table}, - {0} 
-}; - -void ll_sysctl_init (void) -{ - -#ifdef CONFIG_SYSCTL - if ( !ll_table_header ) - ll_table_header = register_sysctl_table(top_table, 0); -#endif -} - -void ll_sysctl_clean (void) -{ -#ifdef CONFIG_SYSCTL - if ( ll_table_header ) - unregister_sysctl_table(ll_table_header); - ll_table_header = NULL; -#endif -} diff --git a/lustre/lov/.cvsignore b/lustre/lov/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/lov/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/lov/Makefile.am b/lustre/lov/Makefile.am deleted file mode 100644 index 879e44d..0000000 --- a/lustre/lov/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= - -if LIBLUSTRE -lib_LIBRARIES = liblov.a -liblov_a_SOURCES = lov_obd.c lov_pack.c -else -MODULE = lov -modulefs_DATA = lov.o -EXTRA_PROGRAMS = lov -lov_SOURCES = lov_obd.c lov_pack.c lproc_lov.c -endif - -include $(top_srcdir)/Rules diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c deleted file mode 100644 index 2974b2a..0000000 --- a/lustre/lov/lov_obd.c +++ /dev/null @@ -1,2397 +0,0 @@ - /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * Peter Braam <braam@clusterfs.com> - * Mike Shaver <shaver@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_LOV -#ifdef __KERNEL__ -#include <linux/slab.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/pagemap.h> -#include <asm/div64.h> -#else -#include <liblustre.h> -#endif - -#include <linux/obd_support.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_net.h> -#include <linux/lustre_idl.h> -#include <linux/lustre_lite.h> /* for LL_IOC_LOV_[GS]ETSTRIPE */ -#include <linux/lustre_mds.h> -#include <linux/obd_class.h> -#include <linux/obd_lov.h> -#include <linux/seq_file.h> -#include <linux/lprocfs_status.h> - -struct lov_file_handles { - struct portals_handle lfh_handle; - atomic_t lfh_refcount; - struct list_head lfh_list; - int lfh_count; - struct obd_client_handle *lfh_och; -}; - -struct lov_lock_handles { - struct portals_handle llh_handle; - atomic_t llh_refcount; - int llh_stripe_count; - struct lustre_handle llh_handles[0]; -}; - -/* lov_file_handles helpers */ -static void lov_lfh_addref(void *lfhp) -{ - struct lov_file_handles *lfh = lfhp; - - atomic_inc(&lfh->lfh_refcount); - CDEBUG(D_INFO, "GETting lfh %p : new refcount %d\n", lfh, - atomic_read(&lfh->lfh_refcount)); -} - -static struct lov_file_handles *lov_lfh_new(void) -{ - struct lov_file_handles *lfh; - - OBD_ALLOC(lfh, sizeof *lfh); - if (lfh == NULL) { - CERROR("out of memory\n"); - return NULL; - } - - atomic_set(&lfh->lfh_refcount, 2); - - INIT_LIST_HEAD(&lfh->lfh_handle.h_link); - class_handle_hash(&lfh->lfh_handle, lov_lfh_addref); - - return lfh; -} - -static struct lov_file_handles *lov_handle2lfh(struct lustre_handle *handle) -{ - ENTRY; - LASSERT(handle != NULL); - RETURN(class_handle2object(handle->cookie)); -} 
- -static void lov_lfh_put(struct lov_file_handles *lfh) -{ - CDEBUG(D_INFO, "PUTting lfh %p : new refcount %d\n", lfh, - atomic_read(&lfh->lfh_refcount) - 1); - LASSERT(atomic_read(&lfh->lfh_refcount) > 0 && - atomic_read(&lfh->lfh_refcount) < 0x5a5a); - if (atomic_dec_and_test(&lfh->lfh_refcount)) { - LASSERT(list_empty(&lfh->lfh_handle.h_link)); - OBD_FREE(lfh, sizeof *lfh); - } -} - -static void lov_lfh_destroy(struct lov_file_handles *lfh) -{ - class_handle_unhash(&lfh->lfh_handle); - lov_lfh_put(lfh); -} - -static void lov_llh_addref(void *llhp) -{ - struct lov_lock_handles *llh = llhp; - - atomic_inc(&llh->llh_refcount); - CDEBUG(D_INFO, "GETting llh %p : new refcount %d\n", llh, - atomic_read(&llh->llh_refcount)); -} - -static struct lov_lock_handles *lov_llh_new(struct lov_stripe_md *lsm) -{ - struct lov_lock_handles *llh; - - OBD_ALLOC(llh, sizeof *llh + - sizeof(*llh->llh_handles) * lsm->lsm_stripe_count); - if (llh == NULL) { - CERROR("out of memory\n"); - return NULL; - } - atomic_set(&llh->llh_refcount, 2); - llh->llh_stripe_count = lsm->lsm_stripe_count; - INIT_LIST_HEAD(&llh->llh_handle.h_link); - class_handle_hash(&llh->llh_handle, lov_llh_addref); - return llh; -} - -static struct lov_lock_handles *lov_handle2llh(struct lustre_handle *handle) -{ - ENTRY; - LASSERT(handle != NULL); - RETURN(class_handle2object(handle->cookie)); -} - -static void lov_llh_put(struct lov_lock_handles *llh) -{ - CDEBUG(D_INFO, "PUTting llh %p : new refcount %d\n", llh, - atomic_read(&llh->llh_refcount) - 1); - LASSERT(atomic_read(&llh->llh_refcount) > 0 && - atomic_read(&llh->llh_refcount) < 0x5a5a); - if (atomic_dec_and_test(&llh->llh_refcount)) { - LASSERT(list_empty(&llh->llh_handle.h_link)); - OBD_FREE(llh, sizeof *llh + - sizeof(*llh->llh_handles) * llh->llh_stripe_count); - } -} - -static void lov_llh_destroy(struct lov_lock_handles *llh) -{ - class_handle_unhash(&llh->llh_handle); - lov_llh_put(llh); -} - -/* obd methods */ -int lov_attach(struct obd_device 
*dev, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - struct proc_dir_entry *entry; - int rc; - - lprocfs_init_vars(&lvars); - rc = lprocfs_obd_attach(dev, lvars.obd_vars); - if (rc) - return rc; - - entry = create_proc_entry("target_obd", 0444, dev->obd_proc_entry); - if (entry == NULL) - RETURN(-ENOMEM); - entry->proc_fops = &ll_proc_target_fops; - entry->data = dev; - - return rc; - -} - -int lov_detach(struct obd_device *dev) -{ - return lprocfs_obd_detach(dev); -} - -static int lov_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid) -{ - struct ptlrpc_request *req = NULL; - struct lov_obd *lov = &obd->u.lov; - struct client_obd *mdc = &lov->mdcobd->u.cli; - struct lov_desc *desc = &lov->desc; - struct lov_desc *mdesc; - struct lov_tgt_desc *tgts; - struct obd_export *exp; - struct lustre_handle mdc_conn; - struct obd_uuid lov_mds_uuid = {"LOV_MDS_UUID"}; - struct obd_uuid *uuids; - int rc, rc2, i; - ENTRY; - - rc = class_connect(conn, obd, cluuid); - if (rc) - RETURN(rc); - - /* We don't want to actually do the underlying connections more than - * once, so keep track. */ - lov->refcount++; - if (lov->refcount > 1) - RETURN(0); - - exp = class_conn2export(conn); - spin_lock_init(&exp->exp_lov_data.led_lock); - INIT_LIST_HEAD(&exp->exp_lov_data.led_open_head); - - /* retrieve LOV metadata from MDS */ - rc = obd_connect(&mdc_conn, lov->mdcobd, &lov_mds_uuid); - if (rc) { - CERROR("cannot connect to mdc: rc = %d\n", rc); - GOTO(out_conn, rc); - } - - rc = mdc_getlovinfo(obd, &mdc_conn, &req); - rc2 = obd_disconnect(&mdc_conn, 0); - if (rc) { - CERROR("cannot get lov info %d\n", rc); - GOTO(out_conn, rc); - } - - if (rc2) { - CERROR("error disconnecting from MDS %d\n", rc2); - GOTO(out_req, rc = rc2); - } - - /* mdc_getlovinfo() has checked and swabbed the reply. It has also - * done some simple checks (e.g. 
#uuids consistent with desc, uuid - * array fits in LOV_MAX_UUID_BUFFER_SIZE and all uuids are - * terminated), but I still need to verify it makes overall - * sense */ - mdesc = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*mdesc)); - LASSERT (mdesc != NULL); - LASSERT_REPSWABBED (req, 0); - - *desc = *mdesc; - - if (!obd_uuid_equals(&obd->obd_uuid, &desc->ld_uuid)) { - CERROR("LOV desc: uuid %s not on mds device (%s)\n", - obd->obd_uuid.uuid, desc->ld_uuid.uuid); - GOTO(out_req, rc = -EINVAL); - } - - /* Because of 64-bit divide/mod operations only work with a 32-bit - * divisor in a 32-bit kernel, we cannot support a stripe width - * of 4GB or larger on 32-bit CPUs. - */ - if ((desc->ld_default_stripe_count ? - desc->ld_default_stripe_count : desc->ld_tgt_count) * - desc->ld_default_stripe_size > ~0UL) { - CERROR("LOV: stripe width "LPU64"x%u > %lu on 32-bit system\n", - desc->ld_default_stripe_size, - desc->ld_default_stripe_count ? - desc->ld_default_stripe_count : desc->ld_tgt_count,~0UL); - GOTO(out_req, rc = -EINVAL); - } - - /* We know ld_tgt_count is reasonable (the array of UUIDS fits in - * the maximum buffer size, so we won't be making outrageous - * demands on memory here. 
*/ - lov->bufsize = sizeof(struct lov_tgt_desc) * desc->ld_tgt_count; - OBD_ALLOC(lov->tgts, lov->bufsize); - if (!lov->tgts) { - CERROR("Out of memory\n"); - GOTO(out_req, rc = -ENOMEM); - } - - uuids = lustre_msg_buf(req->rq_repmsg, 1, - sizeof(*uuids) * desc->ld_tgt_count); - LASSERT (uuids != NULL); - LASSERT_REPSWABBED (req, 1); - - for (i = 0, tgts = lov->tgts; i < desc->ld_tgt_count; i++, tgts++) { - struct obd_uuid *uuid = &tgts->uuid; - struct obd_device *tgt_obd; - struct obd_uuid lov_osc_uuid = { "LOV_OSC_UUID" }; - - /* NULL termination already checked */ - *uuid = uuids[i]; - - tgt_obd = client_tgtuuid2obd(uuid); - - if (!tgt_obd) { - CERROR("Target %s not attached\n", uuid->uuid); - GOTO(out_disc, rc = -EINVAL); - } - - if (!tgt_obd->obd_set_up) { - CERROR("Target %s not set up\n", uuid->uuid); - GOTO(out_disc, rc = -EINVAL); - } - - rc = obd_connect(&tgts->conn, tgt_obd, &lov_osc_uuid); - - if (rc) { - CERROR("Target %s connect error %d\n", uuid->uuid, rc); - GOTO(out_disc, rc); - } - - rc = obd_iocontrol(IOC_OSC_REGISTER_LOV, &tgts->conn, - sizeof(struct obd_device *), obd, NULL); - if (rc) { - CERROR("Target %s REGISTER_LOV error %d\n", - uuid->uuid, rc); - obd_disconnect(&tgts->conn, 0); - GOTO(out_disc, rc); - } - - desc->ld_active_tgt_count++; - tgts->active = 1; - } - - mdc->cl_max_mds_easize = obd_size_diskmd(conn, NULL); - ptlrpc_req_finished (req); - class_export_put(exp); - RETURN (0); - - out_disc: - while (i-- > 0) { - struct obd_uuid uuid; - --tgts; - --desc->ld_active_tgt_count; - tgts->active = 0; - /* save for CERROR below; (we know it's terminated) */ - uuid = tgts->uuid; - rc2 = obd_disconnect(&tgts->conn, 0); - if (rc2) - CERROR("error: LOV target %s disconnect on OST idx %d: " - "rc = %d\n", uuid.uuid, i, rc2); - } - OBD_FREE(lov->tgts, lov->bufsize); - out_req: - ptlrpc_req_finished (req); - out_conn: - class_export_put(exp); - class_disconnect(conn, 0); - RETURN (rc); -} - -static int lov_disconnect(struct lustre_handle *conn, 
int failover) -{ - struct obd_device *obd = class_conn2obd(conn); - struct lov_obd *lov = &obd->u.lov; - struct obd_export *exp; - struct list_head *p, *n; - int rc, i; - ENTRY; - - if (!lov->tgts) - goto out_local; - - /* Only disconnect the underlying layers on the final disconnect. */ - lov->refcount--; - if (lov->refcount != 0) - goto out_local; - - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - if (obd->obd_no_recov) { - /* Pass it on to our clients. - * XXX This should be an argument to disconnect, - * XXX not a back-door flag on the OBD. Ah well. - */ - struct obd_device *osc_obd = - class_conn2obd(&lov->tgts[i].conn); - osc_obd->obd_no_recov = 1; - } - rc = obd_disconnect(&lov->tgts[i].conn, failover); - if (rc) { - if (lov->tgts[i].active) { - CERROR("Target %s disconnect error %d\n", - lov->tgts[i].uuid.uuid, rc); - } - rc = 0; - } - if (lov->tgts[i].active) { - lov->desc.ld_active_tgt_count--; - lov->tgts[i].active = 0; - } - } - OBD_FREE(lov->tgts, lov->bufsize); - lov->bufsize = 0; - lov->tgts = NULL; - - exp = class_conn2export(conn); - if (exp == NULL) { - CERROR("export handle "LPU64" invalid! If you can reproduce, " - "please send a full debug log to phik\n", conn->cookie); - RETURN(0); - } - spin_lock(&exp->exp_lov_data.led_lock); - list_for_each_safe(p, n, &exp->exp_lov_data.led_open_head) { - /* XXX close these, instead of just discarding them? */ - struct lov_file_handles *lfh; - lfh = list_entry(p, typeof(*lfh), lfh_list); - CERROR("discarding open LOV handle %p:"LPX64"\n", - lfh, lfh->lfh_handle.h_cookie); - list_del(&lfh->lfh_list); - OBD_FREE(lfh->lfh_och, lfh->lfh_count * FD_OSTDATA_SIZE); - lov_lfh_destroy(lfh); - lov_lfh_put(lfh); - } - spin_unlock(&exp->exp_lov_data.led_lock); - class_export_put(exp); - - out_local: - rc = class_disconnect(conn, 0); - RETURN(rc); -} - -/* Error codes: - * - * -EINVAL : UUID can't be found in the LOV's target list - * -ENOTCONN: The UUID is found, but the target connection is bad (!) 
- * -EBADF : The UUID is found, but the OBD is the wrong type (!) - */ -static int lov_set_osc_active(struct lov_obd *lov, struct obd_uuid *uuid, - int activate) -{ - struct obd_device *obd; - struct lov_tgt_desc *tgt; - int i, rc = 0; - ENTRY; - - CDEBUG(D_INFO, "Searching in lov %p for uuid %s (activate=%d)\n", - lov, uuid->uuid, activate); - - spin_lock(&lov->lov_lock); - for (i = 0, tgt = lov->tgts; i < lov->desc.ld_tgt_count; i++, tgt++) { - CDEBUG(D_INFO, "lov idx %d is %s conn "LPX64"\n", - i, tgt->uuid.uuid, tgt->conn.cookie); - if (strncmp(uuid->uuid, tgt->uuid.uuid, sizeof uuid->uuid) == 0) - break; - } - - if (i == lov->desc.ld_tgt_count) - GOTO(out, rc = -EINVAL); - - obd = class_conn2obd(&tgt->conn); - if (obd == NULL) { - /* This can happen if OST failure races with node shutdown */ - GOTO(out, rc = -ENOTCONN); - } - - CDEBUG(D_INFO, "Found OBD %s=%s device %d (%p) type %s at LOV idx %d\n", - obd->obd_name, obd->obd_uuid.uuid, obd->obd_minor, obd, - obd->obd_type->typ_name, i); - LASSERT(strcmp(obd->obd_type->typ_name, "osc") == 0); - - if (tgt->active == activate) { - CDEBUG(D_INFO, "OBD %p already %sactive!\n", obd, - activate ? "" : "in"); - GOTO(out, rc); - } - - CDEBUG(D_INFO, "Marking OBD %p %sactive\n", obd, activate ? "" : "in"); - - tgt->active = activate; - if (activate) { - /* - * foreach(export) - * foreach(open_file) - * if (file_handle uses this_osc) - * if (has_no_filehandle) - * open(file_handle, this_osc); - */ - /* XXX reconnect? */ - lov->desc.ld_active_tgt_count++; - } else { - /* - * Should I invalidate filehandles that refer to this OSC, so - * that I reopen them during reactivation? - */ - /* XXX disconnect from OSC? 
*/ - lov->desc.ld_active_tgt_count--; - } - -#warning "FIXME: walk open files list for objects that need opening" - EXIT; - out: - spin_unlock(&lov->lov_lock); - return rc; -} - -static int lov_setup(struct obd_device *obd, obd_count len, void *buf) -{ - struct obd_ioctl_data *data = buf; - struct lov_obd *lov = &obd->u.lov; - int rc = 0; - ENTRY; - - if (data->ioc_inllen1 < 1) { - CERROR("LOV setup requires an MDC name\n"); - RETURN(-EINVAL); - } - - spin_lock_init(&lov->lov_lock); - lov->mdcobd = class_name2obd(data->ioc_inlbuf1); - if (!lov->mdcobd) { - CERROR("LOV %s cannot locate MDC %s\n", obd->obd_uuid.uuid, - data->ioc_inlbuf1); - rc = -EINVAL; - } - RETURN(rc); -} - -/* compute object size given "stripeno" and the ost size */ -static obd_size lov_stripe_size(struct lov_stripe_md *lsm, obd_size ost_size, - int stripeno) -{ - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long swidth = ssize * lsm->lsm_stripe_count; - unsigned long stripe_size; - obd_size lov_size; - - if (ost_size == 0) - return 0; - - /* do_div(a, b) returns a % b, and a = a / b */ - stripe_size = do_div(ost_size, ssize); - - if (stripe_size) - lov_size = ost_size * swidth + stripeno * ssize + stripe_size; - else - lov_size = (ost_size - 1) * swidth + (stripeno + 1) * ssize; - - return lov_size; -} - -static void lov_merge_attrs(struct obdo *tgt, struct obdo *src, obd_flag valid, - struct lov_stripe_md *lsm, int stripeno, int *set) -{ - if (*set) { - if (valid & OBD_MD_FLSIZE) { - /* this handles sparse files properly */ - obd_size lov_size; - - lov_size = lov_stripe_size(lsm, src->o_size, stripeno); - if (lov_size > tgt->o_size) - tgt->o_size = lov_size; - } - if (valid & OBD_MD_FLBLOCKS) - tgt->o_blocks += src->o_blocks; - if (valid & OBD_MD_FLBLKSZ) - tgt->o_blksize += src->o_blksize; - if (valid & OBD_MD_FLCTIME && tgt->o_ctime < src->o_ctime) - tgt->o_ctime = src->o_ctime; - if (valid & OBD_MD_FLMTIME && tgt->o_mtime < src->o_mtime) - tgt->o_mtime = src->o_mtime; - } else { - 
obdo_cpy_md(tgt, src, valid); - if (valid & OBD_MD_FLSIZE) - tgt->o_size = lov_stripe_size(lsm,src->o_size,stripeno); - *set = 1; - } -} - -/* the LOV expects oa->o_id to be set to the LOV object id */ -static int lov_create(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) -{ - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_stripe_md *lsm; - struct lov_oinfo *loi; - struct obdo *tmp; - unsigned ost_count, ost_idx; - int set = 0, obj_alloc = 0; - int rc = 0, i; - ENTRY; - - LASSERT(ea); - - if (!export) - GOTO(out_exp, rc = -EINVAL); - - lov = &export->exp_obd->u.lov; - - if (!lov->desc.ld_active_tgt_count) - GOTO(out_exp, rc = -EIO); - - tmp = obdo_alloc(); - if (!tmp) - GOTO(out_exp, rc = -ENOMEM); - - lsm = *ea; - - if (!lsm) { - rc = obd_alloc_memmd(conn, &lsm); - if (rc < 0) - GOTO(out_tmp, rc); - - rc = 0; - lsm->lsm_magic = LOV_MAGIC; - } - - ost_count = lov->desc.ld_tgt_count; - - LASSERT(oa->o_valid & OBD_MD_FLID); - lsm->lsm_object_id = oa->o_id; - if (!lsm->lsm_stripe_size) - lsm->lsm_stripe_size = lov->desc.ld_default_stripe_size; - - if (!*ea || lsm->lsm_stripe_offset >= ost_count) { - get_random_bytes(&ost_idx, 2); - ost_idx %= ost_count; - } else - ost_idx = lsm->lsm_stripe_offset; - - CDEBUG(D_INODE, "allocating %d subobjs for objid "LPX64" at idx %d\n", - lsm->lsm_stripe_count, lsm->lsm_object_id, ost_idx); - - loi = lsm->lsm_oinfo; - for (i = 0; i < ost_count; i++, ost_idx = (ost_idx + 1) % ost_count) { - struct lov_stripe_md obj_md; - struct lov_stripe_md *obj_mdp = &obj_md; - int err; - - if (lov->tgts[ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", ost_idx); - continue; - } - - /* create data objects with "parent" OA */ - memcpy(tmp, oa, sizeof(*tmp)); - /* XXX: LOV STACKING: use real "obj_mdp" sub-data */ - err = obd_create(&lov->tgts[ost_idx].conn, tmp, &obj_mdp, oti); - if (err) { - if (lov->tgts[ost_idx].active) { - CERROR("error 
creating objid "LPX64" sub-object" - " on OST idx %d/%d: rc = %d\n", oa->o_id, - ost_idx, lsm->lsm_stripe_count, err); - if (err > 0) { - CERROR("obd_create returned invalid " - "err %d\n", err); - err = -EIO; - } - } - if (!rc) - rc = err; - continue; - } - loi->loi_id = tmp->o_id; - loi->loi_ost_idx = ost_idx; - CDEBUG(D_INODE, "objid "LPX64" has subobj "LPX64" at idx %d\n", - lsm->lsm_object_id, loi->loi_id, ost_idx); - - if (set == 0) - lsm->lsm_stripe_offset = ost_idx; - lov_merge_attrs(oa, tmp, OBD_MD_FLBLKSZ, lsm, obj_alloc, &set); - ot_init(&loi->loi_dirty_ot_inline); - loi->loi_dirty_ot = &loi->loi_dirty_ot_inline; - - ++obj_alloc; - ++loi; - - /* If we have allocated enough objects, we are OK */ - if (obj_alloc == lsm->lsm_stripe_count) - GOTO(out_done, rc = 0); - } - - if (*ea != NULL) { - CERROR("can't lstripe objid "LPX64": have %u want %u, rc %d\n", - lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count,rc); - if (rc == 0) - rc = -EFBIG; - GOTO(out_cleanup, rc); - } else { - struct lov_stripe_md *lsm_new; - /* XXX LOV STACKING call into osc for sizes */ - unsigned size = lov_stripe_md_size(obj_alloc); - - CERROR("reallocating LSM for objid "LPX64": old %u new %u\n", - lsm->lsm_object_id, obj_alloc, lsm->lsm_stripe_count); - OBD_ALLOC(lsm_new, size); - if (!lsm_new) - GOTO(out_cleanup, rc = -ENOMEM); - memcpy(lsm_new, lsm, size); - lsm_new->lsm_stripe_count = obj_alloc; - - /* XXX LOV STACKING call into osc for sizes */ - OBD_FREE(lsm, lov_stripe_md_size(lsm->lsm_stripe_count)); - lsm = lsm_new; - - rc = 0; - } - out_done: - *ea = lsm; - - out_tmp: - obdo_free(tmp); - out_exp: - class_export_put(export); - return rc; - - out_cleanup: - while (obj_alloc-- > 0) { - int err; - - --loi; - /* destroy already created objects here */ - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; - err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, tmp, NULL, - NULL); - if (err) - CERROR("Failed to uncreate objid "LPX64" subobj " - LPX64" on OST idx %d: rc = 
%d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); - } - if (*ea == NULL) - obd_free_memmd(conn, &lsm); - goto out_tmp; -} - -#define lsm_bad_magic(LSMP) \ -({ \ - struct lov_stripe_md *_lsm__ = (LSMP); \ - int _ret__ = 0; \ - if (!_lsm__) { \ - CERROR("LOV requires striping ea\n"); \ - _ret__ = 1; \ - } else if (_lsm__->lsm_magic != LOV_MAGIC) { \ - CERROR("LOV striping magic bad %#x != %#x\n", \ - _lsm__->lsm_magic, LOV_MAGIC); \ - _ret__ = 1; \ - } \ - _ret__; \ -}) - -static int lov_destroy(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti) -{ - struct obdo tmp; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_file_handles *lfh = NULL; - int rc = 0, i; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - if (oa->o_valid & OBD_MD_FLHANDLE) - lfh = lov_handle2lfh(obdo_handle(oa)); - - lov = &export->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - /* Orphan clean up will (someday) fix this up. 
*/ - continue; - } - - memcpy(&tmp, oa, sizeof(tmp)); - tmp.o_id = loi->loi_id; - if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); - else - tmp.o_valid &= ~OBD_MD_FLHANDLE; - err = obd_destroy(&lov->tgts[loi->loi_ost_idx].conn, &tmp, - NULL, NULL); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: destroying objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, err); - if (!rc) - rc = err; - } - } - if (lfh != NULL) - lov_lfh_put(lfh); - EXIT; - out: - class_export_put(export); - return rc; -} - -static int lov_getattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm) -{ - struct obdo tmp; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_file_handles *lfh = NULL; - int i, rc = 0, set = 0; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - lov = &export->exp_obd->u.lov; - - if (oa->o_valid & OBD_MD_FLHANDLE) - lfh = lov_handle2lfh(obdo_handle(oa)); - - CDEBUG(D_INFO, "objid "LPX64": %ux%u byte stripes\n", - lsm->lsm_object_id, lsm->lsm_stripe_count, lsm->lsm_stripe_size); - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx); - /* create data objects with "parent" OA */ - memcpy(&tmp, oa, sizeof(tmp)); - tmp.o_id = loi->loi_id; - if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); - else - tmp.o_valid &= ~OBD_MD_FLHANDLE; - - err = obd_getattr(&lov->tgts[loi->loi_ost_idx].conn, &tmp,NULL); - if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: getattr objid "LPX64" subobj " - LPX64" on OST idx 
%d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); - GOTO(out, rc = err); - } - } else { - lov_merge_attrs(oa, &tmp, tmp.o_valid, lsm, i, &set); - } - } - if (!set) - rc = -EIO; - GOTO(out, rc); - out: - if (lfh != NULL) - lov_lfh_put(lfh); - class_export_put(export); - return rc; -} - -static int lov_getattr_interpret(struct ptlrpc_request_set *rqset, - struct lov_getattr_async_args *aa, int rc) -{ - struct lov_stripe_md *lsm = aa->aa_lsm; - struct obdo *oa = aa->aa_oa; - struct obdo *obdos = aa->aa_stripe_oas; - struct lov_oinfo *loi; - int i; - int set = 0; - ENTRY; - - if (rc == 0) { - /* NB all stripe requests succeeded to get here */ - - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++,loi++) { - if (obdos[i].o_valid == 0) /* inactive stripe */ - continue; - - lov_merge_attrs(oa, &obdos[i], obdos[i].o_valid, lsm, - i, &set); - } - - if (!set) { - CERROR ("No stripes had valid attrs\n"); - rc = -EIO; - } - } - - OBD_FREE (obdos, lsm->lsm_stripe_count * sizeof (*obdos)); - RETURN (rc); -} - -static int lov_getattr_async (struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, - struct ptlrpc_request_set *rqset) -{ - struct obdo *obdos; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_file_handles *lfh = NULL; - struct lov_getattr_async_args *aa; - int i; - int set = 0; - int rc = 0; - ENTRY; - - if (!lsm) { - CERROR("LOV requires striping ea\n"); - GOTO(out, rc = -EINVAL); - } - - if (lsm->lsm_magic != LOV_MAGIC) { - CERROR("LOV striping magic bad %#x != %#x\n", - lsm->lsm_magic, LOV_MAGIC); - GOTO(out, rc = -EINVAL); - } - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - lov = &export->exp_obd->u.lov; - - OBD_ALLOC (obdos, lsm->lsm_stripe_count * sizeof (*obdos)); - if (obdos == NULL) - GOTO (out, rc = -ENOMEM); - - if (oa->o_valid & OBD_MD_FLHANDLE) - lfh = lov_handle2lfh(obdo_handle(oa)); - - CDEBUG(D_INFO, "objid "LPX64": 
%ux%u byte stripes\n", - lsm->lsm_object_id, lsm->lsm_stripe_count, lsm->lsm_stripe_size); - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - /* leaves obdos[i].obd_valid unset */ - continue; - } - - CDEBUG(D_INFO, "objid "LPX64"[%d] has subobj "LPX64" at idx " - "%u\n", oa->o_id, i, loi->loi_id, loi->loi_ost_idx); - /* create data objects with "parent" OA */ - memcpy(&obdos[i], oa, sizeof(obdos[i])); - obdos[i].o_id = loi->loi_id; - if (lfh) - memcpy(obdo_handle(&obdos[i]), lfh->lfh_och + i, - FD_OSTDATA_SIZE); - else - obdos[i].o_valid &= ~OBD_MD_FLHANDLE; - - err = obd_getattr_async (&lov->tgts[loi->loi_ost_idx].conn, - &obdos[i], NULL, rqset); - if (err) { - CERROR("error: getattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); - GOTO(out_obdos, rc = err); - } - set = 1; - } - if (!set) - GOTO (out_obdos, rc = -EIO); - - LASSERT (rqset->set_interpret == NULL); - rqset->set_interpret = lov_getattr_interpret; - LASSERT (sizeof (rqset->set_args) >= sizeof (*aa)); - aa = (struct lov_getattr_async_args *)&rqset->set_args; - aa->aa_lsm = lsm; - aa->aa_oa = oa; - aa->aa_stripe_oas = obdos; - GOTO (out, rc = 0); - - out_obdos: - OBD_FREE (obdos, lsm->lsm_stripe_count * sizeof (*obdos)); - out: - if (lfh != NULL) - lov_lfh_put(lfh); - class_export_put(export); - RETURN (rc); -} - -static int lov_setattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti) -{ - struct obdo *tmp; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_file_handles *lfh = NULL; - int rc = 0, i, set = 0; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - /* size changes should go through punch and 
not setattr */ - LASSERT(!(oa->o_valid & OBD_MD_FLSIZE)); - - /* for now, we only expect mtime updates here */ - LASSERT(!(oa->o_valid & ~(OBD_MD_FLID |OBD_MD_FLTYPE |OBD_MD_FLMTIME))); - - tmp = obdo_alloc(); - if (!tmp) - GOTO(out, rc = -ENOMEM); - - if (oa->o_valid & OBD_MD_FLHANDLE) - lfh = lov_handle2lfh(obdo_handle(oa)); - - lov = &export->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - obdo_cpy_md(tmp, oa, oa->o_valid); - - if (lfh) - memcpy(obdo_handle(tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); - else - tmp->o_valid &= ~OBD_MD_FLHANDLE; - - tmp->o_id = loi->loi_id; - - err = obd_setattr(&lov->tgts[loi->loi_ost_idx].conn, tmp, - NULL, NULL); - if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: setattr objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, - err); - if (!rc) - rc = err; - } - } else - set = 1; - } - obdo_free(tmp); - if (!set && !rc) - rc = -EIO; - if (lfh != NULL) - lov_lfh_put(lfh); - GOTO(out, rc); - out: - class_export_put(export); - return rc; -} - -static int lov_open(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti, - struct obd_client_handle *och) -{ - struct obdo *tmp; /* on the heap here, on the stack in lov_close? 
*/ - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_file_handles *lfh = NULL; - int set = 0, rc = 0, i; - ENTRY; - LASSERT(och != NULL); - - if (lsm_bad_magic(lsm)) - GOTO(out_exp, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out_exp, rc = -ENODEV); - - tmp = obdo_alloc(); - if (!tmp) - GOTO(out_exp, rc = -ENOMEM); - - lfh = lov_lfh_new(); - if (lfh == NULL) - GOTO(out_tmp, rc = -ENOMEM); - OBD_ALLOC(lfh->lfh_och, lsm->lsm_stripe_count * sizeof *och); - if (!lfh->lfh_och) - GOTO(out_lfh, rc = -ENOMEM); - - lov = &export->exp_obd->u.lov; - oa->o_size = 0; - oa->o_blocks = 0; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - /* create data objects with "parent" OA */ - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; - - rc = obd_open(&lov->tgts[loi->loi_ost_idx].conn, tmp, - NULL, NULL, lfh->lfh_och + i); - if (rc) { - if (!lov->tgts[loi->loi_ost_idx].active) { - rc = 0; - continue; - } - CERROR("error: open objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", - oa->o_id, lsm->lsm_oinfo[i].loi_id, - loi->loi_ost_idx, rc); - goto out_handles; - } - - lov_merge_attrs(oa, tmp, tmp->o_valid, lsm, i, &set); - } - - lfh->lfh_count = lsm->lsm_stripe_count; - och->och_fh.cookie = lfh->lfh_handle.h_cookie; - obdo_handle(oa)->cookie = lfh->lfh_handle.h_cookie; - oa->o_valid |= OBD_MD_FLHANDLE; - - /* llfh refcount transfers to list */ - spin_lock(&export->exp_lov_data.led_lock); - list_add(&lfh->lfh_list, &export->exp_lov_data.led_open_head); - spin_unlock(&export->exp_lov_data.led_lock); - - GOTO(out_tmp, rc); - out_tmp: - obdo_free(tmp); - out_exp: - class_export_put(export); - return rc; - - out_handles: - for (i--, loi = &lsm->lsm_oinfo[i]; i >= 0; i--, loi--) { - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) - 
continue; - - memcpy(tmp, oa, sizeof(*tmp)); - tmp->o_id = loi->loi_id; - memcpy(obdo_handle(tmp), lfh->lfh_och + i, FD_OSTDATA_SIZE); - - err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, tmp, - NULL, NULL); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: closing objid "LPX64" subobj "LPX64 - " on OST idx %d after open error: rc=%d\n", - oa->o_id, loi->loi_id, loi->loi_ost_idx, err); - } - } - - OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE); - out_lfh: - lov_lfh_destroy(lfh); - lov_lfh_put(lfh); - goto out_tmp; -} - -static int lov_close(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, struct obd_trans_info *oti) -{ - struct obdo tmp; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_file_handles *lfh = NULL; - int rc = 0, i; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - if (oa->o_valid & OBD_MD_FLHANDLE) - lfh = lov_handle2lfh(obdo_handle(oa)); - - lov = &export->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - int err; - - /* create data objects with "parent" OA */ - memcpy(&tmp, oa, sizeof(tmp)); - tmp.o_id = loi->loi_id; - if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); - else - tmp.o_valid &= ~OBD_MD_FLHANDLE; - - err = obd_close(&lov->tgts[loi->loi_ost_idx].conn, &tmp, - NULL, NULL); - if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: close objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", oa->o_id, - loi->loi_id, loi->loi_ost_idx, err); - } - if (!rc) - rc = err; - } - } - if (lfh != NULL) { - spin_lock(&export->exp_lov_data.led_lock); - list_del(&lfh->lfh_list); - spin_unlock(&export->exp_lov_data.led_lock); - lov_lfh_put(lfh); /* drop the reference owned by the list */ - - OBD_FREE(lfh->lfh_och, lsm->lsm_stripe_count * FD_OSTDATA_SIZE); - 
lov_lfh_destroy(lfh); - lov_lfh_put(lfh); /* balance handle2lfh above */ - } - GOTO(out, rc); - out: - class_export_put(export); - return rc; -} - -#ifndef log2 -#define log2(n) ffz(~(n)) -#endif - -/* we have an offset in file backed by an lov and want to find out where - * that offset lands in our given stripe of the file. for the easy - * case where the offset is within the stripe, we just have to scale the - * offset down to make it relative to the stripe instead of the lov. - * - * the harder case is what to do when the offset doesn't intersect the - * stripe. callers will want start offsets clamped ahead to the start - * of the nearest stripe in the file. end offsets similarly clamped to the - * nearest ending byte of a stripe in the file: - * - * all this function does is move offsets to the nearest region of the - * stripe, and it does its work "mod" the full length of all the stripes. - * consider a file with 3 stripes: - * - * S E - * --------------------------------------------------------------------- - * | 0 | 1 | 2 | 0 | 1 | 2 | - * --------------------------------------------------------------------- - * - * to find stripe 1's offsets for S and E, it divides by the full stripe - * width and does its math in the context of a single set of stripes: - * - * S E - * ----------------------------------- - * | 0 | 1 | 2 | - * ----------------------------------- - * - * it'll notice that E is outside stripe 1 and clamp it to the end of the - * stripe, then multiply it back out by lov_off to give the real offsets in - * the stripe: - * - * S E - * --------------------------------------------------------------------- - * | 1 | 1 | 1 | 1 | 1 | 1 | - * --------------------------------------------------------------------- - * - * it would have done similarly and pulled S forward to the start of a 1 - * stripe if, say, S had landed in a 0 stripe. - * - * this rounding isn't always correct. 
consider an E lov offset that lands - * on a 0 stripe, the "mod stripe width" math will pull it forward to the - * start of a 1 stripe, when in fact it wanted to be rounded back to the end - * of a previous 1 stripe. this logic is handled by callers and this is why: - * - * this function returns < 0 when the offset was "before" the stripe and - * was moved forward to the start of the stripe in question; 0 when it - * falls in the stripe and no shifting was done; > 0 when the offset - * was outside the stripe and was pulled back to its final byte. */ -static int lov_stripe_offset(struct lov_stripe_md *lsm, obd_off lov_off, - int stripeno, obd_off *obd_off) -{ - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long swidth = ssize * lsm->lsm_stripe_count; - unsigned long stripe_off, this_stripe; - int ret = 0; - - if (lov_off == OBD_OBJECT_EOF) { - *obd_off = OBD_OBJECT_EOF; - return 0; - } - - /* do_div(a, b) returns a % b, and a = a / b */ - stripe_off = do_div(lov_off, swidth); - - this_stripe = stripeno * ssize; - if (stripe_off < this_stripe) { - stripe_off = 0; - ret = -1; - } else { - stripe_off -= this_stripe; - - if (stripe_off >= ssize) { - stripe_off = ssize; - ret = 1; - } - } - - *obd_off = lov_off * ssize + stripe_off; - return ret; -} - -/* given an extent in an lov and a stripe, calculate the extent of the stripe - * that is contained within the lov extent. this returns true if the given - * stripe does intersect with the lov extent. 
*/ -static int lov_stripe_intersects(struct lov_stripe_md *lsm, int stripeno, - obd_off start, obd_off end, - obd_off *obd_start, obd_off *obd_end) -{ - int start_side, end_side; - - start_side = lov_stripe_offset(lsm, start, stripeno, obd_start); - end_side = lov_stripe_offset(lsm, end, stripeno, obd_end); - - CDEBUG(D_INODE, "["LPU64"->"LPU64"] -> [(%d) "LPU64"->"LPU64" (%d)]\n", - start, end, start_side, *obd_start, *obd_end, end_side); - - /* this stripe doesn't intersect the file extent when neither - * start or the end intersected the stripe and obd_start and - * obd_end got rounded up to the save value. */ - if (start_side != 0 && end_side != 0 && *obd_start == *obd_end) - return 0; - - /* as mentioned in the lov_stripe_offset commentary, end - * might have been shifted in the wrong direction. This - * happens when an end offset is before the stripe when viewed - * through the "mod stripe size" math. we detect it being shifted - * in the wrong direction and touch it up. - * interestingly, this can't underflow since end must be > start - * if we passed through the previous check. - * (should we assert for that somewhere?) */ - if (end_side != 0) - (*obd_end)--; - - return 1; -} - -/* compute which stripe number "lov_off" will be written into */ -static int lov_stripe_number(struct lov_stripe_md *lsm, obd_off lov_off) -{ - unsigned long ssize = lsm->lsm_stripe_size; - unsigned long swidth = ssize * lsm->lsm_stripe_count; - unsigned long stripe_off; - - stripe_off = do_div(lov_off, swidth); - - return stripe_off / ssize; -} - -/* FIXME: maybe we'll just make one node the authoritative attribute node, then - * we can send this 'punch' to just the authoritative node and the nodes - * that the punch will affect. 
*/ -static int lov_punch(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, - obd_off start, obd_off end, struct obd_trans_info *oti) -{ - struct obdo tmp; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_file_handles *lfh = NULL; - int rc = 0, i; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - if (oa->o_valid & OBD_MD_FLHANDLE) - lfh = lov_handle2lfh(obdo_handle(oa)); - - lov = &export->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - obd_off starti, endi; - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - if (!lov_stripe_intersects(lsm, i, start, end, &starti, &endi)) - continue; - - /* create data objects with "parent" OA */ - memcpy(&tmp, oa, sizeof(tmp)); - tmp.o_id = loi->loi_id; - if (lfh) - memcpy(obdo_handle(&tmp), lfh->lfh_och + i, - FD_OSTDATA_SIZE); - else - tmp.o_valid &= ~OBD_MD_FLHANDLE; - - err = obd_punch(&lov->tgts[loi->loi_ost_idx].conn, &tmp, NULL, - starti, endi, NULL); - if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: punch objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", oa->o_id, - loi->loi_id, loi->loi_ost_idx, err); - } - if (!rc) - rc = err; - } - } - if (lfh != NULL) - lov_lfh_put(lfh); - GOTO(out, rc); - out: - class_export_put(export); - return rc; -} - -static int lov_brw_check(struct lov_obd *lov, struct lov_stripe_md *lsm, - obd_count oa_bufs, struct brw_page *pga) -{ - int i; - - /* The caller just wants to know if there's a chance that this - * I/O can succeed */ - for (i = 0; i < oa_bufs; i++) { - int stripe = lov_stripe_number(lsm, pga[i].off); - int ost = lsm->lsm_oinfo[stripe].loi_ost_idx; - struct ldlm_extent ext, subext; - ext.start = pga[i].off; - ext.start = pga[i].off + pga[i].count; - 
- if (!lov_stripe_intersects(lsm, i, ext.start, ext.end, - &subext.start, &subext.end)) - continue; - - if (lov->tgts[ost].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", ost); - return -EIO; - } - } - return 0; -} - -static int lov_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count oa_bufs, - struct brw_page *pga, struct obd_trans_info *oti) -{ - struct { - int bufct; - int index; - int subcount; - struct lov_stripe_md lsm; - int ost_idx; - } *stripeinfo, *si, *si_last; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct brw_page *ioarr; - struct lov_oinfo *loi; - int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out_exp, rc = -EINVAL); - - lov = &export->exp_obd->u.lov; - - if (cmd == OBD_BRW_CHECK) { - rc = lov_brw_check(lov, lsm, oa_bufs, pga); - GOTO(out_exp, rc); - } - - OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo)); - if (!stripeinfo) - GOTO(out_exp, rc = -ENOMEM); - - OBD_ALLOC(where, sizeof(*where) * oa_bufs); - if (!where) - GOTO(out_sinfo, rc = -ENOMEM); - - OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs); - if (!ioarr) - GOTO(out_where, rc = -ENOMEM); - - for (i = 0; i < oa_bufs; i++) { - where[i] = lov_stripe_number(lsm, pga[i].off); - stripeinfo[where[i]].bufct++; - } - - for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo; - i < stripe_count; i++, loi++, si_last = si, si++) { - if (i > 0) - si->index = si_last->index + si_last->bufct; - si->lsm.lsm_object_id = loi->loi_id; - si->ost_idx = loi->loi_ost_idx; - } - - for (i = 0; i < oa_bufs; i++) { - int which = where[i]; - int shift; - - shift = stripeinfo[which].index + stripeinfo[which].subcount; - LASSERT(shift < oa_bufs); - ioarr[shift] = pga[i]; - lov_stripe_offset(lsm, pga[i].off, which, &ioarr[shift].off); - stripeinfo[which].subcount++; - } - - for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) { - int shift = si->index; - - if 
(lov->tgts[si->ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx); - GOTO(out_ioarr, rc = -EIO); - } - - if (si->bufct) { - LASSERT(shift < oa_bufs); - rc = obd_brw(cmd, &lov->tgts[si->ost_idx].conn, - &si->lsm, si->bufct, &ioarr[shift], - oti); - if (rc) - GOTO(out_ioarr, rc); - } - } - GOTO(out_ioarr, rc); - out_ioarr: - OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); - out_where: - OBD_FREE(where, sizeof(*where) * oa_bufs); - out_sinfo: - OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo)); - out_exp: - class_export_put(export); - return rc; -} - -static int lov_brw_interpret (struct ptlrpc_request_set *set, - struct lov_brw_async_args *aa, int rc) -{ - obd_count oa_bufs = aa->aa_oa_bufs; - struct brw_page *ioarr = aa->aa_ioarr; - ENTRY; - - OBD_FREE (ioarr, sizeof (*ioarr) * oa_bufs); - RETURN (rc); -} - -static int lov_brw_async(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count oa_bufs, - struct brw_page *pga, struct ptlrpc_request_set *set, - struct obd_trans_info *oti) -{ - struct { - int bufct; - int index; - int subcount; - struct lov_stripe_md lsm; - int ost_idx; - } *stripeinfo, *si, *si_last; - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct brw_page *ioarr; - struct lov_oinfo *loi; - struct lov_brw_async_args *aa; - int rc = 0, i, *where, stripe_count = lsm->lsm_stripe_count; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out_exp, rc = -EINVAL); - - lov = &export->exp_obd->u.lov; - - if (cmd == OBD_BRW_CHECK) { - rc = lov_brw_check(lov, lsm, oa_bufs, pga); - GOTO(out_exp, rc); - } - - OBD_ALLOC(stripeinfo, stripe_count * sizeof(*stripeinfo)); - if (!stripeinfo) - GOTO(out_exp, rc = -ENOMEM); - - OBD_ALLOC(where, sizeof(*where) * oa_bufs); - if (!where) - GOTO(out_sinfo, rc = -ENOMEM); - - OBD_ALLOC(ioarr, sizeof(*ioarr) * oa_bufs); - if (!ioarr) - GOTO(out_where, rc = -ENOMEM); - - for (i = 0; i < oa_bufs; i++) { - where[i] = lov_stripe_number(lsm, pga[i].off); - 
stripeinfo[where[i]].bufct++; - } - - for (i = 0, loi = lsm->lsm_oinfo, si_last = si = stripeinfo; - i < stripe_count; i++, loi++, si_last = si, si++) { - if (i > 0) - si->index = si_last->index + si_last->bufct; - si->lsm.lsm_object_id = loi->loi_id; - si->ost_idx = loi->loi_ost_idx; - } - - for (i = 0; i < oa_bufs; i++) { - int which = where[i]; - int shift; - - shift = stripeinfo[which].index + stripeinfo[which].subcount; - LASSERT(shift < oa_bufs); - ioarr[shift] = pga[i]; - lov_stripe_offset(lsm, pga[i].off, which, &ioarr[shift].off); - stripeinfo[which].subcount++; - } - - for (i = 0, si = stripeinfo; i < stripe_count; i++, si++) { - int shift = si->index; - - if (si->bufct == 0) - continue; - - if (lov->tgts[si->ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", si->ost_idx); - GOTO(out_ioarr, rc = -EIO); - } - - LASSERT(shift < oa_bufs); - rc = obd_brw_async(cmd, &lov->tgts[si->ost_idx].conn, - &si->lsm, si->bufct, &ioarr[shift], - set, oti); - if (rc) - GOTO(out_ioarr, rc); - } - LASSERT (rc == 0); - LASSERT (set->set_interpret == NULL); - set->set_interpret = lov_brw_interpret; - LASSERT (sizeof (set->set_args) >= sizeof (struct lov_brw_async_args)); - aa = (struct lov_brw_async_args *)&set->set_args; - aa->aa_oa_bufs = oa_bufs; - aa->aa_ioarr = ioarr; - GOTO(out_where, rc); - out_ioarr: - OBD_FREE(ioarr, sizeof(*ioarr) * oa_bufs); - out_where: - OBD_FREE(where, sizeof(*where) * oa_bufs); - out_sinfo: - OBD_FREE(stripeinfo, stripe_count * sizeof(*stripeinfo)); - out_exp: - class_export_put(export); - return rc; -} - -static int lov_enqueue(struct lustre_handle *conn, struct lov_stripe_md *lsm, - struct lustre_handle *parent_lock, - __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, void *cb, void *data, - struct lustre_handle *lockh) -{ - struct obd_export *export = class_conn2export(conn); - struct lov_lock_handles *lov_lockh = NULL; - struct lustre_handle *lov_lockhp; - struct lov_obd *lov; - struct lov_oinfo *loi; - struct 
lov_stripe_md submd; - ldlm_error_t rc; - int i; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out_exp, rc = -EINVAL); - - /* we should never be asked to replay a lock this way. */ - LASSERT((*flags & LDLM_FL_REPLAY) == 0); - - if (!export || !export->exp_obd) - GOTO(out_exp, rc = -ENODEV); - - if (lsm->lsm_stripe_count > 1) { - lov_lockh = lov_llh_new(lsm); - if (lov_lockh == NULL) - GOTO(out_exp, rc = -ENOMEM); - - lockh->cookie = lov_lockh->llh_handle.h_cookie; - lov_lockhp = lov_lockh->llh_handles; - } else { - lov_lockhp = lockh; - } - - lov = &export->exp_obd->u.lov; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++, lov_lockhp++) { - struct ldlm_extent *extent = (struct ldlm_extent *)cookie; - struct ldlm_extent sub_ext; - - *flags = 0; - if (!lov_stripe_intersects(lsm, i, extent->start, extent->end, - &sub_ext.start, &sub_ext.end)) - continue; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - /* XXX LOV STACKING: submd should be from the subobj */ - submd.lsm_object_id = loi->loi_id; - submd.lsm_stripe_count = 0; - /* XXX submd is not fully initialized here */ - *flags = 0; - rc = obd_enqueue(&(lov->tgts[loi->loi_ost_idx].conn), &submd, - parent_lock, type, &sub_ext, sizeof(sub_ext), - mode, flags, cb, data, lov_lockhp); - - // XXX add a lock debug statement here - if (rc != ELDLM_OK) { - memset(lov_lockhp, 0, sizeof(*lov_lockhp)); - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: enqueue objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - lsm->lsm_object_id, loi->loi_id, - loi->loi_ost_idx, rc); - goto out_locks; - } - } - } - if (lsm->lsm_stripe_count > 1) - lov_llh_put(lov_lockh); - GOTO(out_exp, rc = ELDLM_OK); - - out_locks: - while (loi--, lov_lockhp--, i-- > 0) { - struct lov_stripe_md submd; - int err; - - if (lov_lockhp->cookie == 0) - continue; - - /* XXX LOV STACKING: submd should be from the subobj */ - submd.lsm_object_id 
= loi->loi_id; - submd.lsm_stripe_count = 0; - err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd, - mode, lov_lockhp); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: cancelling objid "LPX64" on OST " - "idx %d after enqueue error: rc = %d\n", - loi->loi_id, loi->loi_ost_idx, err); - } - } - - if (lsm->lsm_stripe_count > 1) { - lov_llh_destroy(lov_lockh); - lov_llh_put(lov_lockh); - } - out_exp: - class_export_put(export); - RETURN(rc); -} - -static int lov_match(struct lustre_handle *conn, struct lov_stripe_md *lsm, - __u32 type, void *cookie, int cookielen, __u32 mode, - int *flags, void *data, struct lustre_handle *lockh) -{ - struct obd_export *export = class_conn2export(conn); - struct lov_lock_handles *lov_lockh = NULL; - struct lustre_handle *lov_lockhp; - struct lov_obd *lov; - struct lov_oinfo *loi; - struct lov_stripe_md submd; - ldlm_error_t rc = 0; - int i; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out_exp, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out_exp, rc = -ENODEV); - - if (lsm->lsm_stripe_count > 1) { - lov_lockh = lov_llh_new(lsm); - if (lov_lockh == NULL) - GOTO(out_exp, rc = -ENOMEM); - - lockh->cookie = lov_lockh->llh_handle.h_cookie; - lov_lockhp = lov_lockh->llh_handles; - } else { - lov_lockhp = lockh; - } - - lov = &export->exp_obd->u.lov; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++, lov_lockhp++) { - struct ldlm_extent *extent = (struct ldlm_extent *)cookie; - struct ldlm_extent sub_ext; - int lov_flags; - - if (!lov_stripe_intersects(lsm, i, extent->start, extent->end, - &sub_ext.start, &sub_ext.end)) - continue; - - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - rc = -EIO; - break; - } - - /* XXX LOV STACKING: submd should be from the subobj */ - submd.lsm_object_id = loi->loi_id; - submd.lsm_stripe_count = 0; - lov_flags = *flags; - /* XXX submd is not fully initialized here */ - rc = 
obd_match(&(lov->tgts[loi->loi_ost_idx].conn), &submd, - type, &sub_ext, sizeof(sub_ext), mode, - &lov_flags, data, lov_lockhp); - if (rc != 1) - break; - } - if (rc == 1) { - if (lsm->lsm_stripe_count > 1) - lov_llh_put(lov_lockh); - GOTO(out_exp, 1); - } - - while (loi--, lov_lockhp--, i-- > 0) { - struct lov_stripe_md submd; - int err; - - if (lov_lockhp->cookie == 0) - continue; - - /* XXX LOV STACKING: submd should be from the subobj */ - submd.lsm_object_id = loi->loi_id; - submd.lsm_stripe_count = 0; - err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd, - mode, lov_lockhp); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: cancelling objid "LPX64" on OST " - "idx %d after match failure: rc = %d\n", - loi->loi_id, loi->loi_ost_idx, err); - } - } - - if (lsm->lsm_stripe_count > 1) { - lov_llh_destroy(lov_lockh); - lov_llh_put(lov_lockh); - } - out_exp: - class_export_put(export); - RETURN(rc); -} - -static int lov_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm, - __u32 mode, struct lustre_handle *lockh) -{ - struct obd_export *export = class_conn2export(conn); - struct lov_lock_handles *lov_lockh = NULL; - struct lustre_handle *lov_lockhp; - struct lov_obd *lov; - struct lov_oinfo *loi; - int rc = 0, i; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - LASSERT(lockh); - if (lsm->lsm_stripe_count > 1) { - lov_lockh = lov_handle2llh(lockh); - if (!lov_lockh) { - CERROR("LOV: invalid lov lock handle %p\n", lockh); - GOTO(out, rc = -EINVAL); - } - - lov_lockhp = lov_lockh->llh_handles; - } else { - lov_lockhp = lockh; - } - - lov = &export->exp_obd->u.lov; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++, lov_lockhp++) { - struct lov_stripe_md submd; - int err; - - if (lov_lockhp->cookie == 0) { - CDEBUG(D_HA, "lov idx %d subobj "LPX64" no lock?\n", - loi->loi_ost_idx, loi->loi_id); - continue; - } - - /* XXX LOV 
STACKING: submd should be from the subobj */ - submd.lsm_object_id = loi->loi_id; - submd.lsm_stripe_count = 0; - err = obd_cancel(&lov->tgts[loi->loi_ost_idx].conn, &submd, - mode, lov_lockhp); - if (err) { - if (lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: cancel objid "LPX64" subobj " - LPX64" on OST idx %d: rc = %d\n", - lsm->lsm_object_id, - loi->loi_id, loi->loi_ost_idx, err); - if (!rc) - rc = err; - } - } - } - - if (lsm->lsm_stripe_count > 1) - lov_llh_destroy(lov_lockh); - if (lov_lockh != NULL) - lov_llh_put(lov_lockh); - GOTO(out, rc); - out: - class_export_put(export); - return rc; -} - -static int lov_cancel_unused(struct lustre_handle *conn, - struct lov_stripe_md *lsm, int flags, void *opaque) -{ - struct obd_export *export = class_conn2export(conn); - struct lov_obd *lov; - struct lov_oinfo *loi; - int rc = 0, i; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out, rc = -ENODEV); - - lov = &export->exp_obd->u.lov; - for (i = 0,loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; i++,loi++) { - struct lov_stripe_md submd; - int err; - - if (lov->tgts[loi->loi_ost_idx].active == 0) - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - - submd.lsm_object_id = loi->loi_id; - submd.lsm_stripe_count = 0; - err = obd_cancel_unused(&lov->tgts[loi->loi_ost_idx].conn, - &submd, flags, opaque); - if (err && lov->tgts[loi->loi_ost_idx].active) { - CERROR("error: cancel unused objid "LPX64" subobj "LPX64 - " on OST idx %d: rc = %d\n", lsm->lsm_object_id, - loi->loi_id, loi->loi_ost_idx, err); - if (!rc) - rc = err; - } - } - GOTO(out, rc); - out: - class_export_put(export); - return rc; -} - -#define LOV_U64_MAX ((__u64)~0ULL) -#define LOV_SUM_MAX(tot, add) \ - do { \ - if ((tot) + (add) < (tot)) \ - (tot) = LOV_U64_MAX; \ - else \ - (tot) += (add); \ - } while(0) - -static int lov_statfs(struct obd_export *export, struct obd_statfs *osfs) -{ - struct obd_export *tgt_export; - struct 
lov_obd *lov; - struct obd_statfs lov_sfs; - int set = 0; - int rc = 0; - int i; - ENTRY; - - if (!export || !export->exp_obd) - RETURN(-ENODEV); - - lov = &export->exp_obd->u.lov; - - /* We only get block data from the OBD */ - for (i = 0; i < lov->desc.ld_tgt_count; i++) { - int err; - - if (!lov->tgts[i].active) { - CDEBUG(D_HA, "lov idx %d inactive\n", i); - continue; - } - - tgt_export = class_conn2export(&lov->tgts[i].conn); - if (!tgt_export) { - CDEBUG(D_HA, "lov idx %d NULL export\n", i); - continue; - } - - err = obd_statfs(tgt_export, &lov_sfs); - class_export_put(tgt_export); - if (err) { - if (lov->tgts[i].active) { - CERROR("error: statfs OSC %s on OST idx %d: " - "err = %d\n", - lov->tgts[i].uuid.uuid, i, err); - if (!rc) - rc = err; - } - continue; - } - if (!set) { - memcpy(osfs, &lov_sfs, sizeof(lov_sfs)); - set = 1; - } else { - osfs->os_bfree += lov_sfs.os_bfree; - osfs->os_bavail += lov_sfs.os_bavail; - osfs->os_blocks += lov_sfs.os_blocks; - /* XXX not sure about this one - depends on policy. - * - could be minimum if we always stripe on all OBDs - * (but that would be wrong for any other policy, - * if one of the OBDs has no more objects left) - * - could be sum if we stripe whole objects - * - could be average, just to give a nice number - * - * To give a "reasonable" (if not wholly accurate) - * number, we divide the total number of free objects - * by expected stripe count (watch out for overflow). - */ - LOV_SUM_MAX(osfs->os_files, lov_sfs.os_files); - LOV_SUM_MAX(osfs->os_ffree, lov_sfs.os_ffree); - } - } - if (set) { - __u32 expected_stripes = lov->desc.ld_default_stripe_count ? 
- lov->desc.ld_default_stripe_count : - lov->desc.ld_active_tgt_count; - - if (osfs->os_files != LOV_U64_MAX) - do_div(osfs->os_files, expected_stripes); - if (osfs->os_ffree != LOV_U64_MAX) - do_div(osfs->os_ffree, expected_stripes); - } else if (!rc) - rc = -EIO; - RETURN(rc); -} - -static int lov_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, - void *karg, void *uarg) -{ - struct obd_device *obddev = class_conn2obd(conn); - struct lov_obd *lov = &obddev->u.lov; - int i, count = lov->desc.ld_tgt_count; - struct obd_uuid *uuidp; - int rc; - - ENTRY; - - switch (cmd) { - case IOC_LOV_SET_OSC_ACTIVE: { - struct obd_ioctl_data *data = karg; - uuidp = (struct obd_uuid *)data->ioc_inlbuf1; - rc = lov_set_osc_active(lov, uuidp, data->ioc_offset); - break; - } - case OBD_IOC_LOV_GET_CONFIG: { - struct obd_ioctl_data *data = karg; - struct lov_tgt_desc *tgtdesc; - struct lov_desc *desc; - char *buf = NULL; - - buf = NULL; - len = 0; - if (obd_ioctl_getdata(&buf, &len, (void *)uarg)) - RETURN(-EINVAL); - - data = (struct obd_ioctl_data *)buf; - - if (sizeof(*desc) > data->ioc_inllen1) { - OBD_FREE(buf, len); - RETURN(-EINVAL); - } - - if (sizeof(uuidp->uuid) * count > data->ioc_inllen2) { - OBD_FREE(buf, len); - RETURN(-EINVAL); - } - - desc = (struct lov_desc *)data->ioc_inlbuf1; - memcpy(desc, &(lov->desc), sizeof(*desc)); - - uuidp = (struct obd_uuid *)data->ioc_inlbuf2; - tgtdesc = lov->tgts; - for (i = 0; i < count; i++, uuidp++, tgtdesc++) - obd_str2uuid(uuidp, tgtdesc->uuid.uuid); - - rc = copy_to_user((void *)uarg, buf, len); - if (rc) - rc = -EFAULT; - obd_ioctl_freedata(buf, len); - break; - } - case LL_IOC_LOV_SETSTRIPE: - rc = lov_setstripe(conn, karg, uarg); - break; - case LL_IOC_LOV_GETSTRIPE: - rc = lov_getstripe(conn, karg, uarg); - break; - default: { - int set = 0; - if (count == 0) - RETURN(-ENOTTY); - rc = 0; - for (i = 0; i < count; i++) { - int err; - - err = obd_iocontrol(cmd, &lov->tgts[i].conn, - len, karg, uarg); - if (err) { - 
if (lov->tgts[i].active) { - CERROR("error: iocontrol OSC %s on OST" - "idx %d: err = %d\n", - lov->tgts[i].uuid.uuid, i, err); - if (!rc) - rc = err; - } - } else - set = 1; - } - if (!set && !rc) - rc = -EIO; - } - } - - RETURN(rc); -} - -static int lov_get_info(struct lustre_handle *conn, __u32 keylen, - void *key, __u32 *vallen, void *val) -{ - struct obd_device *obddev = class_conn2obd(conn); - struct lov_obd *lov = &obddev->u.lov; - int i; - ENTRY; - - if (!vallen || !val) - RETURN(-EFAULT); - - if (keylen > strlen("lock_to_stripe") && - strcmp(key, "lock_to_stripe") == 0) { - struct { - char name[16]; - struct ldlm_lock *lock; - struct lov_stripe_md *lsm; - } *data = key; - __u32 *stripe = val; - struct lov_oinfo *loi; - - if (*vallen < sizeof(*stripe)) - RETURN(-EFAULT); - *vallen = sizeof(*stripe); - - /* XXX This is another one of those bits that will need to - * change if we ever actually support nested LOVs. It uses - * the lock's connection to find out which stripe it is. */ - for (i = 0, loi = data->lsm->lsm_oinfo; - i < data->lsm->lsm_stripe_count; - i++, loi++) { - if (lov->tgts[loi->loi_ost_idx].conn.cookie == - data->lock->l_connh->cookie) { - *stripe = i; - RETURN(0); - } - } - RETURN(-ENXIO); - } - - RETURN(-EINVAL); -} - -static int lov_mark_page_dirty(struct lustre_handle *conn, - struct lov_stripe_md *lsm, unsigned long offset) -{ - struct lov_obd *lov = &class_conn2obd(conn)->u.lov; - struct lov_oinfo *loi; - struct lov_stripe_md *submd; - int stripe, rc; - obd_off off; - ENTRY; - - if (lsm_bad_magic(lsm)) - RETURN(-EINVAL); - - OBD_ALLOC(submd, lov_stripe_md_size(1)); - if (submd == NULL) - RETURN(-ENOMEM); - - stripe = lov_stripe_number(lsm, (obd_off)offset << PAGE_CACHE_SHIFT); - lov_stripe_offset(lsm, (obd_off)offset << PAGE_CACHE_SHIFT, stripe, - &off); - off >>= PAGE_CACHE_SHIFT; - - loi = &lsm->lsm_oinfo[stripe]; - CDEBUG(D_INODE, "off %lu => off %lu on stripe %d\n", offset, - (unsigned long)off, stripe); - 
submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline; - - rc = obd_mark_page_dirty(&lov->tgts[loi->loi_ost_idx].conn, submd, off); - OBD_FREE(submd, lov_stripe_md_size(1)); - RETURN(rc); -} - -static int lov_clear_dirty_pages(struct lustre_handle *conn, - struct lov_stripe_md *lsm, unsigned long start, - unsigned long end, unsigned long *cleared) - -{ - struct obd_export *export = class_conn2export(conn); - __u64 start_off = (__u64)start << PAGE_CACHE_SHIFT; - __u64 end_off = (__u64)end << PAGE_CACHE_SHIFT; - __u64 obd_start, obd_end; - struct lov_stripe_md *submd = NULL; - struct lov_obd *lov; - struct lov_oinfo *loi; - int i, rc; - unsigned long osc_cleared; - ENTRY; - - *cleared = 0; - - if (lsm_bad_magic(lsm)) - GOTO(out_exp, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out_exp, rc = -ENODEV); - - OBD_ALLOC(submd, lov_stripe_md_size(1)); - if (submd == NULL) - GOTO(out_exp, rc = -ENOMEM); - - lov = &export->exp_obd->u.lov; - rc = 0; - for (i = 0, loi = lsm->lsm_oinfo; - i < lsm->lsm_stripe_count; - i++, loi++) { - if (lov->tgts[loi->loi_ost_idx].active == 0) { - CDEBUG(D_HA, "lov idx %d inactive\n", loi->loi_ost_idx); - continue; - } - - if(!lov_stripe_intersects(lsm, i, start_off, end_off, - &obd_start, &obd_end)) - continue; - obd_start >>= PAGE_CACHE_SHIFT; - obd_end >>= PAGE_CACHE_SHIFT; - - CDEBUG(D_INODE, "offs [%lu,%lu] => offs [%lu,%lu] stripe %d\n", - start, end, (unsigned long)obd_start, - (unsigned long)obd_end, loi->loi_ost_idx); - submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline; - rc = obd_clear_dirty_pages(&lov->tgts[loi->loi_ost_idx].conn, - submd, obd_start, obd_end, - &osc_cleared); - if (rc) - break; - *cleared += osc_cleared; - } -out_exp: - if (submd) - OBD_FREE(submd, lov_stripe_md_size(1)); - class_export_put(export); - RETURN(rc); -} - -static int lov_last_dirty_offset(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - unsigned long *offset) -{ - struct obd_export *export = 
class_conn2export(conn); - struct lov_stripe_md *submd = NULL; - struct lov_obd *lov; - struct lov_oinfo *loi; - unsigned long tmp, count, skip; - int err, i, rc; - ENTRY; - - if (lsm_bad_magic(lsm)) - GOTO(out_exp, rc = -EINVAL); - - if (!export || !export->exp_obd) - GOTO(out_exp, rc = -ENODEV); - - OBD_ALLOC(submd, lov_stripe_md_size(1)); - if (submd == NULL) - GOTO(out_exp, rc = -ENOMEM); - - *offset = 0; - lov = &export->exp_obd->u.lov; - rc = -ENOENT; - for (i = 0, loi = lsm->lsm_oinfo; i < lsm->lsm_stripe_count; - i++, loi++) { - - count = lsm->lsm_stripe_size >> PAGE_CACHE_SHIFT; - skip = (lsm->lsm_stripe_count - 1) * count; - - submd->lsm_oinfo[0].loi_dirty_ot = &loi->loi_dirty_ot_inline; - - err = obd_last_dirty_offset(&lov->tgts[loi->loi_ost_idx].conn, - submd, &tmp); - if (err == -ENOENT) - continue; - if (err) - GOTO(out_exp, rc = err); - - rc = 0; - if (tmp != ~0) - tmp += (tmp/count * skip) + (i * count); - if (tmp > *offset) - *offset = tmp; - } -out_exp: - if (submd) - OBD_FREE(submd, lov_stripe_md_size(1)); - class_export_put(export); - RETURN(rc); -} - -struct obd_ops lov_obd_ops = { - o_owner: THIS_MODULE, - o_attach: lov_attach, - o_detach: lov_detach, - o_setup: lov_setup, - o_connect: lov_connect, - o_disconnect: lov_disconnect, - o_statfs: lov_statfs, - o_packmd: lov_packmd, - o_unpackmd: lov_unpackmd, - o_create: lov_create, - o_destroy: lov_destroy, - o_getattr: lov_getattr, - o_getattr_async: lov_getattr_async, - o_setattr: lov_setattr, - o_open: lov_open, - o_close: lov_close, - o_brw: lov_brw, - o_brw_async: lov_brw_async, - o_punch: lov_punch, - o_enqueue: lov_enqueue, - o_match: lov_match, - o_cancel: lov_cancel, - o_cancel_unused: lov_cancel_unused, - o_iocontrol: lov_iocontrol, - o_get_info: lov_get_info, - .o_mark_page_dirty = lov_mark_page_dirty, - .o_clear_dirty_pages = lov_clear_dirty_pages, - .o_last_dirty_offset = lov_last_dirty_offset, -}; - -int __init lov_init(void) -{ - struct lprocfs_static_vars lvars; - int rc; - - 
printk(KERN_INFO "Lustre Logical Object Volume driver; " - "info@clusterfs.com\n"); - lprocfs_init_vars(&lvars); - rc = class_register_type(&lov_obd_ops, lvars.module_vars, - OBD_LOV_DEVICENAME); - RETURN(rc); -} - -static void __exit lov_exit(void) -{ - class_unregister_type(OBD_LOV_DEVICENAME); -} - -#ifdef __KERNEL__ -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Logical Object Volume OBD driver"); -MODULE_LICENSE("GPL"); - -module_init(lov_init); -module_exit(lov_exit); -#endif diff --git a/lustre/lov/lov_pack.c b/lustre/lov/lov_pack.c deleted file mode 100644 index bbb40de..0000000 --- a/lustre/lov/lov_pack.c +++ /dev/null @@ -1,361 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * (Un)packing of OST/MDS requests - * - */ - -#define DEBUG_SUBSYSTEM S_LLITE -#ifndef __KERNEL__ -#include <liblustre.h> -#endif - -#include <linux/lustre_net.h> -#include <linux/obd.h> -#include <linux/obd_lov.h> -#include <linux/obd_class.h> -#include <linux/obd_support.h> - -void lov_dump_lmm(int level, struct lov_mds_md *lmm) -{ - struct lov_object_id *loi; - int idx; - - CDEBUG(level, "objid "LPX64", magic %#08x, ost_count %u\n", - lmm->lmm_object_id, lmm->lmm_magic, lmm->lmm_ost_count); - CDEBUG(level,"stripe_size %u, stripe_count %u, stripe_offset %u\n", - lmm->lmm_stripe_size, lmm->lmm_stripe_count, - lmm->lmm_stripe_offset); - for (idx = 0, loi = lmm->lmm_objects; idx < lmm->lmm_ost_count; - idx++, loi++) - CDEBUG(level, "ost idx %u subobj "LPX64"\n", idx, - loi->l_object_id); -} - -#define LMM_ASSERT(test) \ -do { \ - if (!(test)) lov_dump_lmm(D_ERROR, lmm); \ - LASSERT(test); /* so we know what assertion failed */ \ -} while(0) - -/* Pack LOV object metadata for disk storage. It is packed in LE byte - * order and is opaque to the networking layer. - * - * XXX In the future, this will be enhanced to get the EA size from the - * underlying OSC device(s) to get their EA sizes so we can stack - * LOVs properly. For now lov_mds_md_size() just assumes one obd_id - * per stripe. 
- */ -int lov_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp, - struct lov_stripe_md *lsm) -{ - struct obd_device *obd = class_conn2obd(conn); - struct lov_obd *lov = &obd->u.lov; - struct lov_oinfo *loi; - struct lov_mds_md *lmm; - int ost_count = lov->desc.ld_tgt_count; - int stripe_count = ost_count; - int lmm_size; - int i; - ENTRY; - - if (lsm) { - int i, max = 0; - if (lsm->lsm_magic != LOV_MAGIC) { - CERROR("bad mem LOV MAGIC: %#010x != %#010x\n", - lsm->lsm_magic, LOV_MAGIC); - RETURN(-EINVAL); - } - stripe_count = lsm->lsm_stripe_count; - - for (i = 0,loi = lsm->lsm_oinfo; i < stripe_count; i++,loi++) { - if (loi->loi_ost_idx > max) - max = loi->loi_ost_idx; - } - ost_count = max + 1; - } - - /* XXX LOV STACKING call into osc for sizes */ - lmm_size = lov_mds_md_size(ost_count); - - if (!lmmp) - RETURN(lmm_size); - - if (*lmmp && !lsm) { - ost_count = le32_to_cpu ((*lmmp)->lmm_ost_count); - OBD_FREE(*lmmp, lov_mds_md_size(ost_count)); - *lmmp = NULL; - RETURN(0); - } - - if (!*lmmp) { - OBD_ALLOC(*lmmp, lmm_size); - if (!*lmmp) - RETURN(-ENOMEM); - } - - lmm = *lmmp; - lmm->lmm_magic = cpu_to_le32 (LOV_MAGIC); - lmm->lmm_ost_count = cpu_to_le16 (ost_count); - - if (!lsm) - RETURN(lmm_size); - - lmm->lmm_object_id = cpu_to_le64 (lsm->lsm_object_id); - lmm->lmm_stripe_count = cpu_to_le16 (stripe_count); - lmm->lmm_stripe_size = cpu_to_le32 (lsm->lsm_stripe_size); - lmm->lmm_stripe_offset = cpu_to_le32 (lsm->lsm_stripe_offset); - - /* Only fill in the object ids which we are actually using. - * Assumes lmm_objects is otherwise zero-filled. 
*/ - for (i = 0, loi = lsm->lsm_oinfo; i < stripe_count; i++, loi++) { - /* XXX call down to osc_packmd() to do the packing */ - LASSERT (loi->loi_id); - lmm->lmm_objects[loi->loi_ost_idx].l_object_id = - cpu_to_le64 (loi->loi_id); - } - - RETURN(lmm_size); -} - -static int lov_get_stripecnt(struct lov_obd *lov, int stripe_count) -{ - if (!stripe_count) - stripe_count = lov->desc.ld_default_stripe_count; - if (!stripe_count || stripe_count > lov->desc.ld_active_tgt_count) - stripe_count = lov->desc.ld_active_tgt_count; - - return stripe_count; -} - -/* Unpack LOV object metadata from disk storage. It is packed in LE byte - * order and is opaque to the networking layer. - */ -int lov_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmm, int lmm_bytes) -{ - struct obd_device *obd = class_conn2obd(conn); - struct lov_obd *lov = &obd->u.lov; - struct lov_stripe_md *lsm; - struct lov_oinfo *loi; - int ost_count = 0; - int ost_offset = 0; - int stripe_count; - int lsm_size; - int i; - ENTRY; - - if (lmm) { - if (lmm_bytes < sizeof (*lmm)) { - CERROR("lov_mds_md too small: %d, need %d\n", - lmm_bytes, (int)sizeof(*lmm)); - RETURN(-EINVAL); - } - if (le32_to_cpu (lmm->lmm_magic) != LOV_MAGIC) { - CERROR("bad disk LOV MAGIC: %#08x != %#08x\n", - le32_to_cpu (lmm->lmm_magic), LOV_MAGIC); - RETURN(-EINVAL); - } - - ost_count = le16_to_cpu (lmm->lmm_ost_count); - stripe_count = le16_to_cpu (lmm->lmm_stripe_count); - - if (ost_count == 0 || stripe_count == 0) { - CERROR ("zero ost %d or stripe %d count\n", - ost_count, stripe_count); - RETURN (-EINVAL); - } - - if (lmm_bytes < lov_mds_md_size (ost_count)) { - CERROR ("lov_mds_md too small: %d, need %d\n", - lmm_bytes, lov_mds_md_size (ost_count)); - RETURN (-EINVAL); - } - } else - stripe_count = lov_get_stripecnt(lov, 0); - - /* XXX LOV STACKING call into osc for sizes */ - lsm_size = lov_stripe_md_size(stripe_count); - - if (!lsmp) - RETURN(lsm_size); - - if (*lsmp && !lmm) { - 
stripe_count = (*lsmp)->lsm_stripe_count; - OBD_FREE(*lsmp, lov_stripe_md_size(stripe_count)); - *lsmp = NULL; - RETURN(0); - } - - if (!*lsmp) { - OBD_ALLOC(*lsmp, lsm_size); - if (!*lsmp) - RETURN(-ENOMEM); - } - - lsm = *lsmp; - lsm->lsm_magic = LOV_MAGIC; - lsm->lsm_stripe_count = stripe_count; - lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count; - - if (!lmm) - RETURN(lsm_size); - - lsm->lsm_object_id = le64_to_cpu (lmm->lmm_object_id); - lsm->lsm_stripe_size = le32_to_cpu (lmm->lmm_stripe_size); - ost_offset = lsm->lsm_stripe_offset = le32_to_cpu (lmm->lmm_stripe_offset); - - LMM_ASSERT(lsm->lsm_object_id); - LMM_ASSERT(ost_count); - - for (i = 0, loi = lsm->lsm_oinfo; i < ost_count; i++, ost_offset++) { - ost_offset %= ost_count; - - if (!lmm->lmm_objects[ost_offset].l_object_id) - continue; - - LMM_ASSERT(loi - lsm->lsm_oinfo < stripe_count); - /* XXX LOV STACKING call down to osc_unpackmd() */ - loi->loi_id = - le64_to_cpu (lmm->lmm_objects[ost_offset].l_object_id); - loi->loi_ost_idx = ost_offset; - loi->loi_dirty_ot = &loi->loi_dirty_ot_inline; - ot_init(loi->loi_dirty_ot); - loi++; - } - LMM_ASSERT(loi - lsm->lsm_oinfo > 0); - LMM_ASSERT(loi - lsm->lsm_oinfo == stripe_count); - - RETURN(lsm_size); -} - -/* Configure object striping information on a new file. - * - * @lmmu is a pointer to a user struct with one or more of the fields set to - * indicate the application preference: lmm_stripe_count, lmm_stripe_size, - * lmm_stripe_offset, and lmm_stripe_pattern. lmm_magic must be LOV_MAGIC. - * @lsmp is a pointer to an in-core stripe MD that needs to be filled in. 
- */ -int lov_setstripe(struct lustre_handle *conn, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmmu) -{ - struct obd_device *obd = class_conn2obd(conn); - struct lov_obd *lov = &obd->u.lov; - struct lov_mds_md lmm; - struct lov_stripe_md *lsm; - int stripe_count; - int rc; - ENTRY; - - rc = copy_from_user(&lmm, lmmu, sizeof(lmm)); - if (rc) - RETURN(-EFAULT); - - /* Bug 1185 FIXME: struct lov_mds_md is little-endian everywhere else */ - - if (lmm.lmm_magic != LOV_MAGIC) { - CERROR("bad userland LOV MAGIC: %#08x != %#08x\n", - lmm.lmm_magic, LOV_MAGIC); - RETURN(-EINVAL); - } -#if 0 /* the stripe_count/offset is "advisory", and it gets fixed later */ - if (lmm.lmm_stripe_count > lov->desc.ld_tgt_count && - lmm.lmm_stripe_count != 0xffffffff) { - CERROR("stripe count %u more than OST count %d\n", - lmm.lmm_stripe_count, lov->desc.ld_tgt_count); - RETURN(-EINVAL); - } - if (lmm.lmm_stripe_offset >= lov->desc.ld_tgt_count && - lmm.lmm_stripe_offset != 0xffffffff) { - CERROR("stripe offset %u more than max OST index %d\n", - lmm.lmm_stripe_offset, lov->desc.ld_tgt_count); - RETURN(-EINVAL); - } -#endif - if (lmm.lmm_stripe_size & (PAGE_SIZE - 1)) { - CERROR("stripe size %u not multiple of %lu\n", - lmm.lmm_stripe_size, PAGE_SIZE); - RETURN(-EINVAL); - } - stripe_count = lov_get_stripecnt(lov, lmm.lmm_stripe_count); - - if ((__u64)lmm.lmm_stripe_size * stripe_count > ~0UL) { - CERROR("stripe width %ux%u > %lu on 32-bit system\n", - lmm.lmm_stripe_size, (int)lmm.lmm_stripe_count, ~0UL); - RETURN(-EINVAL); - } - - /* XXX LOV STACKING call into osc for sizes */ - OBD_ALLOC(lsm, lov_stripe_md_size(stripe_count)); - if (!lsm) - RETURN(-ENOMEM); - - lsm->lsm_magic = LOV_MAGIC; - lsm->lsm_stripe_count = stripe_count; - lsm->lsm_stripe_offset = lmm.lmm_stripe_offset; - lsm->lsm_stripe_size = lmm.lmm_stripe_size; - lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES * stripe_count; - - *lsmp = lsm; - - RETURN(rc); -} - -/* Retrieve object striping information. 
- * - * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating - * the maximum number of OST indices which will fit in the user buffer. - * lmm_magic must be LOV_MAGIC. - */ -int lov_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm, - struct lov_mds_md *lmmu) -{ - struct lov_mds_md lmm, *lmmk = NULL; - int rc, lmm_size; - ENTRY; - - if (!lsm) - RETURN(-ENODATA); - - rc = copy_from_user(&lmm, lmmu, sizeof(lmm)); - if (rc) - RETURN(-EFAULT); - - if (lmm.lmm_magic != LOV_MAGIC) - RETURN(-EINVAL); - - rc = lov_packmd(conn, &lmmk, lsm); - if (rc < 0) - RETURN(rc); - /* Bug 1185 FIXME: convert lmmk to big-endian before copy to userspace */ - lmm_size = rc; - rc = 0; - - /* User wasn't expecting this many OST entries */ - if (lmm.lmm_ost_count < lmmk->lmm_ost_count) - rc = -EOVERFLOW; - else if (copy_to_user(lmmu, lmmk, lmm_size)) - rc = -EFAULT; - - obd_free_diskmd (conn, &lmmk); - - RETURN(rc); -} diff --git a/lustre/lov/lproc_lov.c b/lustre/lov/lproc_lov.c deleted file mode 100644 index e0b3adb..0000000 --- a/lustre/lov/lproc_lov.c +++ /dev/null @@ -1,212 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#include <linux/lprocfs_status.h> -#include <linux/obd_class.h> -#include <linux/seq_file.h> - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -#else - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs); - -int rd_stripesize(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *dev = (struct obd_device *)data; - struct lov_desc *desc; - - LASSERT(dev != NULL); - desc = &dev->u.lov.desc; - *eof = 1; - return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_size); -} - -int rd_stripeoffset(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *dev = (struct obd_device *)data; - struct lov_desc *desc; - - LASSERT(dev != NULL); - desc = &dev->u.lov.desc; - *eof = 1; - return snprintf(page, count, LPU64"\n", desc->ld_default_stripe_offset); -} - -int rd_stripetype(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device* dev = (struct obd_device*)data; - struct lov_desc *desc; - - LASSERT(dev != NULL); - desc = &dev->u.lov.desc; - *eof = 1; - return snprintf(page, count, "%u\n", desc->ld_pattern); -} - -int rd_stripecount(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *dev = (struct obd_device *)data; - struct lov_desc *desc; - - LASSERT(dev != NULL); - desc = &dev->u.lov.desc; - *eof = 1; - return snprintf(page, count, "%u\n", desc->ld_default_stripe_count); -} - -int 
rd_numobd(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *dev = (struct obd_device*)data; - struct lov_desc *desc; - - LASSERT(dev != NULL); - desc = &dev->u.lov.desc; - *eof = 1; - return snprintf(page, count, "%u\n", desc->ld_tgt_count); - -} - -int rd_activeobd(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device* dev = (struct obd_device*)data; - struct lov_desc *desc; - - LASSERT(dev != NULL); - desc = &dev->u.lov.desc; - *eof = 1; - return snprintf(page, count, "%u\n", desc->ld_active_tgt_count); -} - -int rd_mdc(char *page, char **start, off_t off, int count, int *eof, void *data) -{ - struct obd_device *dev = (struct obd_device*) data; - struct lov_obd *lov; - - LASSERT(dev != NULL); - lov = &dev->u.lov; - *eof = 1; - return snprintf(page, count, "%s\n", lov->mdcobd->obd_uuid.uuid); -} - -static void *ll_tgt_seq_start(struct seq_file *p, loff_t *pos) -{ - struct obd_device *dev = p->private; - struct lov_obd *lov = &dev->u.lov; - - return (*pos >= lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]); - -} -static void ll_tgt_seq_stop(struct seq_file *p, void *v) -{ - -} - -static void *ll_tgt_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - struct obd_device *dev = p->private; - struct lov_obd *lov = &dev->u.lov; - - ++*pos; - return (*pos >=lov->desc.ld_tgt_count) ? NULL : &(lov->tgts[*pos]); -} - -static int ll_tgt_seq_show(struct seq_file *p, void *v) -{ - struct lov_tgt_desc *tgt = v; - struct obd_device *dev = p->private; - struct lov_obd *lov = &dev->u.lov; - int idx = tgt - &(lov->tgts[0]); - return seq_printf(p, "%d: %s %sACTIVE\n", idx+1, tgt->uuid.uuid, - tgt->active ? 
"" : "IN"); -} - -struct seq_operations ll_tgt_sops = { - .start = ll_tgt_seq_start, - .stop = ll_tgt_seq_stop, - .next = ll_tgt_seq_next, - .show = ll_tgt_seq_show, -}; - -static int ll_target_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = inode->u.generic_ip; - struct seq_file *seq; - int rc = seq_open(file, &ll_tgt_sops); - - if (rc) - return rc; - - seq = file->private_data; - seq->private = dp->data; - - return 0; -} -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "stripesize", rd_stripesize, 0, 0 }, - { "stripeoffset", rd_stripeoffset, 0, 0 }, - { "stripecount", rd_stripecount, 0, 0 }, - { "stripetype", rd_stripetype, 0, 0 }, - { "numobd", rd_numobd, 0, 0 }, - { "activeobd", rd_activeobd, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "target_mdc", rd_mdc, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -struct file_operations ll_proc_target_fops = { - .open = ll_target_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/mdc/.cvsignore b/lustre/mdc/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/mdc/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/mdc/Makefile.am b/lustre/mdc/Makefile.am deleted file mode 100644 index f4f0218..0000000 --- a/lustre/mdc/Makefile.am +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -DEFS= - -if LIBLUSTRE -lib_LIBRARIES = libmdc.a -libmdc_a_SOURCES = mdc_request.c mdc_reint.c mdc_lib.c mdc_internal.h -else -MODULE = mdc -modulefs_DATA = mdc.o -EXTRA_PROGRAMS = mdc - -mdc_SOURCES = mdc_request.c mdc_reint.c lproc_mdc.c mdc_lib.c mdc_internal.h -endif - -include $(top_srcdir)/Rules diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c deleted file mode 100644 index 3f81507..0000000 --- a/lustre/mdc/lproc_mdc.c +++ /dev/null @@ -1,63 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#include <linux/obd_class.h> -#include <linux/lprocfs_status.h> - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -#else - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs); - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "mds_server_uuid", lprocfs_rd_server_uuid, 0, 0 }, - { "mds_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -#endif /* LPROCFS */ - -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h deleted file mode 100644 index e39a0aa..0000000 --- a/lustre/mdc/mdc_internal.h +++ /dev/null @@ -1,24 +0,0 @@ -void mds_pack_req_body(struct ptlrpc_request *); -void mds_pack_rep_body(struct ptlrpc_request *); -void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, - obd_id ino, int type); -void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset, - int flags, struct mdc_op_data *data); -void mds_setattr_pack(struct ptlrpc_request *req, - struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen); -void mds_create_pack(struct 
ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, - __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, - const void *data, int datalen); -void mds_open_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, - __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, - __u32 flags, const void *data, int datalen); -void mds_unlink_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data); -void mds_link_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data); -void mds_rename_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data, - const char *old, int oldlen, const char *new, int newlen); diff --git a/lustre/mdc/mdc_lib.c b/lustre/mdc/mdc_lib.c deleted file mode 100644 index 806a830..0000000 --- a/lustre/mdc/mdc_lib.c +++ /dev/null @@ -1,281 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_MDS -#ifndef __KERNEL__ -# include <liblustre.h> -#endif -#include <linux/lustre_idl.h> -#include <linux/lustre_net.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_lite.h> - -void mds_readdir_pack(struct ptlrpc_request *req, __u64 offset, __u32 size, - obd_id ino, int type, __u64 xid) -{ - struct mds_body *b; - - b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b)); - b->fsuid = current->fsuid; - b->fsgid = current->fsgid; - b->capability = current->cap_effective; - b->fid1.id = ino; - b->fid1.f_type = type; - b->size = offset; /* !! */ - b->suppgid = -1; - b->blocks = xid; /* !! */ - b->nlink = size; /* !! */ -} - -static void mds_pack_body(struct mds_body *b) -{ - LASSERT (b != NULL); - - b->fsuid = current->fsuid; - b->fsgid = current->fsgid; - b->capability = current->cap_effective; -} - -void mds_pack_req_body(struct ptlrpc_request *req) -{ - struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*b)); - mds_pack_body(b); -} - -/* packing of MDS records */ -void mds_create_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, - __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, - const void *data, int datalen) -{ - struct mds_rec_create *rec; - char *tmp; - rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); - - rec->cr_opcode = REINT_CREATE; - rec->cr_fsuid = current->fsuid; - rec->cr_fsgid = current->fsgid; - rec->cr_cap = current->cap_effective; - ll_ino2fid(&rec->cr_fid, op_data->ino1, op_data->gen1, op_data->typ1); - memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); - rec->cr_mode = mode; - rec->cr_rdev = rdev; - rec->cr_uid = uid; - rec->cr_gid = gid; - rec->cr_time = time; - if (in_group_p(op_data->gid1)) - rec->cr_suppgid = op_data->gid1; - else - rec->cr_suppgid = -1; - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); - LOGL0(op_data->name, op_data->namelen, tmp); - - if (data) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, 
datalen); - memcpy (tmp, data, datalen); - } -} -/* packing of MDS records */ -void mds_open_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *op_data, - __u32 mode, __u64 rdev, __u32 uid, __u32 gid, __u64 time, - __u32 flags, const void *data, int datalen) -{ - struct mds_rec_create *rec; - char *tmp; - rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); - - /* XXX do something about time, uid, gid */ - rec->cr_opcode = REINT_OPEN; - rec->cr_fsuid = current->fsuid; - rec->cr_fsgid = current->fsgid; - rec->cr_cap = current->cap_effective; - ll_ino2fid(&rec->cr_fid, op_data->ino1, - op_data->gen1, op_data->typ1); - memset(&rec->cr_replayfid, 0, sizeof(rec->cr_replayfid)); - rec->cr_mode = mode; - rec->cr_flags = flags; - rec->cr_rdev = rdev; - rec->cr_uid = uid; - rec->cr_gid = gid; - rec->cr_time = time; - if (in_group_p(op_data->gid1)) - rec->cr_suppgid = op_data->gid1; - else - rec->cr_suppgid = -1; - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, op_data->namelen + 1); - LOGL0(op_data->name, op_data->namelen, tmp); - - if (data) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, datalen); - memcpy (tmp, data, datalen); - } -} -void mds_setattr_pack(struct ptlrpc_request *req, - struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen) -{ - struct mds_rec_setattr *rec = lustre_msg_buf(req->rq_reqmsg, 0, - sizeof (*rec)); - rec->sa_opcode = REINT_SETATTR; - rec->sa_fsuid = current->fsuid; - rec->sa_fsgid = current->fsgid; - rec->sa_cap = current->cap_effective; - ll_ino2fid(&rec->sa_fid, data->ino1, data->gen1, data->typ1); - - if (iattr) { - rec->sa_valid = iattr->ia_valid; - rec->sa_mode = iattr->ia_mode; - rec->sa_uid = iattr->ia_uid; - rec->sa_gid = iattr->ia_gid; - rec->sa_size = iattr->ia_size; - rec->sa_atime = LTIME_S(iattr->ia_atime); - rec->sa_mtime = LTIME_S(iattr->ia_mtime); - rec->sa_ctime = LTIME_S(iattr->ia_ctime); - rec->sa_attr_flags = iattr->ia_attr_flags; - - if ((iattr->ia_valid & ATTR_GID) && 
in_group_p(iattr->ia_gid)) - rec->sa_suppgid = iattr->ia_gid; - else if ((iattr->ia_valid & ATTR_MODE) && - in_group_p(data->gid1)) - rec->sa_suppgid = data->gid1; - else - rec->sa_suppgid = -1; - } - - if (ealen != 0) - memcpy(lustre_msg_buf(req->rq_reqmsg, 1, ealen), ea, ealen); -} - -void mds_unlink_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data) -{ - struct mds_rec_unlink *rec; - char *tmp; - - rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); - LASSERT (rec != NULL); - - rec->ul_opcode = REINT_UNLINK; - rec->ul_fsuid = current->fsuid; - rec->ul_fsgid = current->fsgid; - rec->ul_cap = current->cap_effective; - rec->ul_mode = data->mode; - if (in_group_p(data->gid1)) - rec->ul_suppgid = data->gid1; - else - rec->ul_suppgid = -1; - ll_ino2fid(&rec->ul_fid1, data->ino1, data->gen1, data->typ1); - if (data->ino2) - ll_ino2fid(&rec->ul_fid2, data->ino2, data->gen2, data->typ2); - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1); - LASSERT (tmp != NULL); - LOGL0(data->name, data->namelen, tmp); -} - -void mds_link_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data) -{ - struct mds_rec_link *rec; - char *tmp; - - rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); - - rec->lk_opcode = REINT_LINK; - rec->lk_fsuid = current->fsuid; - rec->lk_fsgid = current->fsgid; - rec->lk_cap = current->cap_effective; - if (in_group_p(data->gid1)) - rec->lk_suppgid1 = data->gid1; - else - rec->lk_suppgid1 = -1; - if (in_group_p(data->gid2)) - rec->lk_suppgid2 = data->gid2; - else - rec->lk_suppgid2 = -1; - ll_ino2fid(&rec->lk_fid1, data->ino1, data->gen1, data->typ1); - ll_ino2fid(&rec->lk_fid2, data->ino2, data->gen2, data->typ2); - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, data->namelen + 1); - LOGL0(data->name, data->namelen, tmp); -} - -void mds_rename_pack(struct ptlrpc_request *req, int offset, - struct mdc_op_data *data, - const char *old, int oldlen, const char *new, int newlen) -{ 
- struct mds_rec_rename *rec; - char *tmp; - - rec = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*rec)); - - /* XXX do something about time, uid, gid */ - rec->rn_opcode = REINT_RENAME; - rec->rn_fsuid = current->fsuid; - rec->rn_fsgid = current->fsgid; - rec->rn_cap = current->cap_effective; - if (in_group_p(data->gid1)) - rec->rn_suppgid1 = data->gid1; - else - rec->rn_suppgid1 = -1; - if (in_group_p(data->gid2)) - rec->rn_suppgid2 = data->gid2; - else - rec->rn_suppgid2 = -1; - ll_ino2fid(&rec->rn_fid1, data->ino1, data->gen1, data->typ1); - ll_ino2fid(&rec->rn_fid2, data->ino2, data->gen2, data->typ2); - - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, oldlen + 1); - LOGL0(old, oldlen, tmp); - - if (new) { - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 2, newlen + 1); - LOGL0(new, newlen, tmp); - } -} - -void mds_getattr_pack(struct ptlrpc_request *req, int valid, int offset, - int flags, struct mdc_op_data *data) -{ - struct mds_body *b; - b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*b)); - - b->fsuid = current->fsuid; - b->fsgid = current->fsgid; - b->capability = current->cap_effective; - b->valid = valid; - b->flags = flags; - if (in_group_p(data->gid1)) - b->suppgid = data->gid1; - else - b->suppgid = -1; - - ll_ino2fid(&b->fid1, data->ino1, data->gen1, data->typ1); - if (data->name) { - char *tmp; - tmp = lustre_msg_buf(req->rq_reqmsg, offset + 1, - data->namelen + 1); - LOGL0(data->name, data->namelen, tmp); - } -} diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c deleted file mode 100644 index 2da2fdb..0000000 --- a/lustre/mdc/mdc_reint.c +++ /dev/null @@ -1,237 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_MDC - -#ifdef __KERNEL__ -# include <linux/config.h> -# include <linux/module.h> -# include <linux/kernel.h> -#else -# include <liblustre.h> -#endif - -#include <linux/obd_class.h> -#include <linux/lustre_mds.h> -#include "mdc_internal.h" - -/* mdc_setattr does its own semaphore handling */ -static int mdc_reint(struct ptlrpc_request *request, int level) -{ - int rc; - __u32 *opcodeptr; - - opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*opcodeptr)); - request->rq_level = level; - - if (!(*opcodeptr == REINT_SETATTR)) - mdc_get_rpc_lock(&mdc_rpc_lock, NULL); - rc = ptlrpc_queue_wait(request); - if (!(*opcodeptr == REINT_SETATTR)) - mdc_put_rpc_lock(&mdc_rpc_lock, NULL); - - if (rc) - CDEBUG(D_INFO, "error in handling %d\n", rc); - return rc; -} - -/* If mdc_setattr is called with an 'iattr', then it is a normal RPC that - * should take the normal semaphore and go to the normal portal. - * - * If it is called with iattr->ia_valid & ATTR_FROM_OPEN, then it is a - * magic open-path setattr that should take the setattr semaphore and - * go to the setattr portal. 
*/ -int mdc_setattr(struct lustre_handle *conn, - struct mdc_op_data *data, - struct iattr *iattr, void *ea, int ealen, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - struct mds_rec_setattr *rec; - struct mdc_rpc_lock *rpc_lock; - int rc, bufcount = 1, size[2] = {sizeof(*rec), ealen}; - ENTRY; - - LASSERT(iattr != NULL); - - if (ealen > 0) - bufcount = 2; - - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount, - size, NULL); - if (!req) - RETURN(-ENOMEM); - - if (iattr->ia_valid & ATTR_FROM_OPEN) { - req->rq_request_portal = MDS_SETATTR_PORTAL; //XXX FIXME bug 249 - rpc_lock = &mdc_setattr_lock; - } else - rpc_lock = &mdc_rpc_lock; - - mds_setattr_pack(req, data, iattr, ea, ealen); - - size[0] = sizeof(struct mds_body); - req->rq_replen = lustre_msg_size(1, size); - - mdc_get_rpc_lock(rpc_lock, NULL); - rc = mdc_reint(req, LUSTRE_CONN_FULL); - mdc_put_rpc_lock(rpc_lock, NULL); - - *request = req; - if (rc == -ERESTARTSYS) - rc = 0; - - RETURN(rc); -} - -int mdc_create(struct lustre_handle *conn, - struct mdc_op_data *op_data, - const void *data, int datalen, - int mode, __u32 uid, __u32 gid, __u64 time, __u64 rdev, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int rc, size[3] = {sizeof(struct mds_rec_create), - op_data->namelen + 1, 0}; - int level, bufcount = 2; -// ENTRY; - - if (data && datalen) { - size[bufcount] = datalen; - bufcount++; - } - - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, bufcount, - size, NULL); - if (!req) - return -ENOMEM; -// RETURN(-ENOMEM); - - /* mds_create_pack fills msg->bufs[1] with name - * and msg->bufs[2] with tgt, for symlinks or lov MD data */ - mds_create_pack(req, 0, op_data, - mode, rdev, uid, gid, time, - data, datalen); - - size[0] = sizeof(struct mds_body); - req->rq_replen = lustre_msg_size(1, size); - - level = LUSTRE_CONN_FULL; - resend: - rc = mdc_reint(req, level); - /* Resend if we were told to. 
*/ - if (rc == -ERESTARTSYS) { - level = LUSTRE_CONN_RECOVER; - goto resend; - } - - if (!rc) - mdc_store_inode_generation(req, 0, 0); - - *request = req; - return rc; -// RETURN(rc); -} - -int mdc_unlink(struct lustre_handle *conn, - struct mdc_op_data *data, - struct ptlrpc_request **request) -{ - struct obd_device *obddev = class_conn2obd(conn); - struct ptlrpc_request *req = *request; - int rc, size[2] = {sizeof(struct mds_rec_unlink), data->namelen + 1}; - ENTRY; - - LASSERT(req == NULL); - - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, - NULL); - if (!req) - RETURN(-ENOMEM); - *request = req; - - size[0] = sizeof(struct mds_body); - size[1] = obddev->u.cli.cl_max_mds_easize; - req->rq_replen = lustre_msg_size(2, size); - - mds_unlink_pack(req, 0, data); - - rc = mdc_reint(req, LUSTRE_CONN_FULL); - if (rc == -ERESTARTSYS) - rc = 0; - RETURN(rc); -} - -int mdc_link(struct lustre_handle *conn, - struct mdc_op_data *data, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int rc, size[2] = {sizeof(struct mds_rec_link), data->namelen + 1}; - ENTRY; - - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 2, size, - NULL); - if (!req) - RETURN(-ENOMEM); - - mds_link_pack(req, 0, data); - - size[0] = sizeof(struct mds_body); - req->rq_replen = lustre_msg_size(1, size); - - rc = mdc_reint(req, LUSTRE_CONN_FULL); - *request = req; - if (rc == -ERESTARTSYS) - rc = 0; - - RETURN(rc); -} - -int mdc_rename(struct lustre_handle *conn, - struct mdc_op_data *data, - const char *old, int oldlen, - const char *new, int newlen, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - int rc, size[3] = {sizeof(struct mds_rec_rename), oldlen + 1, - newlen + 1}; - ENTRY; - - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_REINT, 3, size, - NULL); - if (!req) - RETURN(-ENOMEM); - - mds_rename_pack(req, 0, data, old, oldlen, new, newlen); - - size[0] = sizeof(struct mds_body); - req->rq_replen = lustre_msg_size(1, size); 
- - rc = mdc_reint(req, LUSTRE_CONN_FULL); - *request = req; - if (rc == -ERESTARTSYS) - rc = 0; - - RETURN(rc); -} diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c deleted file mode 100644 index 204a836..0000000 --- a/lustre/mdc/mdc_request.c +++ /dev/null @@ -1,736 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_MDC - -#ifdef __KERNEL__ -# include <linux/module.h> -# include <linux/pagemap.h> -# include <linux/miscdevice.h> -# include <linux/init.h> -#else -# include <liblustre.h> -# include <linux/obd_class.h> -#endif - -#include <linux/lustre_mds.h> -#include <linux/lustre_lite.h> -#include <linux/lustre_dlm.h> -#include <linux/lprocfs_status.h> -#include "mdc_internal.h" - -#define REQUEST_MINOR 244 - -extern int mds_queue_req(struct ptlrpc_request *); -struct mdc_rpc_lock mdc_rpc_lock; -struct mdc_rpc_lock mdc_setattr_lock; -EXPORT_SYMBOL(mdc_rpc_lock); - -/* Helper that implements most of mdc_getstatus and signal_completed_replay. 
*/ -static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid, - int level, int msg_flags) -{ - struct ptlrpc_request *req; - struct mds_body *body; - int rc, size = sizeof(*body); - ENTRY; - - req = ptlrpc_prep_req(imp, MDS_GETSTATUS, 1, &size, NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - req->rq_level = level; - req->rq_replen = lustre_msg_size(1, &size); - - mds_pack_req_body(req); - req->rq_reqmsg->flags |= msg_flags; - rc = ptlrpc_queue_wait(req); - - if (!rc) { - body = lustre_swab_repbuf (req, 0, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't extract mds_body\n"); - GOTO (out, rc = -EPROTO); - } - - memcpy(rootfid, &body->fid1, sizeof(*rootfid)); - - CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64 - ", last_xid="LPU64"\n", - rootfid->id, req->rq_repmsg->last_committed, - req->rq_repmsg->last_xid); - } - - EXIT; - out: - ptlrpc_req_finished(req); - return rc; -} - -/* should become mdc_getinfo() */ -int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid) -{ - return send_getstatus(class_conn2cliimp(conn), rootfid, LUSTRE_CONN_CON, - 0); -} - -int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - struct mds_status_req *streq; - struct lov_desc *desc; - struct obd_uuid *uuids; - int rc, size[2] = {sizeof(*streq)}; - int i; - ENTRY; - - req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1, - size, NULL); - if (!req) - RETURN (-ENOMEM); - - *request = req; - streq = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*streq)); - streq->flags = MDS_STATUS_LOV; - streq->repbuf = LOV_MAX_UUID_BUFFER_SIZE; - - /* prepare for reply */ - req->rq_level = LUSTRE_CONN_CON; - size[0] = sizeof (*desc); - size[1] = LOV_MAX_UUID_BUFFER_SIZE; - req->rq_replen = lustre_msg_size(2, size); - - mdc_get_rpc_lock(&mdc_rpc_lock, NULL); - rc = 
ptlrpc_queue_wait(req); - mdc_put_rpc_lock(&mdc_rpc_lock, NULL); - - if (rc != 0) { - CERROR ("rcp failed\n"); - GOTO (failed, rc); - } - - desc = lustre_swab_repbuf (req, 0, sizeof (*desc), - lustre_swab_lov_desc); - if (desc == NULL) { - CERROR ("Can't unpack lov_desc\n"); - GOTO (failed, rc = -EPROTO); - } - - LASSERT_REPSWAB (req, 1); - /* array of uuids byte-sex insensitive; just verify they are all - * there and terminated */ - uuids = lustre_msg_buf (req->rq_repmsg, 1, - desc->ld_tgt_count * sizeof (*uuids)); - if (uuids == NULL) { - CERROR ("Can't unpack %d uuids\n", desc->ld_tgt_count); - GOTO (failed, rc = -EPROTO); - } - - for (i = 0; i < desc->ld_tgt_count; i++) { - int uid_len = strnlen (uuids[i].uuid, sizeof (uuids[i].uuid)); - - if (uid_len == sizeof (uuids[i].uuid)) { - CERROR ("Unterminated uuid %d:%*s\n", - i, (int)sizeof (uuids[i].uuid), uuids[i].uuid); - GOTO (failed, rc = -EPROTO); - } - } - RETURN(0); - - failed: - ptlrpc_req_finished (req); - RETURN (rc); -} - -int mdc_getattr_common (struct lustre_handle *conn, - unsigned int ea_size, struct ptlrpc_request *req) -{ - struct mds_body *body; - void *eadata; - int rc; - int size[2] = {sizeof(*body), 0}; - int bufcount = 1; - ENTRY; - - /* request message already built */ - - if (ea_size != 0) { - size[bufcount++] = ea_size; - CDEBUG(D_INODE, "reserved %u bytes for MD/symlink in packet\n", - ea_size); - } - req->rq_replen = lustre_msg_size(bufcount, size); - - mdc_get_rpc_lock(&mdc_rpc_lock, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(&mdc_rpc_lock, NULL); - if (rc != 0) - RETURN (rc); - - body = lustre_swab_repbuf (req, 0, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't unpack mds_body\n"); - RETURN (-EPROTO); - } - - CDEBUG(D_NET, "mode: %o\n", body->mode); - - LASSERT_REPSWAB (req, 1); - if (body->eadatasize != 0) { - /* reply indicates presence of eadata; check it's there... 
*/ - eadata = lustre_msg_buf (req->rq_repmsg, 1, body->eadatasize); - if (eadata == NULL) { - CERROR ("Missing/short eadata\n"); - RETURN (-EPROTO); - } - } - - RETURN (0); -} - -int mdc_getattr(struct lustre_handle *conn, struct ll_fid *fid, - unsigned long valid, unsigned int ea_size, - struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - struct mds_body *body; - int size = sizeof(*body); - int rc; - ENTRY; - - /* XXX do we need to make another request here? We just did a getattr - * to do the lookup in the first place. - */ - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, &size, - NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->fid1, fid, sizeof(*fid)); - body->valid = valid; - body->eadatasize = ea_size; - mds_pack_req_body(req); - - rc = mdc_getattr_common (conn, ea_size, req); - if (rc != 0) { - ptlrpc_req_finished (req); - req = NULL; - } - out: - *request = req; - RETURN (rc); -} - -int mdc_getattr_name(struct lustre_handle *conn, struct ll_fid *fid, - char *filename, int namelen, unsigned long valid, - unsigned int ea_size, struct ptlrpc_request **request) -{ - struct ptlrpc_request *req; - struct mds_body *body; - int rc, size[2] = {sizeof(*body), namelen}; - ENTRY; - - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR_NAME, 2, - size, NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->fid1, fid, sizeof(*fid)); - body->valid = valid; - body->eadatasize = ea_size; - mds_pack_req_body(req); - - LASSERT (strnlen (filename, namelen) == namelen - 1); - memcpy(lustre_msg_buf(req->rq_reqmsg, 1, namelen), filename, namelen); - - rc = mdc_getattr_common (conn, ea_size, req); - if (rc != 0) { - ptlrpc_req_finished (req); - req = NULL; - } - out: - *request = req; - return rc; -} - -/* This should be called with both the request and the reply still packed. 
*/ -void mdc_store_inode_generation(struct ptlrpc_request *req, int reqoff, - int repoff) -{ - struct mds_rec_create *rec = - lustre_msg_buf(req->rq_reqmsg, reqoff, sizeof (*rec)); - struct mds_body *body = - lustre_msg_buf(req->rq_repmsg, repoff, sizeof (*body)); - - LASSERT (rec != NULL); - LASSERT (body != NULL); - - memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); - DEBUG_REQ(D_HA, req, "storing generation %x for ino "LPD64, - rec->cr_replayfid.generation, rec->cr_replayfid.id); -} - -/* We always reserve enough space in the reply packet for a stripe MD, because - * we don't know in advance the file type. - * - * XXX we could get that from ext2_dir_entry_2 file_type - */ -int mdc_enqueue(struct lustre_handle *conn, - int lock_type, - struct lookup_intent *it, - int lock_mode, - struct mdc_op_data *data, - struct lustre_handle *lockh, - char *tgt, - int tgtlen, - ldlm_completion_callback cb_completion, - ldlm_blocking_callback cb_blocking, - void *cb_data) -{ - struct ptlrpc_request *req; - struct obd_device *obddev = class_conn2obd(conn); - struct ldlm_res_id res_id = - { .name = {data->ino1, data->gen1} }; - int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)}; - int rc, flags = LDLM_FL_HAS_INTENT; - int repsize[3] = {sizeof(struct ldlm_reply), - sizeof(struct mds_body), - obddev->u.cli.cl_max_mds_easize}; - struct ldlm_reply *dlm_rep; - struct ldlm_intent *lit; - struct ldlm_request *lockreq; - void *eadata; - unsigned long irqflags; - int reply_buffers = 0; - ENTRY; - -// LDLM_DEBUG_NOLOCK("mdsintent=%s,name=%s,dir=%lu", -// ldlm_it2str(it->it_op), it_name, it_inode->i_ino); - - if (it->it_op & IT_OPEN) { - it->it_mode |= S_IFREG; - it->it_mode &= ~current->fs->umask; - - size[2] = sizeof(struct mds_rec_create); - size[3] = data->namelen + 1; - req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4, - size, NULL); - if (!req) - RETURN(-ENOMEM); - - spin_lock_irqsave (&req->rq_lock, irqflags); - req->rq_replay = 1; 
- spin_unlock_irqrestore (&req->rq_lock, irqflags); - - /* pack the intent */ - lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit)); - lit->opc = (__u64)it->it_op; - - /* pack the intended request */ - mds_open_pack(req, 2, data, it->it_mode, 0, current->fsuid, - current->fsgid, LTIME_S(CURRENT_TIME), - it->it_flags, tgt, tgtlen); - /* get ready for the reply */ - reply_buffers = 3; - req->rq_replen = lustre_msg_size(3, repsize); - } else if (it->it_op & IT_UNLINK) { - size[2] = sizeof(struct mds_rec_unlink); - size[3] = data->namelen + 1; - req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4, - size, NULL); - if (!req) - RETURN(-ENOMEM); - - /* pack the intent */ - lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit)); - lit->opc = (__u64)it->it_op; - - /* pack the intended request */ - mds_unlink_pack(req, 2, data); - /* get ready for the reply */ - reply_buffers = 3; - req->rq_replen = lustre_msg_size(3, repsize); - } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) { - int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE; - size[2] = sizeof(struct mds_body); - size[3] = data->namelen + 1; - - req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4, - size, NULL); - if (!req) - RETURN(-ENOMEM); - - /* pack the intent */ - lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit)); - lit->opc = (__u64)it->it_op; - - /* pack the intended request */ - mds_getattr_pack(req, valid, 2, it->it_flags, data); - /* get ready for the reply */ - reply_buffers = 3; - req->rq_replen = lustre_msg_size(3, repsize); - } else if (it->it_op == IT_READDIR) { - req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1, - size, NULL); - if (!req) - RETURN(-ENOMEM); - - /* get ready for the reply */ - reply_buffers = 1; - req->rq_replen = lustre_msg_size(1, repsize); - } else { - LBUG(); - RETURN(-EINVAL); - } - - mdc_get_rpc_lock(&mdc_rpc_lock, it); - rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id, - lock_type, NULL, 0, lock_mode, &flags, - 
cb_completion, cb_blocking, cb_data, lockh); - mdc_put_rpc_lock(&mdc_rpc_lock, it); - - /* Similarly, if we're going to replay this request, we don't want to - * actually get a lock, just perform the intent. */ - if (req->rq_transno || req->rq_replay) { - lockreq = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*lockreq)); - lockreq->lock_flags |= LDLM_FL_INTENT_ONLY; - } - - /* This can go when we're sure that this can never happen */ - LASSERT(rc != -ENOENT); - if (rc == ELDLM_LOCK_ABORTED) { - lock_mode = 0; - memset(lockh, 0, sizeof(*lockh)); - } else if (rc != 0) { - CERROR("ldlm_cli_enqueue: %d\n", rc); - LASSERT (rc < 0); - ptlrpc_req_finished(req); - RETURN(rc); - } else { /* rc = 0 */ - struct ldlm_lock *lock = ldlm_handle2lock(lockh); - LASSERT(lock); - - /* If the server gave us back a different lock mode, we should - * fix up our variables. */ - if (lock->l_req_mode != lock_mode) { - ldlm_lock_addref(lockh, lock->l_req_mode); - ldlm_lock_decref(lockh, lock_mode); - lock_mode = lock->l_req_mode; - } - - LDLM_LOCK_PUT(lock); - } - - dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep)); - LASSERT (dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */ - LASSERT_REPSWABBED (req, 0); /* swabbed by ldlm_cli_enqueue() */ - - it->it_disposition = (int) dlm_rep->lock_policy_res1; - it->it_status = (int) dlm_rep->lock_policy_res2; - it->it_lock_mode = lock_mode; - it->it_data = req; - - /* We know what to expect, so we do any byte flipping required here */ - LASSERT (reply_buffers == 3 || reply_buffers == 1); - if (reply_buffers == 3) { - struct mds_body *body; - - body = lustre_swab_repbuf (req, 1, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't swab mds_body\n"); - RETURN (-EPROTO); - } - - if ((body->valid & OBD_MD_FLEASIZE) != 0) { - /* The eadata is opaque; just check that it is - * there. 
Eventually, obd_unpackmd() will check - * the contents */ - eadata = lustre_swab_repbuf (req, 2, body->eadatasize, - NULL); - if (eadata == NULL) { - CERROR ("Missing/short eadata\n"); - RETURN (-EPROTO); - } - } - } - - RETURN(rc); -} - -static void mdc_replay_open(struct ptlrpc_request *req) -{ - struct obd_client_handle *och = req->rq_replay_data; - struct lustre_handle old, *file_fh = &och->och_fh; - struct list_head *tmp; - struct mds_body *body; - - body = lustre_swab_repbuf (req, 1, sizeof (*body), - lustre_swab_mds_body); - LASSERT (body != NULL); - - memcpy(&old, file_fh, sizeof(old)); - CDEBUG(D_HA, "updating handle from "LPD64" to "LPD64"\n", - file_fh->cookie, body->handle.cookie); - memcpy(file_fh, &body->handle, sizeof(body->handle)); - - /* A few frames up, ptlrpc_replay holds the lock, so this is safe. */ - list_for_each(tmp, &req->rq_import->imp_sending_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - if (req->rq_reqmsg->opc != MDS_CLOSE) - continue; - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - if (memcmp(&body->handle, &old, sizeof(old))) - continue; - - DEBUG_REQ(D_HA, req, "updating close body with new fh"); - memcpy(&body->handle, file_fh, sizeof(*file_fh)); - } -} - -void mdc_set_open_replay_data(struct obd_client_handle *och) -{ - struct ptlrpc_request *req = och->och_req; - struct mds_rec_create *rec = - lustre_msg_buf(req->rq_reqmsg, 2, sizeof (*rec)); - struct mds_body *body = - lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body)); - - LASSERT (rec != NULL); - /* outgoing messages always in my byte order */ - LASSERT (body != NULL); - /* incoming message in my byte order (it's been swabbed) */ - LASSERT_REPSWABBED (req, 1); - - memcpy(&rec->cr_replayfid, &body->fid1, sizeof rec->cr_replayfid); - req->rq_replay_cb = mdc_replay_open; - req->rq_replay_data = och; -} - -int mdc_close(struct lustre_handle *conn, obd_id ino, int type, - struct lustre_handle *fh, struct ptlrpc_request **request) -{ - struct 
mds_body *body; - int rc, size = sizeof(*body); - struct ptlrpc_request *req; - ENTRY; - - req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size, - NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - ll_ino2fid(&body->fid1, ino, 0, type); - memcpy(&body->handle, fh, sizeof(body->handle)); - - req->rq_replen = lustre_msg_size(0, NULL); - - mdc_get_rpc_lock(&mdc_rpc_lock, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(&mdc_rpc_lock, NULL); - - EXIT; - out: - *request = req; - return rc; -} - -int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset, - struct page *page, struct ptlrpc_request **request) -{ - struct obd_import *imp = class_conn2cliimp(conn); - struct ptlrpc_request *req = NULL; - struct ptlrpc_bulk_desc *desc = NULL; - struct mds_body *body; - int rc, size = sizeof(*body); - ENTRY; - - CDEBUG(D_INODE, "inode: %ld\n", (long)ino); - - req = ptlrpc_prep_req(imp, MDS_READPAGE, 1, &size, NULL); - if (!req) - GOTO(out, rc = -ENOMEM); - /* XXX FIXME bug 249 */ - req->rq_request_portal = MDS_READPAGE_PORTAL; - - desc = ptlrpc_prep_bulk_imp (req, BULK_PUT_SINK, MDS_BULK_PORTAL); - if (desc == NULL) { - GOTO(out, rc = -ENOMEM); - } - /* NB req now owns desc and will free it when it gets freed */ - - rc = ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE); - if (rc != 0) - GOTO(out, rc); - - mds_readdir_pack(req, offset, PAGE_CACHE_SIZE, ino, type); - - req->rq_replen = lustre_msg_size(1, &size); - rc = ptlrpc_queue_wait(req); - - if (rc == 0) { - LASSERT (desc->bd_page_count == 1); - body = lustre_swab_repbuf (req, 0, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't unpack mds_body\n"); - GOTO (out, rc = -EPROTO); - } - } - - EXIT; - out: - *request = req; - return rc; -} - -static int mdc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, - void *karg, void *uarg) -{ - struct obd_device *obddev = 
class_conn2obd(conn); - struct obd_ioctl_data *data = karg; - struct obd_import *imp = obddev->u.cli.cl_import; - ENTRY; - - switch (cmd) { - case OBD_IOC_CLIENT_RECOVER: - RETURN(ptlrpc_recover_import(imp, data->ioc_inlbuf1)); - case IOC_OSC_SET_ACTIVE: - if (data->ioc_offset) { - CERROR("%s: can't reactivate MDC\n", - obddev->obd_uuid.uuid); - RETURN(-ENOTTY); - } - RETURN(ptlrpc_set_import_active(imp, 0)); - default: - CERROR("osc_ioctl(): unrecognised ioctl %#x\n", cmd); - RETURN(-ENOTTY); - } -} - -static int mdc_statfs(struct obd_export *exp, struct obd_statfs *osfs) -{ - struct ptlrpc_request *req; - struct obd_statfs *msfs; - int rc, size = sizeof(*msfs); - ENTRY; - - req = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, MDS_STATFS, 0, - NULL, NULL); - if (!req) - RETURN(-ENOMEM); - - req->rq_replen = lustre_msg_size(1, &size); - - mdc_get_rpc_lock(&mdc_rpc_lock, NULL); - rc = ptlrpc_queue_wait(req); - mdc_put_rpc_lock(&mdc_rpc_lock, NULL); - - if (rc) - GOTO(out, rc); - - msfs = lustre_swab_repbuf (req, 0, sizeof (*msfs), - lustre_swab_obd_statfs); - if (msfs == NULL) { - CERROR ("Can't unpack obd_statfs\n"); - GOTO (out, rc = -EPROTO); - } - - memcpy (osfs, msfs, sizeof (*msfs)); - EXIT; -out: - ptlrpc_req_finished(req); - - return rc; -} - -static int mdc_attach(struct obd_device *dev, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_vars(&lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); -} - -static int mdc_detach(struct obd_device *dev) -{ - return lprocfs_obd_detach(dev); -} - -struct obd_ops mdc_obd_ops = { - o_owner: THIS_MODULE, - o_attach: mdc_attach, - o_detach: mdc_detach, - o_setup: client_obd_setup, - o_cleanup: client_obd_cleanup, - o_connect: client_import_connect, - o_disconnect: client_import_disconnect, - o_iocontrol: mdc_iocontrol, - o_statfs: mdc_statfs -}; - -int __init mdc_init(void) -{ - struct lprocfs_static_vars lvars; - mdc_init_rpc_lock(&mdc_rpc_lock); - 
mdc_init_rpc_lock(&mdc_setattr_lock); - lprocfs_init_vars(&lvars); - return class_register_type(&mdc_obd_ops, lvars.module_vars, - LUSTRE_MDC_NAME); -} - -static void __exit mdc_exit(void) -{ - class_unregister_type(LUSTRE_MDC_NAME); -} - -#ifdef __KERNEL__ -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Metadata Client"); -MODULE_LICENSE("GPL"); - -EXPORT_SYMBOL(mdc_getstatus); -EXPORT_SYMBOL(mdc_getlovinfo); -EXPORT_SYMBOL(mdc_enqueue); -EXPORT_SYMBOL(mdc_getattr); -EXPORT_SYMBOL(mdc_getattr_name); -EXPORT_SYMBOL(mdc_create); -EXPORT_SYMBOL(mdc_unlink); -EXPORT_SYMBOL(mdc_rename); -EXPORT_SYMBOL(mdc_link); -EXPORT_SYMBOL(mdc_readpage); -EXPORT_SYMBOL(mdc_setattr); -EXPORT_SYMBOL(mdc_close); -EXPORT_SYMBOL(mdc_set_open_replay_data); - -EXPORT_SYMBOL(mdc_store_inode_generation); - -module_init(mdc_init); -module_exit(mdc_exit); -#endif diff --git a/lustre/mds/.cvsignore b/lustre/mds/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/mds/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/mds/Makefile.am b/lustre/mds/Makefile.am deleted file mode 100644 index cb63910..0000000 --- a/lustre/mds/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= -MODULE = mds -modulefs_DATA = mds.o -EXTRA_PROGRAMS = mds -mds_SOURCES = mds_lov.c handler.c mds_reint.c mds_fs.c lproc_mds.c mds_open.c \ -mds_lib.c mds_internal.h - -include $(top_srcdir)/Rules diff --git a/lustre/mds/Makefile.mk b/lustre/mds/Makefile.mk deleted file mode 100644 index 6b712fb..0000000 --- a/lustre/mds/Makefile.mk +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include fs/lustre/portals/Kernelenv - -obj-y += mds.o - -mds-objs := mds_lov.o handler.o mds_reint.o mds_fs.o lproc_mds.o mds_internal.h mds_updates.o mds_open.o simple.o target.o diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c deleted file mode 100644 index de3f2ed..0000000 --- a/lustre/mds/handler.c +++ /dev/null @@ -1,1976 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lustre/mds/handler.c - * Lustre Metadata Server (mds) request handler - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Mike Shaver <shaver@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_MDS - -#include <linux/module.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_dlm.h> -#include <linux/init.h> -#include <linux/obd_class.h> -#include <linux/random.h> -#include <linux/fs.h> -#include <linux/jbd.h> -#include <linux/ext3_fs.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# include <linux/smp_lock.h> -# include <linux/buffer_head.h> -# include <linux/workqueue.h> -# include <linux/mount.h> -#else -# include <linux/locks.h> -#endif -#include <linux/obd_lov.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_fsfilt.h> -#include <linux/lprocfs_status.h> -#include "mds_internal.h" - -extern int mds_get_lovtgts(struct mds_obd *obd, int tgt_count, - struct obd_uuid *uuidarray); -extern int mds_get_lovdesc(struct mds_obd *obd, struct lov_desc *desc); -int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, - struct ptlrpc_request *req, int rc, int disp); -static int mds_cleanup(struct obd_device * obddev, int force, int failover); - -inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req) -{ - return &req->rq_export->exp_obd->u.mds; -} - -static int mds_bulk_timeout(void *data) -{ - struct ptlrpc_bulk_desc *desc = data; - struct obd_export *exp = desc->bd_export; - - CERROR("bulk send timed out: evicting %s@%s\n", - exp->exp_client_uuid.uuid, - exp->exp_connection->c_remote_uuid.uuid); - ptlrpc_fail_export(exp); - ptlrpc_abort_bulk (desc); - RETURN(1); -} - -/* Assumes caller has already pushed into the kernel filesystem context */ -static int mds_sendpage(struct ptlrpc_request *req, struct file *file, - __u64 offset, __u64 xid) -{ - struct ptlrpc_bulk_desc *desc; - struct l_wait_info lwi; - struct page *page; - int rc = 0; - ENTRY; - - LASSERT ((offset & (PAGE_CACHE_SIZE - 1)) == 0); - - desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, MDS_BULK_PORTAL); - if (desc == NULL) - GOTO(out, rc = -ENOMEM); - - LASSERT (PAGE_SIZE == PAGE_CACHE_SIZE); - page = 
alloc_pages (GFP_KERNEL, 0); - if (page == NULL) - GOTO(cleanup_bulk, rc = -ENOMEM); - - rc = ptlrpc_prep_bulk_page(desc, page, 0, PAGE_CACHE_SIZE); - if (rc != 0) - GOTO(cleanup_buf, rc); - - CDEBUG(D_EXT2, "reading %lu@"LPU64" from dir %lu (size %llu)\n", - PAGE_CACHE_SIZE, offset, file->f_dentry->d_inode->i_ino, - file->f_dentry->d_inode->i_size); - rc = fsfilt_readpage(req->rq_export->exp_obd, file, page_address (page), - PAGE_CACHE_SIZE, (loff_t *)&offset); - - if (rc != PAGE_CACHE_SIZE) - GOTO(cleanup_buf, rc = -EIO); - - rc = ptlrpc_bulk_put(desc); - if (rc) - GOTO(cleanup_buf, rc); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) { - CERROR("obd_fail_loc=%x, fail operation rc=%d\n", - OBD_FAIL_MDS_SENDPAGE, rc); - ptlrpc_abort_bulk(desc); - GOTO(cleanup_buf, rc); - } - - lwi = LWI_TIMEOUT(obd_timeout * HZ, mds_bulk_timeout, desc); - rc = l_wait_event(desc->bd_waitq, ptlrpc_bulk_complete (desc), &lwi); - if (rc) { - LASSERT (rc == -ETIMEDOUT); - GOTO(cleanup_buf, rc); - } - - EXIT; - cleanup_buf: - __free_pages (page, 0); - cleanup_bulk: - ptlrpc_free_bulk (desc); - out: - return rc; -} - -/* only valid locked dentries or errors should be returned */ -struct dentry *mds_fid2locked_dentry(struct obd_device *obd, struct ll_fid *fid, - struct vfsmount **mnt, int lock_mode, - struct lustre_handle *lockh) -{ - struct mds_obd *mds = &obd->u.mds; - struct dentry *de = mds_fid2dentry(mds, fid, mnt), *retval = de; - struct ldlm_res_id res_id = { .name = {0} }; - int flags = 0, rc; - ENTRY; - - if (IS_ERR(de)) - RETURN(de); - - res_id.name[0] = de->d_inode->i_ino; - res_id.name[1] = de->d_inode->i_generation; - rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, - res_id, LDLM_PLAIN, NULL, 0, lock_mode, - &flags, ldlm_completion_ast, - mds_blocking_ast, NULL, lockh); - if (rc != ELDLM_OK) { - l_dput(de); - retval = ERR_PTR(-ENOLCK); /* XXX translate ldlm code */ - } - - RETURN(retval); -} - -#ifndef DCACHE_DISCONNECTED -#define DCACHE_DISCONNECTED 
DCACHE_NFSD_DISCONNECTED -#endif - - -/* Look up an entry by inode number. */ -/* this function ONLY returns valid dget'd dentries with an initialized inode - or errors */ -struct dentry *mds_fid2dentry(struct mds_obd *mds, struct ll_fid *fid, - struct vfsmount **mnt) -{ - char fid_name[32]; - unsigned long ino = fid->id; - __u32 generation = fid->generation; - struct inode *inode; - struct dentry *result; - - if (ino == 0) - RETURN(ERR_PTR(-ESTALE)); - - snprintf(fid_name, sizeof(fid_name), "0x%lx", ino); - - /* under ext3 this is neither supposed to return bad inodes - nor NULL inodes. */ - result = ll_lookup_one_len(fid_name, mds->mds_fid_de, strlen(fid_name)); - if (IS_ERR(result)) - RETURN(result); - - inode = result->d_inode; - if (!inode) - RETURN(ERR_PTR(-ENOENT)); - - CDEBUG(D_DENTRY, "--> mds_fid2dentry: ino %lu, gen %u, sb %p\n", - inode->i_ino, inode->i_generation, inode->i_sb); - - if (generation && inode->i_generation != generation) { - /* we didn't find the right inode.. */ - CERROR("bad inode %lu, link: %d ct: %d or generation %u/%u\n", - inode->i_ino, inode->i_nlink, - atomic_read(&inode->i_count), inode->i_generation, - generation); - dput(result); - RETURN(ERR_PTR(-ENOENT)); - } - - if (mnt) { - *mnt = mds->mds_vfsmnt; - mntget(*mnt); - } - - RETURN(result); -} - - -/* Establish a connection to the MDS. - * - * This will set up an export structure for the client to hold state data - * about that client, like open files, the last operation number it did - * on the server, etc. - */ -static int mds_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid) -{ - struct obd_export *exp; - struct mds_export_data *med; - struct mds_client_data *mcd; - int rc, abort_recovery; - ENTRY; - - if (!conn || !obd || !cluuid) - RETURN(-EINVAL); - - /* Check for aborted recovery. 
*/ - spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; - spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) - target_abort_recovery(obd); - - /* XXX There is a small race between checking the list and adding a - * new connection for the same UUID, but the real threat (list - * corruption when multiple different clients connect) is solved. - * - * There is a second race between adding the export to the list, - * and filling in the client data below. Hence skipping the case - * of NULL mcd above. We should already be controlling multiple - * connects at the client, and we can't hold the spinlock over - * memory allocations without risk of deadlocking. - */ - rc = class_connect(conn, obd, cluuid); - if (rc) - RETURN(rc); - exp = class_conn2export(conn); - LASSERT(exp); - med = &exp->exp_mds_data; - class_export_put(exp); - - OBD_ALLOC(mcd, sizeof(*mcd)); - if (!mcd) { - CERROR("mds: out of memory for client data\n"); - GOTO(out_export, rc = -ENOMEM); - } - - memcpy(mcd->mcd_uuid, cluuid, sizeof(mcd->mcd_uuid)); - med->med_mcd = mcd; - - INIT_LIST_HEAD(&med->med_open_head); - spin_lock_init(&med->med_open_lock); - - rc = mds_client_add(obd, &obd->u.mds, med, -1); - if (rc) - GOTO(out_mcd, rc); - - RETURN(0); - -out_mcd: - OBD_FREE(mcd, sizeof(*mcd)); -out_export: - class_disconnect(conn, 0); - - return rc; -} - -static void mds_mfd_addref(void *mfdp) -{ - struct mds_file_data *mfd = mfdp; - - atomic_inc(&mfd->mfd_refcount); - CDEBUG(D_INFO, "GETting mfd %p : new refcount %d\n", mfd, - atomic_read(&mfd->mfd_refcount)); -} - -struct mds_file_data *mds_mfd_new(void) -{ - struct mds_file_data *mfd; - - OBD_ALLOC(mfd, sizeof *mfd); - if (mfd == NULL) { - CERROR("mds: out of memory\n"); - return NULL; - } - - atomic_set(&mfd->mfd_refcount, 2); - - INIT_LIST_HEAD(&mfd->mfd_handle.h_link); - class_handle_hash(&mfd->mfd_handle, mds_mfd_addref); - - return mfd; -} - -static struct mds_file_data *mds_handle2mfd(struct 
lustre_handle *handle) -{ - ENTRY; - LASSERT(handle != NULL); - RETURN(class_handle2object(handle->cookie)); -} - -void mds_mfd_put(struct mds_file_data *mfd) -{ - CDEBUG(D_INFO, "PUTting mfd %p : new refcount %d\n", mfd, - atomic_read(&mfd->mfd_refcount) - 1); - LASSERT(atomic_read(&mfd->mfd_refcount) > 0 && - atomic_read(&mfd->mfd_refcount) < 0x5a5a); - if (atomic_dec_and_test(&mfd->mfd_refcount)) { - LASSERT(list_empty(&mfd->mfd_handle.h_link)); - OBD_FREE(mfd, sizeof *mfd); - } -} - -void mds_mfd_destroy(struct mds_file_data *mfd) -{ - class_handle_unhash(&mfd->mfd_handle); - mds_mfd_put(mfd); -} - -/* Call with med->med_open_lock held, please. */ -static int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med) -{ - struct dentry *de = NULL; - -#ifdef CONFIG_SMP - LASSERT(spin_is_locked(&med->med_open_lock)); -#endif - list_del(&mfd->mfd_list); - - if (mfd->mfd_dentry->d_parent) { - LASSERT(atomic_read(&mfd->mfd_dentry->d_parent->d_count)); - de = dget(mfd->mfd_dentry->d_parent); - } - - /* this is the actual "close" */ - l_dput(mfd->mfd_dentry); - - if (de) - l_dput(de); - - mds_mfd_destroy(mfd); - RETURN(0); -} - -static int mds_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_export *export = class_conn2export(conn); - int rc; - unsigned long flags; - ENTRY; - - ldlm_cancel_locks_for_export(export); - - spin_lock_irqsave(&export->exp_lock, flags); - export->exp_failover = failover; - spin_unlock_irqrestore(&export->exp_lock, flags); - - rc = class_disconnect(conn, failover); - class_export_put(export); - - RETURN(rc); -} - -static void mds_destroy_export(struct obd_export *export) -{ - struct mds_export_data *med = &export->exp_mds_data; - struct list_head *tmp, *n; - int rc; - - ENTRY; - LASSERT(!strcmp(export->exp_obd->obd_type->typ_name, - LUSTRE_MDS_NAME)); - - /* - * Close any open files. 
- */ - spin_lock(&med->med_open_lock); - list_for_each_safe(tmp, n, &med->med_open_head) { - struct mds_file_data *mfd = - list_entry(tmp, struct mds_file_data, mfd_list); -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct dentry *dentry = mfd->mfd_dentry; - CERROR("force closing client file handle for %*s (%s:%lu)\n", - dentry->d_name.len, dentry->d_name.name, - kdevname(dentry->d_inode->i_sb->s_dev), - dentry->d_inode->i_ino); -#endif - rc = mds_close_mfd(mfd, med); - if (rc) - CDEBUG(D_INODE, "Error closing file: %d\n", rc); - } - spin_unlock(&med->med_open_lock); - - if (export->exp_outstanding_reply) { - struct ptlrpc_request *req = export->exp_outstanding_reply; - unsigned long flags; - - /* Fake the ack, so the locks get cancelled. */ - LBUG (); - /* Actually we can't do this because it prevents us knowing - * if the ACK callback ran or not */ - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_want_ack = 0; - req->rq_err = 1; - wake_up(&req->rq_wait_for_rep); - spin_unlock_irqrestore (&req->rq_lock, flags); - - export->exp_outstanding_reply = NULL; - } - - if (!export->exp_failover) - mds_client_free(export); - EXIT; -} - -/* - * XXX This is NOT guaranteed to flush all transactions to disk (even though - * it is equivalent to calling sync()) because it only _starts_ the flush - * and does not wait for completion. It's better than nothing though. - * What we really want is a mild form of fsync_dev_lockfs(), but it is - * non-standard, or enabling do_sync_supers in ext3, just for this call. 
- */ -static void mds_fsync_super(struct super_block *sb) -{ - lock_kernel(); - lock_super(sb); - if (sb->s_dirt && sb->s_op && sb->s_op->write_super) - sb->s_op->write_super(sb); - unlock_super(sb); - unlock_kernel(); -} - -static int mds_getstatus(struct ptlrpc_request *req) -{ - struct mds_obd *mds = mds_req2mds(req); - struct mds_body *body; - int rc, size = sizeof(*body); - ENTRY; - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_GETSTATUS_PACK)) { - CERROR("mds: out of memory for message: size=%d\n", size); - req->rq_status = -ENOMEM; /* superfluous? */ - RETURN(-ENOMEM); - } - - /* Flush any outstanding transactions to disk so the client will - * get the latest last_committed value and can drop their local - * requests if they have any. This would be fsync_super() if it - * was exported. - */ - mds_fsync_super(mds->mds_sb); - - body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); - memcpy(&body->fid1, &mds->mds_rootfid, sizeof(body->fid1)); - - /* the last_committed and last_xid fields are filled in for all - * replies already - no need to do so here also. 
- */ - RETURN(0); -} - -static int mds_getlovinfo(struct ptlrpc_request *req) -{ - struct mds_obd *mds = mds_req2mds(req); - struct mds_status_req *streq; - struct lov_desc *desc; - struct obd_uuid *uuid0; - int tgt_count; - int rc, size[2] = {sizeof(*desc)}; - ENTRY; - - streq = lustre_swab_reqbuf (req, 0, sizeof (*streq), - lustre_swab_mds_status_req); - if (streq == NULL) { - CERROR ("Can't unpack mds_status_req\n"); - RETURN (-EFAULT); - } - - if (streq->repbuf > LOV_MAX_UUID_BUFFER_SIZE) { - CERROR ("Illegal request for uuid array > %d\n", - streq->repbuf); - RETURN (-EINVAL); - } - size[1] = streq->repbuf; - - rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("mds: out of memory for message: size=%d\n", size[1]); - RETURN(-ENOMEM); - } - - if (!mds->mds_has_lov_desc) { - req->rq_status = -ENOENT; - RETURN(0); - } - - /* XXX We're sending the lov_desc in my byte order. - * Receiver will swab... */ - desc = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*desc)); - memcpy(desc, &mds->mds_lov_desc, sizeof (*desc)); - - tgt_count = mds->mds_lov_desc.ld_tgt_count; - uuid0 = lustre_msg_buf (req->rq_repmsg, 1, - tgt_count * sizeof (*uuid0)); - if (uuid0 == NULL) { - CERROR("too many targets, enlarge client buffers\n"); - req->rq_status = -ENOSPC; - RETURN(0); - } - - rc = mds_get_lovtgts(mds, tgt_count, uuid0); - if (rc) { - CERROR("get_lovtgts error %d\n", rc); - req->rq_status = rc; - RETURN(0); - } - RETURN(0); -} - -int mds_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag) -{ - int do_ast; - ENTRY; - - if (flag == LDLM_CB_CANCELING) { - /* Don't need to do anything here. */ - RETURN(0); - } - - /* XXX layering violation! 
-phil */ - l_lock(&lock->l_resource->lr_namespace->ns_lock); - /* Get this: if mds_blocking_ast is racing with ldlm_intent_policy, - * such that mds_blocking_ast is called just before l_i_p takes the - * ns_lock, then by the time we get the lock, we might not be the - * correct blocking function anymore. So check, and return early, if - * so. */ - if (lock->l_blocking_ast != mds_blocking_ast) { - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - RETURN(0); - } - - lock->l_flags |= LDLM_FL_CBPENDING; - do_ast = (!lock->l_readers && !lock->l_writers); - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - - if (do_ast) { - struct lustre_handle lockh; - int rc; - - LDLM_DEBUG(lock, "already unused, calling ldlm_cli_cancel"); - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc < 0) - CERROR("ldlm_cli_cancel: %d\n", rc); - } else { - LDLM_DEBUG(lock, "Lock still has references, will be " - "cancelled later"); - } - RETURN(0); -} - -int mds_pack_md(struct obd_device *obd, struct lustre_msg *msg, - int offset, struct mds_body *body, struct inode *inode) -{ - struct mds_obd *mds = &obd->u.mds; - struct lov_mds_md *lmm; - int lmm_size; - int rc; - ENTRY; - - lmm = lustre_msg_buf(msg, offset, 0); - if (lmm == NULL) { - /* Some problem with getting eadata when I sized the reply - * buffer... */ - CDEBUG(D_INFO, "no space reserved for inode %lu MD\n", - inode->i_ino); - RETURN(0); - } - lmm_size = msg->buflens[offset]; - - /* I don't really like this, but it is a sanity check on the client - * MD request. However, if the client doesn't know how much space - * to reserve for the MD, this shouldn't be fatal either... 
- */ - if (lmm_size > mds->mds_max_mdsize) { - CERROR("Reading MD for inode %lu of %d bytes > max %d\n", - inode->i_ino, lmm_size, mds->mds_max_mdsize); - // RETURN(-EINVAL); - } - - rc = fsfilt_get_md(obd, inode, lmm, lmm_size); - if (rc < 0) { - CERROR ("Error %d reading eadata for ino %lu\n", - rc, inode->i_ino); - } else if (rc > 0) { - body->valid |= OBD_MD_FLEASIZE; - body->eadatasize = rc; - rc = 0; - } - - RETURN(rc); -} - -static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry, - struct ptlrpc_request *req, - struct mds_body *reqbody, int reply_off) -{ - struct mds_body *body; - struct inode *inode = dentry->d_inode; - int rc = 0; - ENTRY; - - if (inode == NULL) - RETURN(-ENOENT); - - body = lustre_msg_buf(req->rq_repmsg, reply_off, sizeof (*body)); - LASSERT (body != NULL); /* caller prepped reply */ - - mds_pack_inode2fid(&body->fid1, inode); - mds_pack_inode2body(body, inode); - - if (S_ISREG(inode->i_mode) && - (reqbody->valid & OBD_MD_FLEASIZE) != 0) { - rc = mds_pack_md(obd, req->rq_repmsg, reply_off + 1, - body, inode); - } else if (S_ISLNK(inode->i_mode) && - (reqbody->valid & OBD_MD_LINKNAME) != 0) { - char *symname = lustre_msg_buf(req->rq_repmsg, reply_off + 1, 0); - int len; - - LASSERT (symname != NULL); /* caller prepped reply */ - len = req->rq_repmsg->buflens[reply_off + 1]; - - rc = inode->i_op->readlink(dentry, symname, len); - if (rc < 0) { - CERROR("readlink failed: %d\n", rc); - } else if (rc != len - 1) { - CERROR ("Unexpected readlink rc %d: expecting %d\n", - rc, len - 1); - rc = -EINVAL; - } else { - CDEBUG(D_INODE, "read symlink dest %s\n", symname); - body->valid |= OBD_MD_LINKNAME; - body->eadatasize = rc + 1; - symname[rc] = 0; /* NULL terminate */ - rc = 0; - } - } - RETURN(rc); -} - -static int mds_getattr_pack_msg(struct ptlrpc_request *req, struct inode *inode, - int offset) -{ - struct mds_obd *mds = mds_req2mds(req); - struct mds_body *body; - int rc = 0, size[2] = {sizeof(*body)}, bufcount = 1; - 
ENTRY; - - body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body)); - LASSERT (body != NULL); /* checked by caller */ - LASSERT_REQSWABBED (req, offset); /* swabbed by caller */ - - if (S_ISREG(inode->i_mode) && - (body->valid & OBD_MD_FLEASIZE) != 0) { - int rc = fsfilt_get_md(req->rq_export->exp_obd, inode, NULL, 0); - CDEBUG(D_INODE, "got %d bytes MD data for inode %lu\n", - rc, inode->i_ino); - if (rc < 0) { - if (rc != -ENODATA) - CERROR("error getting inode %lu MD: rc = %d\n", - inode->i_ino, rc); - size[bufcount] = 0; - } else if (rc > mds->mds_max_mdsize) { - size[bufcount] = 0; - CERROR("MD size %d larger than maximum possible %u\n", - rc, mds->mds_max_mdsize); - } else - size[bufcount] = rc; - bufcount++; - } else if (S_ISLNK (inode->i_mode) && - (body->valid & OBD_MD_LINKNAME) != 0) { - if (inode->i_size + 1 != body->eadatasize) - CERROR ("symlink size: %Lu, reply space: %d\n", - inode->i_size + 1, body->eadatasize); - size[bufcount] = MIN(inode->i_size + 1, body->eadatasize); - bufcount++; - CDEBUG(D_INODE, "symlink size: %Lu, reply space: %d\n", - inode->i_size + 1, body->eadatasize); - } - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_GETATTR_PACK)) { - CERROR("failed MDS_GETATTR_PACK test\n"); - req->rq_status = -ENOMEM; - GOTO(out, rc = -ENOMEM); - } - - rc = lustre_pack_msg(bufcount, size, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) { - CERROR("out of memoryK\n"); - req->rq_status = rc; - GOTO(out, rc); - } - - EXIT; - out: - return(rc); -} - -/* This is more copy-and-paste from getattr_name than I'd like. 
*/ -static void reconstruct_getattr_name(int offset, struct ptlrpc_request *req, - struct lustre_handle *client_lockh) -{ - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_obd *mds = mds_req2mds(req); - struct dentry *parent, *child; - struct mds_body *body; - struct inode *dir; - struct obd_run_ctxt saved; - struct obd_ucred uc; - int namelen, rc = 0; - char *name; - - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); - - body = lustre_msg_buf(req->rq_reqmsg, offset, sizeof (*body)); - LASSERT (body != NULL); /* checked by caller */ - LASSERT_REQSWABBED (req, offset); /* swabbed by caller */ - - name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - LASSERT (name != NULL); /* checked by caller */ - LASSERT_REQSWABBED (req, offset + 1); /* swabbed by caller */ - namelen = req->rq_reqmsg->buflens[offset + 1]; - - LASSERT (offset == 2 || offset == 0); - /* requests were at offset 2, replies go back at 1 */ - if (offset) - offset = 1; - - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - uc.ouc_suppgid1 = body->suppgid; - uc.ouc_suppgid2 = -1; - push_ctxt(&saved, &mds->mds_ctxt, &uc); - parent = mds_fid2dentry(mds, &body->fid1, NULL); - LASSERT(!IS_ERR(parent)); - dir = parent->d_inode; - LASSERT(dir); - child = ll_lookup_one_len(name, parent, namelen - 1); - LASSERT(!IS_ERR(child)); - - if (req->rq_repmsg == NULL) { - rc = mds_getattr_pack_msg(req, child->d_inode, offset); - /* XXX need to handle error here */ - LASSERT (rc == 0); - } - - rc = mds_getattr_internal(obd, child, req, body, offset); - req->rq_status = rc; - l_dput(child); - l_dput(parent); -} - -static int mds_getattr_name(int offset, struct ptlrpc_request *req, - struct lustre_handle *child_lockh) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct obd_run_ctxt saved; - struct mds_body *body; - struct dentry *de = NULL, *dchild = NULL; - struct inode 
*dir; - struct obd_ucred uc; - struct ldlm_res_id child_res_id = { .name = {0} }; - struct lustre_handle parent_lockh; - int namesize; - int flags = 0, rc = 0, cleanup_phase = 0, req_was_resent; - char *name; - ENTRY; - - LASSERT(!strcmp(obd->obd_type->typ_name, "mds")); - - /* Swab now, before anyone looks inside the request */ - - body = lustre_swab_reqbuf (req, offset, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't swab mds_body\n"); - GOTO (cleanup, rc = -EFAULT); - } - - LASSERT_REQSWAB (req, offset + 1); - name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0); - if (name == NULL) { - CERROR ("Can't unpack name\n"); - GOTO (cleanup, rc = -EFAULT); - } - namesize = req->rq_reqmsg->buflens[offset + 1]; - - req_was_resent = lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT; - if (child_lockh->cookie) { - LASSERT(req_was_resent); - reconstruct_getattr_name(offset, req, child_lockh); - RETURN(0); - } else if (req_was_resent) { - DEBUG_REQ(D_HA, req, "no reply for RESENT req"); - } - - LASSERT (offset == 0 || offset == 2); - /* if requests were at offset 2, replies go back at 1 */ - if (offset) - offset = 1; - - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - uc.ouc_suppgid1 = body->suppgid; - uc.ouc_suppgid2 = -1; - push_ctxt(&saved, &mds->mds_ctxt, &uc); - /* Step 1: Lookup/lock parent */ - de = mds_fid2locked_dentry(obd, &body->fid1, NULL, LCK_PR, - &parent_lockh); - if (IS_ERR(de)) - GOTO(cleanup, rc = PTR_ERR(de)); - dir = de->d_inode; - LASSERT(dir); - - cleanup_phase = 1; /* parent dentry and lock */ - - CDEBUG(D_INODE, "parent ino %lu, name %s\n", dir->i_ino, name); - - /* Step 2: Lookup child */ - dchild = ll_lookup_one_len(name, de, namesize - 1); - if (IS_ERR(dchild)) { - CDEBUG(D_INODE, "child lookup error %ld\n", PTR_ERR(dchild)); - GOTO(cleanup, rc = PTR_ERR(dchild)); - } - - cleanup_phase = 2; /* child dentry */ - - if (dchild->d_inode == NULL) { - GOTO(cleanup, rc = 
-ENOENT); - } - - /* Step 3: Lock child */ - child_res_id.name[0] = dchild->d_inode->i_ino; - child_res_id.name[1] = dchild->d_inode->i_generation; - rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, - child_res_id, LDLM_PLAIN, NULL, 0, LCK_PR, - &flags, ldlm_completion_ast, mds_blocking_ast, - NULL, child_lockh); - if (rc != ELDLM_OK) { - CERROR("ldlm_cli_enqueue: %d\n", rc); - GOTO(cleanup, rc = -EIO); - } - - cleanup_phase = 3; /* child lock */ - - if (req->rq_repmsg == NULL) { - rc = mds_getattr_pack_msg(req, dchild->d_inode, offset); - if (rc != 0) { - CERROR ("mds_getattr_pack_msg: %d\n", rc); - GOTO (cleanup, rc); - } - } - - rc = mds_getattr_internal(obd, dchild, req, body, offset); - GOTO(cleanup, rc); /* returns the lock to the client */ - - cleanup: - switch (cleanup_phase) { - case 3: - if (rc) - ldlm_lock_decref(child_lockh, LCK_PR); - case 2: - l_dput(dchild); - - case 1: - if (rc) { - ldlm_lock_decref(&parent_lockh, LCK_PR); - } else { - memcpy(&req->rq_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - req->rq_ack_locks[0].mode = LCK_PR; - } - l_dput(de); - default: ; - } - req->rq_status = rc; - pop_ctxt(&saved, &mds->mds_ctxt, &uc); - return rc; -} - -static int mds_getattr(int offset, struct ptlrpc_request *req) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct obd_run_ctxt saved; - struct dentry *de; - struct mds_body *body; - struct obd_ucred uc; - int rc = 0; - ENTRY; - - body = lustre_swab_reqbuf (req, offset, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't unpack body\n"); - RETURN (-EFAULT); - } - - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - push_ctxt(&saved, &mds->mds_ctxt, &uc); - de = mds_fid2dentry(mds, &body->fid1, NULL); - if (IS_ERR(de)) { - rc = req->rq_status = -ENOENT; - GOTO(out_pop, PTR_ERR(de)); - } - - rc = mds_getattr_pack_msg(req, de->d_inode, offset); - if (rc != 0) { 
- CERROR ("mds_getattr_pack_msg: %d\n", rc); - GOTO (out_pop, rc); - } - - req->rq_status = mds_getattr_internal(obd, de, req, body, 0); - - l_dput(de); - GOTO(out_pop, rc); -out_pop: - pop_ctxt(&saved, &mds->mds_ctxt, &uc); - return rc; -} - -static int mds_statfs(struct ptlrpc_request *req) -{ - struct obd_device *obd = req->rq_export->exp_obd; - struct obd_statfs *osfs; - int rc, size = sizeof(*osfs); - ENTRY; - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_STATFS_PACK)) { - CERROR("mds: statfs lustre_pack_msg failed: rc = %d\n", rc); - GOTO(out, rc); - } - - osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs)); - rc = fsfilt_statfs(obd, obd->u.mds.mds_sb, osfs); - if (rc) { - CERROR("mds: statfs failed: rc %d\n", rc); - GOTO(out, rc); - } - - EXIT; -out: - req->rq_status = rc; - return 0; -} - -static void reconstruct_close(struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - - req->rq_transno = mcd->mcd_last_transno; - req->rq_status = mcd->mcd_last_result; - - /* XXX When open-unlink is working, we'll need to steal ack locks as - * XXX well, and make sure that we do the right unlinking after we - * XXX get the ack back. 
- */ -} - -static int mds_close(struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_body *body; - struct mds_file_data *mfd; - int rc; - ENTRY; - - MDS_CHECK_RESENT(req, reconstruct_close(req)); - - body = lustre_swab_reqbuf(req, 0, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) { - CERROR ("Can't unpack body\n"); - RETURN (-EFAULT); - } - - mfd = mds_handle2mfd(&body->handle); - if (mfd == NULL) { - DEBUG_REQ(D_ERROR, req, "no handle for file close "LPD64 - ": cookie "LPX64"\n", body->fid1.id, - body->handle.cookie); - RETURN(-ESTALE); - } - - spin_lock(&med->med_open_lock); - req->rq_status = mds_close_mfd(mfd, med); - spin_unlock(&med->med_open_lock); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) { - CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n"); - req->rq_status = -ENOMEM; - mds_mfd_put(mfd); - RETURN(-ENOMEM); - } - - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) { - CERROR("mds: lustre_pack_msg: rc = %d\n", rc); - req->rq_status = rc; - } - - mds_mfd_put(mfd); - RETURN(0); -} - -static int mds_readpage(struct ptlrpc_request *req) -{ - struct mds_obd *mds = mds_req2mds(req); - struct vfsmount *mnt; - struct dentry *de; - struct file *file; - struct mds_body *body, *repbody; - struct obd_run_ctxt saved; - int rc, size = sizeof(*repbody); - struct obd_ucred uc; - ENTRY; - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_READPAGE_PACK)) { - CERROR("mds: out of memory\n"); - GOTO(out, rc = -ENOMEM); - } - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_mds_body); - if (body == NULL) - GOTO (out, rc = -EFAULT); - - /* body->size is actually the offset -eeb */ - if ((body->size & (PAGE_SIZE - 1)) != 0) { - CERROR ("offset "LPU64"not on a page boundary\n", body->size); - GOTO (out, rc = -EFAULT); - } - - /* body->nlink is actually the #bytes to read -eeb */ - if (body->nlink != 
PAGE_SIZE) { - CERROR ("size %d is not PAGE_SIZE\n", body->nlink); - GOTO (out, rc = -EFAULT); - } - - uc.ouc_fsuid = body->fsuid; - uc.ouc_fsgid = body->fsgid; - uc.ouc_cap = body->capability; - push_ctxt(&saved, &mds->mds_ctxt, &uc); - de = mds_fid2dentry(mds, &body->fid1, &mnt); - if (IS_ERR(de)) - GOTO(out_pop, rc = PTR_ERR(de)); - - CDEBUG(D_INODE, "ino %lu\n", de->d_inode->i_ino); - - file = dentry_open(de, mnt, O_RDONLY | O_LARGEFILE); - /* note: in case of an error, dentry_open puts dentry */ - if (IS_ERR(file)) - GOTO(out_pop, rc = PTR_ERR(file)); - - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody)); - repbody->size = file->f_dentry->d_inode->i_size; - repbody->valid = OBD_MD_FLSIZE; - - /* to make this asynchronous make sure that the handling function - doesn't send a reply when this function completes. Instead a - callback function would send the reply */ - /* body->blocks is actually the xid -phil */ - /* body->size is actually the offset -eeb */ - rc = mds_sendpage(req, file, body->size, body->blocks); - - filp_close(file, 0); -out_pop: - pop_ctxt(&saved, &mds->mds_ctxt, &uc); -out: - req->rq_status = rc; - RETURN(0); -} - -int mds_reint(struct ptlrpc_request *req, int offset, - struct lustre_handle *lockh) -{ - struct mds_update_record *rec; /* 116 bytes on the stack? no sir! */ - int rc; - - OBD_ALLOC(rec, sizeof(*rec)); - if (rec == NULL) - RETURN(-ENOMEM); - - rc = mds_update_unpack(req, offset, rec); - if (rc || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNPACK)) { - CERROR("invalid record\n"); - GOTO(out, req->rq_status = -EINVAL); - } - /* rc will be used to interrupt a for loop over multiple records */ - rc = mds_reint_rec(rec, offset, req, lockh); - out: - OBD_FREE(rec, sizeof(*rec)); - return rc; -} - -static int filter_recovery_request(struct ptlrpc_request *req, - struct obd_device *obd, int *process) -{ - switch (req->rq_reqmsg->opc) { - case MDS_CONNECT: /* This will never get here, but for completeness. 
*/ - case OST_CONNECT: /* This will never get here, but for completeness. */ - case MDS_DISCONNECT: - case OST_DISCONNECT: - *process = 1; - RETURN(0); - - case MDS_CLOSE: - case MDS_GETSTATUS: /* used in unmounting */ - case OBD_PING: - case MDS_REINT: - case LDLM_ENQUEUE: - *process = target_queue_recovery_request(req, obd); - RETURN(0); - - default: - DEBUG_REQ(D_ERROR, req, "not permitted during recovery"); - *process = 0; - /* XXX what should we set rq_status to here? */ - req->rq_status = -EAGAIN; - RETURN(ptlrpc_error(req)); - } -} - -static char *reint_names[] = { - [REINT_SETATTR] "setattr", - [REINT_CREATE] "create", - [REINT_LINK] "link", - [REINT_UNLINK] "unlink", - [REINT_RENAME] "rename", - [REINT_OPEN] "open", -}; - -void mds_steal_ack_locks(struct obd_export *exp, - struct ptlrpc_request *req) -{ - unsigned long flags; - - struct ptlrpc_request *oldrep = exp->exp_outstanding_reply; - memcpy(req->rq_ack_locks, oldrep->rq_ack_locks, - sizeof req->rq_ack_locks); - spin_lock_irqsave (&req->rq_lock, flags); - oldrep->rq_resent = 1; - wake_up(&oldrep->rq_wait_for_rep); - spin_unlock_irqrestore (&req->rq_lock, flags); - DEBUG_REQ(D_HA, oldrep, "stole locks from"); - DEBUG_REQ(D_HA, req, "stole locks for"); -} - -int mds_handle(struct ptlrpc_request *req) -{ - int should_process; - int rc = 0; - struct mds_obd *mds = NULL; /* quell gcc overwarning */ - struct obd_device *obd = NULL; - ENTRY; - - OBD_FAIL_RETURN(OBD_FAIL_MDS_ALL_REQUEST_NET | OBD_FAIL_ONCE, 0); - - LASSERT(!strcmp(req->rq_obd->obd_type->typ_name, LUSTRE_MDT_NAME)); - - /* XXX identical to OST */ - if (req->rq_reqmsg->opc != MDS_CONNECT) { - struct mds_export_data *med; - int recovering, abort_recovery; - - if (req->rq_export == NULL) { - CERROR("lustre_mds: operation %d on unconnected MDS\n", - req->rq_reqmsg->opc); - req->rq_status = -ENOTCONN; - GOTO(out, rc = -ENOTCONN); - } - - med = &req->rq_export->exp_mds_data; - obd = req->rq_export->exp_obd; - mds = &obd->u.mds; - - /* Check for 
aborted recovery. */ - spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; - recovering = obd->obd_recovering; - spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) { - target_abort_recovery(obd); - } else if (recovering) { - rc = filter_recovery_request(req, obd, &should_process); - if (rc || !should_process) - RETURN(rc); - } - } - - switch (req->rq_reqmsg->opc) { - case MDS_CONNECT: - DEBUG_REQ(D_INODE, req, "connect"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_CONNECT_NET, 0); - rc = target_handle_connect(req, mds_handle); - /* Make sure that last_rcvd is correct. */ - if (!rc) { - /* Now that we have an export, set mds. */ - mds = mds_req2mds(req); - mds_fsync_super(mds->mds_sb); - } - break; - - case MDS_DISCONNECT: - DEBUG_REQ(D_INODE, req, "disconnect"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_DISCONNECT_NET, 0); - rc = target_handle_disconnect(req); - /* Make sure that last_rcvd is correct. */ - if (!rc) - mds_fsync_super(mds->mds_sb); - req->rq_status = rc; /* superfluous? */ - break; - - case MDS_GETSTATUS: - DEBUG_REQ(D_INODE, req, "getstatus"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_GETSTATUS_NET, 0); - rc = mds_getstatus(req); - break; - - case MDS_GETLOVINFO: - DEBUG_REQ(D_INODE, req, "getlovinfo"); - rc = mds_getlovinfo(req); - break; - - case MDS_GETATTR: - DEBUG_REQ(D_INODE, req, "getattr"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NET, 0); - rc = mds_getattr(0, req); - break; - - case MDS_GETATTR_NAME: { - struct lustre_handle lockh; - DEBUG_REQ(D_INODE, req, "getattr_name"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_GETATTR_NAME_NET, 0); - - /* If this request gets a reconstructed reply, we won't be - * acquiring any new locks in mds_getattr_name, so we don't - * want to cancel. 
- */ - lockh.cookie = 0; - rc = mds_getattr_name(0, req, &lockh); - if (rc == 0 && lockh.cookie) - ldlm_lock_decref(&lockh, LCK_PR); - break; - } - case MDS_STATFS: - DEBUG_REQ(D_INODE, req, "statfs"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_STATFS_NET, 0); - rc = mds_statfs(req); - break; - - case MDS_READPAGE: - DEBUG_REQ(D_INODE, req, "readpage"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_READPAGE_NET, 0); - rc = mds_readpage(req); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_SENDPAGE)) - return 0; - break; - - case MDS_REINT: { - __u32 *opcp = lustre_msg_buf (req->rq_reqmsg, 0, sizeof (*opcp)); - __u32 opc; - int size[2] = {sizeof(struct mds_body), mds->mds_max_mdsize}; - int bufcount; - - /* NB only peek inside req now; mds_reint() will swab it */ - if (opcp == NULL) { - CERROR ("Can't inspect opcode\n"); - rc = -EINVAL; - break; - } - opc = *opcp; - if (lustre_msg_swabbed (req->rq_reqmsg)) - __swab32s (&opc); - - DEBUG_REQ(D_INODE, req, "reint %d (%s)", opc, - (opc < sizeof (reint_names) / sizeof (reint_names[0]) || - reint_names[opc] == NULL) ? 
reint_names[opc] : "unknown opcode"); - - OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET, 0); - - if (opc == REINT_UNLINK) - bufcount = 2; - else - bufcount = 1; - - rc = lustre_pack_msg(bufcount, size, NULL, - &req->rq_replen, &req->rq_repmsg); - if (rc) - break; - - rc = mds_reint(req, 0, NULL); - OBD_FAIL_RETURN(OBD_FAIL_MDS_REINT_NET_REP, 0); - break; - } - - case MDS_CLOSE: - DEBUG_REQ(D_INODE, req, "close"); - OBD_FAIL_RETURN(OBD_FAIL_MDS_CLOSE_NET, 0); - rc = mds_close(req); - break; - - case OBD_PING: - DEBUG_REQ(D_INODE, req, "ping"); - rc = target_handle_ping(req); - break; - - case LDLM_ENQUEUE: - DEBUG_REQ(D_INODE, req, "enqueue"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); - rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast, - ldlm_server_blocking_ast); - break; - case LDLM_CONVERT: - DEBUG_REQ(D_INODE, req, "convert"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_CONVERT, 0); - rc = ldlm_handle_convert(req); - break; - case LDLM_BL_CALLBACK: - case LDLM_CP_CALLBACK: - DEBUG_REQ(D_INODE, req, "callback"); - CERROR("callbacks should not happen on MDS\n"); - LBUG(); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_BL_CALLBACK, 0); - break; - default: - req->rq_status = -ENOTSUPP; - rc = ptlrpc_error(req); - RETURN(rc); - } - - EXIT; - - /* If we're DISCONNECTing, the mds_export_data is already freed */ - if (!rc && req->rq_reqmsg->opc != MDS_DISCONNECT) { - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct obd_device *obd = list_entry(mds, struct obd_device, - u.mds); - req->rq_repmsg->last_xid = - le64_to_cpu (med->med_mcd->mcd_last_xid); - - if (!obd->obd_no_transno) { - req->rq_repmsg->last_committed = - obd->obd_last_committed; - } else { - DEBUG_REQ(D_IOCTL, req, - "not sending last_committed update"); - } - CDEBUG(D_INFO, "last_transno "LPU64", last_committed "LPU64 - ", xid "LPU64"\n", - mds->mds_last_transno, obd->obd_last_committed, - req->rq_xid); - } - out: - - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) { - if (obd && 
obd->obd_recovering) { - DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply"); - return target_queue_final_reply(req, rc); - } - /* Lost a race with recovery; let the error path DTRT. */ - rc = req->rq_status = -ENOTCONN; - } - - target_send_reply(req, rc, OBD_FAIL_MDS_ALL_REPLY_NET); - return 0; -} - -/* Update the server data on disk. This stores the new mount_count and - * also the last_rcvd value to disk. If we don't have a clean shutdown, - * then the server last_rcvd value may be less than that of the clients. - * This will alert us that we may need to do client recovery. - * - * Also assumes for mds_last_transno that we are not modifying it (no locking). - */ -int mds_update_server_data(struct mds_obd *mds) -{ - struct mds_server_data *msd = mds->mds_server_data; - struct file *filp = mds->mds_rcvd_filp; - struct obd_run_ctxt saved; - loff_t off = 0; - int rc; - - push_ctxt(&saved, &mds->mds_ctxt, NULL); - msd->msd_last_transno = cpu_to_le64(mds->mds_last_transno); - msd->msd_mount_count = cpu_to_le64(mds->mds_mount_count); - - CDEBUG(D_SUPER, "MDS mount_count is %Lu, last_transno is %Lu\n", - (unsigned long long)mds->mds_mount_count, - (unsigned long long)mds->mds_last_transno); - rc = lustre_fwrite(filp, (char *)msd, sizeof(*msd), &off); - if (rc != sizeof(*msd)) { - CERROR("error writing MDS server data: rc = %d\n", rc); - if (rc > 0) - rc = -EIO; - GOTO(out, rc); - } -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - rc = fsync_dev(filp->f_dentry->d_inode->i_rdev); -#else - rc = file_fsync(filp, filp->f_dentry, 1); -#endif - if (rc) - CERROR("error flushing MDS server data: rc = %d\n", rc); - -out: - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - RETURN(rc); -} - -/* mount the file system (secretly) */ -static int mds_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct obd_ioctl_data* data = buf; - struct mds_obd *mds = &obddev->u.mds; - struct vfsmount *mnt; - int rc = 0; - unsigned long page; - ENTRY; - - -#ifdef CONFIG_DEV_RDONLY - 
dev_clear_rdonly(2); -#endif - if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2) - RETURN(rc = -EINVAL); - - obddev->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2); - if (IS_ERR(obddev->obd_fsops)) - RETURN(rc = PTR_ERR(obddev->obd_fsops)); - - - if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) { - if (*data->ioc_inlbuf3 == '/') { - CERROR("mds namespace mount: %s\n", - data->ioc_inlbuf3); -// mds->mds_nspath = strdup(ioc->inlbuf4); - } else { - CERROR("namespace mount must be absolute path: '%s'\n", - data->ioc_inlbuf3); - } - } - - if (!(page = __get_free_page(GFP_KERNEL))) - return -ENOMEM; - - memset((void *)page, 0, PAGE_SIZE); - sprintf((char *)page, "iopen_nopriv"); - - mnt = do_kern_mount(data->ioc_inlbuf2, 0, - data->ioc_inlbuf1, (void *)page); - free_page(page); - if (IS_ERR(mnt)) { - rc = PTR_ERR(mnt); - CERROR("do_kern_mount failed: rc = %d\n", rc); - GOTO(err_ops, rc); - } - - CDEBUG(D_SUPER, "%s: mnt = %p\n", data->ioc_inlbuf1, mnt); - mds->mds_sb = mnt->mnt_root->d_inode->i_sb; - if (!mds->mds_sb) - GOTO(err_put, rc = -ENODEV); - - spin_lock_init(&mds->mds_transno_lock); - mds->mds_max_mdsize = sizeof(struct lov_mds_md); - rc = mds_fs_setup(obddev, mnt); - if (rc) { - CERROR("MDS filesystem method init failed: rc = %d\n", rc); - GOTO(err_put, rc); - } - - obddev->obd_namespace = - ldlm_namespace_new("mds_server", LDLM_NAMESPACE_SERVER); - if (obddev->obd_namespace == NULL) { - mds_cleanup(obddev, 0, 0); - GOTO(err_fs, rc = -ENOMEM); - } - - ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - "mds_ldlm_client", &obddev->obd_ldlm_client); - - mds->mds_has_lov_desc = 0; - - RETURN(0); - -err_fs: - mds_fs_cleanup(obddev, 0); -err_put: - unlock_kernel(); - mntput(mds->mds_vfsmnt); - mds->mds_sb = 0; - lock_kernel(); -err_ops: - fsfilt_put_ops(obddev->obd_fsops); - return rc; -} - -static int mds_cleanup(struct obd_device *obddev, int force, int failover) -{ - struct super_block *sb; - struct mds_obd *mds = &obddev->u.mds; - ENTRY; - - sb = 
mds->mds_sb; - if (!mds->mds_sb) - RETURN(0); - - mds_update_server_data(mds); - mds_fs_cleanup(obddev, failover); - - unlock_kernel(); - - /* 2 seems normal on mds, (may_umount() also expects 2 - fwiw), but we only see 1 at this point in obdfilter. */ - if (atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count) > 2){ - CERROR("%s: mount point busy, mnt_count: %d\n", - obddev->obd_name, - atomic_read(&obddev->u.mds.mds_vfsmnt->mnt_count)); - } - - mntput(mds->mds_vfsmnt); - mds->mds_sb = 0; - - ldlm_namespace_free(obddev->obd_namespace); - - if (obddev->obd_recovering) - target_cancel_recovery_timer(obddev); - lock_kernel(); -#ifdef CONFIG_DEV_RDONLY - dev_clear_rdonly(2); -#endif - fsfilt_put_ops(obddev->obd_fsops); - - RETURN(0); -} - -static void fixup_handle_for_resent_req(struct ptlrpc_request *req, - struct ldlm_lock *new_lock, - struct lustre_handle *lockh) -{ - struct obd_export *exp = req->rq_export; - struct obd_device *obd = exp->exp_obd; - struct ldlm_request *dlmreq = - lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*dlmreq)); - struct lustre_handle remote_hdl = dlmreq->lock_handle1; - struct list_head *iter; - - if (!(lustre_msg_get_flags(req->rq_reqmsg) & MSG_RESENT)) - return; - - l_lock(&obd->obd_namespace->ns_lock); - list_for_each(iter, &exp->exp_ldlm_data.led_held_locks) { - struct ldlm_lock *lock; - lock = list_entry(iter, struct ldlm_lock, l_export_chain); - if (lock == new_lock) - continue; - if (lock->l_remote_handle.cookie == remote_hdl.cookie) { - lockh->cookie = lock->l_handle.h_cookie; - DEBUG_REQ(D_HA, req, "restoring lock cookie "LPX64, - lockh->cookie); - l_unlock(&obd->obd_namespace->ns_lock); - return; - } - - } - l_unlock(&obd->obd_namespace->ns_lock); - DEBUG_REQ(D_HA, req, "no existing lock with rhandle "LPX64, - remote_hdl.cookie); -} - -static int ldlm_intent_policy(struct ldlm_namespace *ns, - struct ldlm_lock **lockp, void *req_cookie, - ldlm_mode_t mode, int flags, void *data) -{ - struct ptlrpc_request *req = req_cookie; - struct 
ldlm_lock *lock = *lockp; - int rc = 0; - ENTRY; - - if (!req_cookie) - RETURN(0); - - if (req->rq_reqmsg->bufcount > 1) { - /* an intent needs to be considered */ - struct ldlm_intent *it; - struct mds_obd *mds = &req->rq_export->exp_obd->u.mds; - struct mds_body *mds_body; - struct ldlm_reply *rep; - struct lustre_handle lockh = { 0 }; - struct ldlm_lock *new_lock; - int rc, offset = 2, repsize[3] = {sizeof(struct ldlm_reply), - sizeof(struct mds_body), - mds->mds_max_mdsize}; - - it = lustre_swab_reqbuf (req, 1, sizeof (*it), - lustre_swab_ldlm_intent); - if (it == NULL) { - CERROR ("Intent missing\n"); - rc = req->rq_status = -EFAULT; - RETURN (rc); - } - - LDLM_DEBUG(lock, "intent policy, opc: %s", - ldlm_it2str(it->opc)); - - rc = lustre_pack_msg(3, repsize, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) { - rc = req->rq_status = -ENOMEM; - RETURN(rc); - } - - rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); - rep->lock_policy_res1 = IT_INTENT_EXEC; - - fixup_handle_for_resent_req(req, lock, &lockh); - - /* execute policy */ - switch ((long)it->opc) { - case IT_OPEN: - case IT_CREAT|IT_OPEN: - rc = mds_reint(req, offset, &lockh); - /* We return a dentry to the client if IT_OPEN_POS is - * set, or if we make it to the OPEN portion of the - * programme (which implies that we created) */ - if (!(rep->lock_policy_res1 & IT_OPEN_POS || - rep->lock_policy_res1 & IT_OPEN_OPEN)) { - rep->lock_policy_res2 = rc; - RETURN(ELDLM_LOCK_ABORTED); - } - break; - case IT_UNLINK: - rc = mds_reint(req, offset, &lockh); - /* Don't return a lock if the unlink failed, or if we're - * not sending back an EA */ - if (rc) { - rep->lock_policy_res2 = rc; - RETURN(ELDLM_LOCK_ABORTED); - } - if (req->rq_status != 0) { - rep->lock_policy_res2 = req->rq_status; - RETURN(ELDLM_LOCK_ABORTED); - } - mds_body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*mds_body)); - if (!(mds_body->valid & OBD_MD_FLEASIZE)) { - rep->lock_policy_res2 = rc; - RETURN(ELDLM_LOCK_ABORTED); - } - 
break; - case IT_GETATTR: - case IT_LOOKUP: - case IT_READDIR: - rc = mds_getattr_name(offset, req, &lockh); - /* FIXME: we need to sit down and decide on who should - * set req->rq_status, who should return negative and - * positive return values, and what they all mean. */ - if (rc) { - rep->lock_policy_res2 = rc; - RETURN(ELDLM_LOCK_ABORTED); - } - if (req->rq_status != 0) { - rep->lock_policy_res2 = req->rq_status; - RETURN(ELDLM_LOCK_ABORTED); - } - break; - default: - CERROR("Unhandled intent "LPD64"\n", it->opc); - LBUG(); - } - - /* By this point, whatever function we called above must have - * filled in 'lockh' or returned an error. We want to give the - * new lock to the client instead of whatever lock it was about - * to get. */ - new_lock = ldlm_handle2lock(&lockh); - LASSERT(new_lock != NULL); - - /* If we've already given this lock to a client once, then we - * should have no readers or writers. Otherwise, we should - * have one reader _or_ writer ref (which will be zeroed below - * before returning the lock to a client. - */ - if (new_lock->l_export == req->rq_export) - LASSERT(new_lock->l_readers + new_lock->l_writers == 0); - else - LASSERT(new_lock->l_readers + new_lock->l_writers == 1); - - /* If we're running an intent only, we want to abort the new - * lock, and let the client abort the original lock. */ - if (flags & LDLM_FL_INTENT_ONLY) { - LDLM_DEBUG(lock, "INTENT_ONLY, aborting locks"); - l_lock(&new_lock->l_resource->lr_namespace->ns_lock); - if (new_lock->l_readers) - ldlm_lock_decref(&lockh, LCK_PR); - else - ldlm_lock_decref(&lockh, LCK_PW); - l_unlock(&new_lock->l_resource->lr_namespace->ns_lock); - LDLM_LOCK_PUT(new_lock); - RETURN(ELDLM_LOCK_ABORTED); - } - - *lockp = new_lock; - - rep->lock_policy_res2 = req->rq_status; - - if (new_lock->l_export == req->rq_export) { - /* Already gave this to the client, which means that we - * reconstructed a reply. 
*/ - LASSERT(lustre_msg_get_flags(req->rq_reqmsg) & - MSG_RESENT); - RETURN(ELDLM_LOCK_REPLACED); - } - - /* Fixup the lock to be given to the client */ - l_lock(&new_lock->l_resource->lr_namespace->ns_lock); - new_lock->l_readers = 0; - new_lock->l_writers = 0; - - new_lock->l_export = req->rq_export; - list_add(&new_lock->l_export_chain, - &new_lock->l_export->exp_ldlm_data.led_held_locks); - - /* We don't need to worry about completion_ast (which isn't set - * in 'lock' yet anyways), because this lock is already - * granted. */ - new_lock->l_blocking_ast = lock->l_blocking_ast; - - memcpy(&new_lock->l_remote_handle, &lock->l_remote_handle, - sizeof(lock->l_remote_handle)); - - new_lock->l_flags &= ~(LDLM_FL_LOCAL | LDLM_FL_AST_SENT | - LDLM_FL_CBPENDING); - - LDLM_LOCK_PUT(new_lock); - l_unlock(&new_lock->l_resource->lr_namespace->ns_lock); - - RETURN(ELDLM_LOCK_REPLACED); - } else { - int size = sizeof(struct ldlm_reply); - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) { - LBUG(); - RETURN(-ENOMEM); - } - } - RETURN(rc); -} - -int mds_attach(struct obd_device *dev, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_multi_vars(0, &lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); -} - -int mds_detach(struct obd_device *dev) -{ - return lprocfs_obd_detach(dev); -} - -int mdt_attach(struct obd_device *dev, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_multi_vars(1, &lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); -} - -int mdt_detach(struct obd_device *dev) -{ - return lprocfs_obd_detach(dev); -} - -static int mdt_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct mds_obd *mds = &obddev->u.mds; - int i, rc = 0; - ENTRY; - - mds->mds_service = ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS, - MDS_BUFSIZE, MDS_MAXREQSIZE, - MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, - mds_handle, "mds", obddev); - - if (!mds->mds_service) { - 
CERROR("failed to start service\n"); - RETURN(rc = -ENOMEM); - } - - for (i = 0; i < MDT_NUM_THREADS; i++) { - char name[32]; - sprintf(name, "ll_mdt_%02d", i); - rc = ptlrpc_start_thread(obddev, mds->mds_service, name); - if (rc) { - CERROR("cannot start MDT thread #%d: rc %d\n", i, rc); - GOTO(err_thread, rc); - } - } - - mds->mds_setattr_service = - ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS, - MDS_BUFSIZE, MDS_MAXREQSIZE, - MDS_SETATTR_PORTAL, MDC_REPLY_PORTAL, - mds_handle, "mds_setattr", obddev); - if (!mds->mds_setattr_service) { - CERROR("failed to start getattr service\n"); - GOTO(err_thread, rc = -ENOMEM); - } - - for (i = 0; i < MDT_NUM_THREADS; i++) { - char name[32]; - sprintf(name, "ll_mdt_attr_%02d", i); - rc = ptlrpc_start_thread(obddev, mds->mds_setattr_service, - name); - if (rc) { - CERROR("cannot start MDT setattr thread #%d: rc %d\n", - i, rc); - GOTO(err_thread2, rc); - } - } - - mds->mds_readpage_service = - ptlrpc_init_svc(MDS_NEVENTS, MDS_NBUFS, - MDS_BUFSIZE, MDS_MAXREQSIZE, - MDS_READPAGE_PORTAL, MDC_REPLY_PORTAL, - mds_handle, "mds_readpage", obddev); - if (!mds->mds_readpage_service) { - CERROR("failed to start readpage service\n"); - GOTO(err_thread2, rc = -ENOMEM); - } - - for (i = 0; i < MDT_NUM_THREADS; i++) { - char name[32]; - sprintf(name, "ll_mdt_rdpg_%02d", i); - rc = ptlrpc_start_thread(obddev, mds->mds_readpage_service, - name); - if (rc) { - CERROR("cannot start MDT readpage thread #%d: rc %d\n", - i, rc); - GOTO(err_thread3, rc); - } - } - - RETURN(0); - -err_thread3: - ptlrpc_stop_all_threads(mds->mds_readpage_service); - ptlrpc_unregister_service(mds->mds_readpage_service); -err_thread2: - ptlrpc_stop_all_threads(mds->mds_setattr_service); - ptlrpc_unregister_service(mds->mds_setattr_service); -err_thread: - ptlrpc_stop_all_threads(mds->mds_service); - ptlrpc_unregister_service(mds->mds_service); - return rc; -} - - -static int mdt_cleanup(struct obd_device *obddev, int force, int failover) -{ - struct mds_obd *mds = 
&obddev->u.mds; - ENTRY; - - ptlrpc_stop_all_threads(mds->mds_readpage_service); - ptlrpc_unregister_service(mds->mds_readpage_service); - - ptlrpc_stop_all_threads(mds->mds_setattr_service); - ptlrpc_unregister_service(mds->mds_setattr_service); - - ptlrpc_stop_all_threads(mds->mds_service); - ptlrpc_unregister_service(mds->mds_service); - - RETURN(0); -} - -extern int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn, - int len, void *karg, void *uarg); - -/* use obd ops to offer management infrastructure */ -static struct obd_ops mds_obd_ops = { - o_owner: THIS_MODULE, - o_attach: mds_attach, - o_detach: mds_detach, - o_connect: mds_connect, - o_disconnect: mds_disconnect, - o_setup: mds_setup, - o_cleanup: mds_cleanup, - o_iocontrol: mds_iocontrol, - o_destroy_export: mds_destroy_export -}; - -static struct obd_ops mdt_obd_ops = { - o_owner: THIS_MODULE, - o_attach: mdt_attach, - o_detach: mdt_detach, - o_setup: mdt_setup, - o_cleanup: mdt_cleanup, -}; - - -static int __init mds_init(void) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_multi_vars(0, &lvars); - class_register_type(&mds_obd_ops, lvars.module_vars, LUSTRE_MDS_NAME); - lprocfs_init_multi_vars(1, &lvars); - class_register_type(&mdt_obd_ops, lvars.module_vars, LUSTRE_MDT_NAME); - ldlm_register_intent(ldlm_intent_policy); - - return 0; -} - -static void __exit mds_exit(void) -{ - ldlm_unregister_intent(); - class_unregister_type(LUSTRE_MDS_NAME); - class_unregister_type(LUSTRE_MDT_NAME); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Metadata Server (MDS)"); -MODULE_LICENSE("GPL"); - -module_init(mds_init); -module_exit(mds_exit); diff --git a/lustre/mds/lproc_mds.c b/lustre/mds/lproc_mds.c deleted file mode 100644 index 5d6fa57..0000000 --- a/lustre/mds/lproc_mds.c +++ /dev/null @@ -1,116 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#include <linux/lustre_lite.h> -#include <linux/lustre_fsfilt.h> -#include <linux/lprocfs_status.h> - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_mds_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_mds_module_vars[] = { {0} }; -struct lprocfs_vars lprocfs_mdt_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_mdt_module_vars[] = { {0} }; - -#else - -static inline int lprocfs_mds_statfs(void *data, struct statfs *sfs) -{ - struct obd_device* dev = (struct obd_device*) data; - struct mds_obd *mds; - - LASSERT(dev != NULL); - mds = &dev->u.mds; - return vfs_statfs(mds->mds_sb, sfs); -} - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_mds_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_mds_statfs); - -int rd_fstype(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - - LASSERT(obd != NULL); - LASSERT(obd->obd_fsops != NULL); - LASSERT(obd->obd_fsops->fs_type != NULL); - return snprintf(page, count, "%s\n", obd->obd_fsops->fs_type); -} - -int lprocfs_mds_rd_mntdev(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device* obd = (struct obd_device *)data; - - LASSERT(obd != NULL); - LASSERT(obd->u.mds.mds_vfsmnt->mnt_devname); - *eof = 1; - return snprintf(page, count, "%s\n", - obd->u.mds.mds_vfsmnt->mnt_devname); -} - -struct lprocfs_vars lprocfs_mds_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal",rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "fstype", 
rd_fstype, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "mntdev", lprocfs_mds_rd_mntdev, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_mds_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_mdt_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_mdt_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -#endif -struct lprocfs_static_vars lprocfs_array_vars[] = { {lprocfs_mds_module_vars, - lprocfs_mds_obd_vars}, - {lprocfs_mdt_module_vars, - lprocfs_mdt_obd_vars}}; - -LPROCFS_INIT_MULTI_VARS(lprocfs_array_vars, - (sizeof(lprocfs_array_vars) / - sizeof(struct lprocfs_static_vars))) diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c deleted file mode 100644 index cefc680..0000000 --- a/lustre/mds/mds_fs.c +++ /dev/null @@ -1,453 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * mds/mds_fs.c - * Lustre Metadata Server (MDS) filesystem interface code - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_MDS - -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <linux/mount.h> -#endif -#include <linux/lustre_mds.h> -#include <linux/obd_class.h> -#include <linux/obd_support.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_fsfilt.h> - -/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ -#define MDS_MAX_CLIENTS (PAGE_SIZE * 8) -#define MDS_MAX_CLIENT_WORDS (MDS_MAX_CLIENTS / sizeof(unsigned long)) - -#define LAST_RCVD "last_rcvd" - -/* Add client data to the MDS. We use a bitmap to locate a free space - * in the last_rcvd file if cl_off is -1 (i.e. a new client). - * Otherwise, we have just read the data from the last_rcvd file and - * we know its offset. - */ -int mds_client_add(struct obd_device *obd, struct mds_obd *mds, - struct mds_export_data *med, int cl_off) -{ - unsigned long *bitmap = mds->mds_client_bitmap; - int new_client = (cl_off == -1); - - LASSERT(bitmap != NULL); - - /* XXX if mcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ - if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID")) - RETURN(0); - - /* the bitmap operations can handle cl_off > sizeof(long) * 8, so - * there's no need for extra complication here - */ - if (new_client) { - cl_off = find_first_zero_bit(bitmap, MDS_MAX_CLIENTS); - repeat: - if (cl_off >= MDS_MAX_CLIENTS) { - CERROR("no room for clients - fix MDS_MAX_CLIENTS\n"); - return -ENOMEM; - } - if (test_and_set_bit(cl_off, bitmap)) { - CERROR("MDS client %d: found bit is set in bitmap\n", - cl_off); - cl_off = find_next_zero_bit(bitmap, MDS_MAX_CLIENTS, - cl_off); - goto repeat; - } - } else { - if (test_and_set_bit(cl_off, bitmap)) { - CERROR("MDS client %d: bit already set in bitmap!!\n", - cl_off); - LBUG(); - } - } - - CDEBUG(D_INFO, "client at offset %d with UUID '%s' added\n", - cl_off, med->med_mcd->mcd_uuid); - - med->med_off = 
cl_off; - - if (new_client) { - struct obd_run_ctxt saved; - loff_t off = MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE); - ssize_t written; - void *handle; - - push_ctxt(&saved, &mds->mds_ctxt, NULL); - /* We need to start a transaction here first, to avoid a - * possible ordering deadlock on last_rcvd->i_sem and the - * journal lock. In most places we start the journal handle - * first (because we do compound transactions), and then - * later do the write into last_rcvd, which gets i_sem. - * - * Without this transaction, clients connecting at the same - * time other MDS operations are ongoing get last_rcvd->i_sem - * first (in generic_file_write()) and start the journal - * transaction afterwards, and can deadlock with other ops. - * - * We use FSFILT_OP_SETATTR because it is smallest, but all - * ops include enough space for the last_rcvd update so we - * could use any of them, or maybe an FSFILT_OP_NONE is best? - */ - handle = fsfilt_start(obd,mds->mds_rcvd_filp->f_dentry->d_inode, - FSFILT_OP_SETATTR); - if (IS_ERR(handle)) { - written = PTR_ERR(handle); - CERROR("unable to start transaction: rc %d\n", - (int)written); - } else { - written = lustre_fwrite(mds->mds_rcvd_filp,med->med_mcd, - sizeof(*med->med_mcd), &off); - fsfilt_commit(obd,mds->mds_rcvd_filp->f_dentry->d_inode, - handle, 0); - } - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - - if (written != sizeof(*med->med_mcd)) { - if (written < 0) - RETURN(written); - RETURN(-EIO); - } - CDEBUG(D_INFO, "wrote client mcd at off %u (len %u)\n", - MDS_LR_CLIENT + (cl_off * MDS_LR_SIZE), - (unsigned int)sizeof(*med->med_mcd)); - } - return 0; -} - -int mds_client_free(struct obd_export *exp) -{ - struct mds_export_data *med = &exp->exp_mds_data; - struct mds_obd *mds = &exp->exp_obd->u.mds; - struct mds_client_data zero_mcd; - struct obd_run_ctxt saved; - int written; - unsigned long *bitmap = mds->mds_client_bitmap; - loff_t off; - - LASSERT(bitmap); - if (!med->med_mcd) - RETURN(0); - - /* XXX if mcd_uuid were a real 
obd_uuid, I could use obd_uuid_equals */ - if (!strcmp(med->med_mcd->mcd_uuid, "OBD_CLASS_UUID")) - GOTO(free_and_out, 0); - - off = MDS_LR_CLIENT + (med->med_off * MDS_LR_SIZE); - - CDEBUG(D_INFO, "freeing client at offset %u (%lld)with UUID '%s'\n", - med->med_off, off, med->med_mcd->mcd_uuid); - - if (!test_and_clear_bit(med->med_off, bitmap)) { - CERROR("MDS client %u: bit already clear in bitmap!!\n", - med->med_off); - LBUG(); - } - - memset(&zero_mcd, 0, sizeof zero_mcd); - push_ctxt(&saved, &mds->mds_ctxt, NULL); - written = lustre_fwrite(mds->mds_rcvd_filp, (const char *)&zero_mcd, - sizeof(zero_mcd), &off); - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - - if (written != sizeof(zero_mcd)) { - CERROR("error zeroing out client %s off %d in %s: %d\n", - med->med_mcd->mcd_uuid, med->med_off, LAST_RCVD, - written); - } else { - CDEBUG(D_INFO, "zeroed out disconnecting client %s at off %d\n", - med->med_mcd->mcd_uuid, med->med_off); - } - - free_and_out: - OBD_FREE(med->med_mcd, sizeof(*med->med_mcd)); - - return 0; -} - -static int mds_server_free_data(struct mds_obd *mds) -{ - OBD_FREE(mds->mds_client_bitmap, - MDS_MAX_CLIENT_WORDS * sizeof(unsigned long)); - OBD_FREE(mds->mds_server_data, sizeof(*mds->mds_server_data)); - mds->mds_server_data = NULL; - - return 0; -} - -static int mds_read_last_rcvd(struct obd_device *obddev, struct file *f) -{ - struct mds_obd *mds = &obddev->u.mds; - struct mds_server_data *msd; - struct mds_client_data *mcd = NULL; - loff_t off = 0; - int cl_off; - unsigned long last_rcvd_size = f->f_dentry->d_inode->i_size; - __u64 last_transno = 0; - __u64 last_mount; - int rc = 0; - - LASSERT(sizeof(struct mds_client_data) == MDS_LR_SIZE); - LASSERT(sizeof(struct mds_server_data) <= MDS_LR_CLIENT); - - OBD_ALLOC(msd, sizeof(*msd)); - if (!msd) - RETURN(-ENOMEM); - - OBD_ALLOC(mds->mds_client_bitmap, - MDS_MAX_CLIENT_WORDS * sizeof(unsigned long)); - if (!mds->mds_client_bitmap) { - OBD_FREE(msd, sizeof(*msd)); - RETURN(-ENOMEM); - } - - rc 
= lustre_fread(f, (char *)msd, sizeof(*msd), &off); - - mds->mds_server_data = msd; - if (rc == 0) { - CERROR("%s: empty MDS %s, new MDS?\n", obddev->obd_name, - LAST_RCVD); - RETURN(0); - } - - if (rc != sizeof(*msd)) { - CERROR("error reading MDS %s: rc = %d\n", LAST_RCVD, rc); - if (rc > 0) - rc = -EIO; - GOTO(err_msd, rc); - } - - CDEBUG(D_INODE, "last_rcvd has size %lu (msd + %lu clients)\n", - last_rcvd_size, (last_rcvd_size - MDS_LR_CLIENT)/MDS_LR_SIZE); - - /* - * When we do a clean MDS shutdown, we save the last_transno into - * the header. - */ - last_transno = le64_to_cpu(msd->msd_last_transno); - mds->mds_last_transno = last_transno; - CDEBUG(D_INODE, "got "LPU64" for server last_rcvd value\n", - last_transno); - - last_mount = le64_to_cpu(msd->msd_mount_count); - mds->mds_mount_count = last_mount; - CDEBUG(D_INODE, "got "LPU64" for server last_mount value\n",last_mount); - - /* off is adjusted by lustre_fread, so we don't adjust it in the loop */ - for (off = MDS_LR_CLIENT, cl_off = 0; off < last_rcvd_size; cl_off++) { - int mount_age; - - if (!mcd) { - OBD_ALLOC(mcd, sizeof(*mcd)); - if (!mcd) - GOTO(err_msd, rc = -ENOMEM); - } - - rc = lustre_fread(f, (char *)mcd, sizeof(*mcd), &off); - if (rc != sizeof(*mcd)) { - CERROR("error reading MDS %s offset %d: rc = %d\n", - LAST_RCVD, cl_off, rc); - if (rc > 0) /* XXX fatal error or just abort reading? */ - rc = -EIO; - break; - } - - if (mcd->mcd_uuid[0] == '\0') { - CDEBUG(D_INFO, "skipping zeroed client at offset %d\n", - cl_off); - continue; - } - - last_transno = le64_to_cpu(mcd->mcd_last_transno); - - /* These exports are cleaned up by mds_disconnect(), so they - * need to be set up like real exports as mds_connect() does. 
- */ - mount_age = last_mount - le64_to_cpu(mcd->mcd_mount_count); - if (mount_age < MDS_MOUNT_RECOV) { - struct obd_export *exp = class_new_export(obddev); - struct mds_export_data *med; - - if (!exp) { - rc = -ENOMEM; - break; - } - - memcpy(&exp->exp_client_uuid.uuid, mcd->mcd_uuid, - sizeof exp->exp_client_uuid.uuid); - med = &exp->exp_mds_data; - med->med_mcd = mcd; - mds_client_add(obddev, mds, med, cl_off); - /* create helper if export init gets more complex */ - INIT_LIST_HEAD(&med->med_open_head); - spin_lock_init(&med->med_open_lock); - - mcd = NULL; - obddev->obd_recoverable_clients++; - class_export_put(exp); - } else { - CDEBUG(D_INFO, "discarded client %d, UUID '%s', count " - LPU64"\n", cl_off, mcd->mcd_uuid, - le64_to_cpu(mcd->mcd_mount_count)); - } - - CDEBUG(D_OTHER, "client at offset %d has last_transno = %Lu\n", - cl_off, (unsigned long long)last_transno); - - if (last_transno > mds->mds_last_transno) - mds->mds_last_transno = last_transno; - } - - obddev->obd_last_committed = mds->mds_last_transno; - if (obddev->obd_recoverable_clients) { - CERROR("RECOVERY: %d recoverable clients, last_transno " - LPU64"\n", - obddev->obd_recoverable_clients, mds->mds_last_transno); - obddev->obd_next_recovery_transno = obddev->obd_last_committed - + 1; - obddev->obd_recovering = 1; - } - - if (mcd) - OBD_FREE(mcd, sizeof(*mcd)); - - return 0; - -err_msd: - mds_server_free_data(mds); - return rc; -} - -static int mds_fs_prep(struct obd_device *obddev) -{ - struct mds_obd *mds = &obddev->u.mds; - struct obd_run_ctxt saved; - struct dentry *dentry; - struct file *f; - int rc; - - push_ctxt(&saved, &mds->mds_ctxt, NULL); - dentry = simple_mkdir(current->fs->pwd, "ROOT", 0755); - if (IS_ERR(dentry)) { - rc = PTR_ERR(dentry); - CERROR("cannot create ROOT directory: rc = %d\n", rc); - GOTO(err_pop, rc); - } - - mds->mds_rootfid.id = dentry->d_inode->i_ino; - mds->mds_rootfid.generation = dentry->d_inode->i_generation; - mds->mds_rootfid.f_type = S_IFDIR; - - 
dput(dentry); - - dentry = lookup_one_len("__iopen__", current->fs->pwd, - strlen("__iopen__")); - if (IS_ERR(dentry) || !dentry->d_inode) { - rc = (IS_ERR(dentry)) ? PTR_ERR(dentry): -ENOENT; - CERROR("cannot open iopen FH directory: rc = %d\n", rc); - GOTO(err_pop, rc); - } - mds->mds_fid_de = dentry; - - f = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0644); - if (IS_ERR(f)) { - rc = PTR_ERR(f); - CERROR("cannot open/create %s file: rc = %d\n", LAST_RCVD, rc); - GOTO(err_pop, rc = PTR_ERR(f)); - } - if (!S_ISREG(f->f_dentry->d_inode->i_mode)) { - CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD, - f->f_dentry->d_inode->i_mode); - GOTO(err_filp, rc = -ENOENT); - } - - rc = fsfilt_journal_data(obddev, f); - if (rc) { - CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc); - GOTO(err_filp, rc); - } - - rc = mds_read_last_rcvd(obddev, f); - if (rc) { - CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc); - GOTO(err_client, rc); - } - mds->mds_rcvd_filp = f; -err_pop: - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - - return rc; - -err_client: - class_disconnect_exports(obddev, 0); -err_filp: - if (filp_close(f, 0)) - CERROR("can't close %s after error\n", LAST_RCVD); - goto err_pop; -} - -int mds_fs_setup(struct obd_device *obddev, struct vfsmount *mnt) -{ - struct mds_obd *mds = &obddev->u.mds; - ENTRY; - - mds->mds_vfsmnt = mnt; - - OBD_SET_CTXT_MAGIC(&mds->mds_ctxt); - mds->mds_ctxt.pwdmnt = mnt; - mds->mds_ctxt.pwd = mnt->mnt_root; - mds->mds_ctxt.fs = get_ds(); - RETURN(mds_fs_prep(obddev)); -} - -int mds_fs_cleanup(struct obd_device *obddev, int failover) -{ - struct mds_obd *mds = &obddev->u.mds; - struct obd_run_ctxt saved; - int rc = 0; - - if (failover) - CERROR("%s: shutting down for failover; client state will" - " be preserved.\n", obddev->obd_name); - - class_disconnect_exports(obddev, failover); /* this cleans up client - info too */ - mds_server_free_data(mds); - - push_ctxt(&saved, &mds->mds_ctxt, NULL); - if (mds->mds_rcvd_filp) { - rc = 
filp_close(mds->mds_rcvd_filp, 0); - mds->mds_rcvd_filp = NULL; - if (rc) - CERROR("last_rcvd file won't close, rc=%d\n", rc); - } - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - shrink_dcache_parent(mds->mds_fid_de); - dput(mds->mds_fid_de); - - return rc; -} diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h deleted file mode 100644 index 0b62a92..0000000 --- a/lustre/mds/mds_internal.h +++ /dev/null @@ -1,15 +0,0 @@ -struct mds_file_data *mds_mfd_new(void); -void mds_mfd_put(struct mds_file_data *mfd); -void mds_mfd_destroy(struct mds_file_data *mfd); -int mds_update_unpack(struct ptlrpc_request *, int offset, - struct mds_update_record *); - -/* mds/mds_fs.c */ -int mds_client_add(struct obd_device *obd, struct mds_obd *mds, - struct mds_export_data *med, int cl_off); -int mds_client_free(struct obd_export *exp); - -#ifdef __KERNEL__ -void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode); -void mds_pack_inode2body(struct mds_body *body, struct inode *inode); -#endif diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c deleted file mode 100644 index 8f16795..0000000 --- a/lustre/mds/mds_lib.c +++ /dev/null @@ -1,310 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_MDS - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -# include <linux/locks.h> // for wait_on_buffer -#else -# include <linux/buffer_head.h> // for wait_on_buffer -#endif -#include <linux/unistd.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <linux/slab.h> -#include <asm/segment.h> - -#include <linux/obd_support.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_lite.h> - -void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode) -{ - fid->id = inode->i_ino; - fid->generation = inode->i_generation; - fid->f_type = (S_IFMT & inode->i_mode); -} - -void mds_pack_inode2body(struct mds_body *b, struct inode *inode) -{ - b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME | - OBD_MD_FLCTIME | OBD_MD_FLSIZE | OBD_MD_FLBLOCKS | - OBD_MD_FLUID | OBD_MD_FLGID | OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLNLINK | OBD_MD_FLGENER; - - /* The MDS file size isn't authoritative for regular files, so don't - * even pretend. 
*/ - if (S_ISREG(inode->i_mode)) - b->valid &= ~(OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); - - b->ino = inode->i_ino; - b->atime = LTIME_S(inode->i_atime); - b->mtime = LTIME_S(inode->i_mtime); - b->ctime = LTIME_S(inode->i_ctime); - b->mode = inode->i_mode; - b->size = inode->i_size; - b->blocks = inode->i_blocks; - b->uid = inode->i_uid; - b->gid = inode->i_gid; - b->flags = inode->i_flags; - b->rdev = b->rdev; - b->nlink = inode->i_nlink; - b->generation = inode->i_generation; - b->suppgid = -1; -} -/* unpacking */ -static int mds_setattr_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct iattr *attr = &r->ur_iattr; - struct mds_rec_setattr *rec; - ENTRY; - - rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), - lustre_swab_mds_rec_setattr); - if (rec == NULL) - RETURN (-EFAULT); - - r->ur_fsuid = rec->sa_fsuid; - r->ur_fsgid = rec->sa_fsgid; - r->ur_cap = rec->sa_cap; - r->ur_suppgid1 = rec->sa_suppgid; - r->ur_suppgid2 = -1; - r->ur_fid1 = &rec->sa_fid; - attr->ia_valid = rec->sa_valid; - attr->ia_mode = rec->sa_mode; - attr->ia_uid = rec->sa_uid; - attr->ia_gid = rec->sa_gid; - attr->ia_size = rec->sa_size; - LTIME_S(attr->ia_atime) = rec->sa_atime; - LTIME_S(attr->ia_mtime) = rec->sa_mtime; - LTIME_S(attr->ia_ctime) = rec->sa_ctime; - attr->ia_attr_flags = rec->sa_attr_flags; - - LASSERT_REQSWAB (req, offset + 1); - if (req->rq_reqmsg->bufcount > offset + 1) { - r->ur_eadata = lustre_msg_buf (req->rq_reqmsg, - offset + 1, 0); - if (r->ur_eadata == NULL) - RETURN (-EFAULT); - r->ur_eadatalen = req->rq_reqmsg->buflens[offset + 1]; - } else { - r->ur_eadata = NULL; - r->ur_eadatalen = 0; - } - - RETURN(0); -} - -static int mds_create_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_create *rec; - ENTRY; - - rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), - lustre_swab_mds_rec_create); - if (rec == NULL) - RETURN (-EFAULT); - - r->ur_fsuid = rec->cr_fsuid; - r->ur_fsgid = 
rec->cr_fsgid; - r->ur_cap = rec->cr_cap; - r->ur_fid1 = &rec->cr_fid; - r->ur_fid2 = &rec->cr_replayfid; - r->ur_mode = rec->cr_mode; - r->ur_rdev = rec->cr_rdev; - r->ur_uid = rec->cr_uid; - r->ur_gid = rec->cr_gid; - r->ur_time = rec->cr_time; - r->ur_flags = rec->cr_flags; - r->ur_suppgid1 = rec->cr_suppgid; - r->ur_suppgid2 = -1; - - LASSERT_REQSWAB (req, offset + 1); - r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN (-EFAULT); - r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; - - LASSERT_REQSWAB (req, offset + 2); - if (req->rq_reqmsg->bufcount > offset + 2) { - /* NB for now, we only seem to pass NULL terminated symlink - * target strings here. If this ever changes, we'll have - * to stop checking for a buffer filled completely with a - * NULL terminated string here, and make the callers check - * depending on what they expect. We should probably stash - * it in r->ur_eadata in that case, so it's obvious... -eeb - */ - r->ur_tgt = lustre_msg_string(req->rq_reqmsg, offset + 2, 0); - if (r->ur_tgt == NULL) - RETURN (-EFAULT); - r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2]; - } else { - r->ur_tgt = NULL; - r->ur_tgtlen = 0; - } - RETURN(0); -} - -static int mds_link_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_link *rec; - ENTRY; - - rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), - lustre_swab_mds_rec_link); - if (rec == NULL) - RETURN (-EFAULT); - - r->ur_fsuid = rec->lk_fsuid; - r->ur_fsgid = rec->lk_fsgid; - r->ur_cap = rec->lk_cap; - r->ur_suppgid1 = rec->lk_suppgid1; - r->ur_suppgid2 = rec->lk_suppgid2; - r->ur_fid1 = &rec->lk_fid1; - r->ur_fid2 = &rec->lk_fid2; - - LASSERT_REQSWAB (req, offset + 1); - r->ur_name = lustre_msg_string (req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN (-EFAULT); - r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; - RETURN(0); -} - -static int mds_unlink_unpack(struct ptlrpc_request 
*req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_unlink *rec; - ENTRY; - - rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), - lustre_swab_mds_rec_unlink); - if (rec == NULL) - RETURN(-EFAULT); - - r->ur_fsuid = rec->ul_fsuid; - r->ur_fsgid = rec->ul_fsgid; - r->ur_cap = rec->ul_cap; - r->ur_mode = rec->ul_mode; - r->ur_suppgid1 = rec->ul_suppgid; - r->ur_suppgid2 = -1; - r->ur_fid1 = &rec->ul_fid1; - r->ur_fid2 = &rec->ul_fid2; - - LASSERT_REQSWAB (req, offset + 1); - r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN(-EFAULT); - r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; - RETURN(0); -} - -static int mds_rename_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *r) -{ - struct mds_rec_rename *rec; - ENTRY; - - rec = lustre_swab_reqbuf (req, offset, sizeof (*rec), - lustre_swab_mds_rec_unlink); - if (rec == NULL) - RETURN(-EFAULT); - - r->ur_fsuid = rec->rn_fsuid; - r->ur_fsgid = rec->rn_fsgid; - r->ur_cap = rec->rn_cap; - r->ur_suppgid1 = rec->rn_suppgid1; - r->ur_suppgid2 = rec->rn_suppgid2; - r->ur_fid1 = &rec->rn_fid1; - r->ur_fid2 = &rec->rn_fid2; - - LASSERT_REQSWAB (req, offset + 1); - r->ur_name = lustre_msg_string(req->rq_reqmsg, offset + 1, 0); - if (r->ur_name == NULL) - RETURN(-EFAULT); - r->ur_namelen = req->rq_reqmsg->buflens[offset + 1]; - - LASSERT_REQSWAB (req, offset + 2); - r->ur_tgt = lustre_msg_string(req->rq_reqmsg, offset + 2, 0); - if (r->ur_tgt == NULL) - RETURN(-EFAULT); - r->ur_tgtlen = req->rq_reqmsg->buflens[offset + 2]; - RETURN(0); -} - -typedef int (*update_unpacker)(struct ptlrpc_request *req, int offset, - struct mds_update_record *r); - -static update_unpacker mds_unpackers[REINT_MAX + 1] = { - [REINT_SETATTR] mds_setattr_unpack, - [REINT_CREATE] mds_create_unpack, - [REINT_LINK] mds_link_unpack, - [REINT_UNLINK] mds_unlink_unpack, - [REINT_RENAME] mds_rename_unpack, - [REINT_OPEN] mds_create_unpack, -}; - -int 
mds_update_unpack(struct ptlrpc_request *req, int offset, - struct mds_update_record *rec) -{ - __u32 *opcodep; - __u32 opcode; - int rc; - ENTRY; - - /* NB don't lustre_swab_reqbuf() here. We're just taking a peek - * and we want to leave it to the specific unpacker once we've - * identified the message type */ - opcodep = lustre_msg_buf (req->rq_reqmsg, offset, sizeof (*opcodep)); - if (opcodep == NULL) - RETURN(-EFAULT); - - opcode = *opcodep; - if (lustre_msg_swabbed (req->rq_reqmsg)) - __swab32s (&opcode); - - if (opcode > REINT_MAX || - mds_unpackers[opcode] == NULL) { - CERROR ("Unexpected opcode %d\n", opcode); - RETURN(-EFAULT); - } - - rec->ur_opcode = opcode; - rc = mds_unpackers[opcode](req, offset, rec); - RETURN(rc); -} diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c deleted file mode 100644 index 02c53cc..0000000 --- a/lustre/mds/mds_lov.c +++ /dev/null @@ -1,286 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/mds/mds_lov.c - * Lustre Metadata Server (mds) handling of striped file data - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_MDS - -#include <linux/module.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_idl.h> -#include <linux/obd_class.h> -#include <linux/obd_lov.h> -#include <linux/lustre_lib.h> - -void le_lov_desc_to_cpu (struct lov_desc *ld) -{ - ld->ld_tgt_count = le32_to_cpu (ld->ld_tgt_count); - ld->ld_default_stripe_count = le32_to_cpu (ld->ld_default_stripe_count); - ld->ld_default_stripe_size = le32_to_cpu (ld->ld_default_stripe_size); - ld->ld_pattern = le32_to_cpu (ld->ld_pattern); -} - -void cpu_to_le_lov_desc (struct lov_desc *ld) -{ - ld->ld_tgt_count = cpu_to_le32 (ld->ld_tgt_count); - ld->ld_default_stripe_count = cpu_to_le32 (ld->ld_default_stripe_count); - ld->ld_default_stripe_size = cpu_to_le32 (ld->ld_default_stripe_size); - ld->ld_pattern = cpu_to_le32 (ld->ld_pattern); -} - -int mds_set_lovdesc(struct obd_device *obd, struct lov_desc *desc, - struct obd_uuid *uuidarray) -{ - struct mds_obd *mds = &obd->u.mds; - struct obd_run_ctxt saved; - struct file *f; - int tgt_count; - int rc; - int i; - struct lov_desc *disk_desc; - ENTRY; - - tgt_count = desc->ld_tgt_count; - if (desc->ld_default_stripe_count > desc->ld_tgt_count) { - CERROR("default stripe count %u > OST count %u\n", - desc->ld_default_stripe_count, desc->ld_tgt_count); - RETURN(-EINVAL); - } - if (desc->ld_default_stripe_size & (PAGE_SIZE - 1)) { - CERROR("default stripe size "LPU64" not a multiple of %lu\n", - desc->ld_default_stripe_size, PAGE_SIZE); - RETURN(-EINVAL); - } - if (desc->ld_default_stripe_offset > desc->ld_tgt_count) { - CERROR("default stripe offset "LPU64" > max OST index %u\n", - desc->ld_default_stripe_offset, desc->ld_tgt_count); - RETURN(-EINVAL); - } - if (desc->ld_pattern != 0) { - CERROR("stripe pattern %u unknown\n", - desc->ld_pattern); - RETURN(-EINVAL); - } - - OBD_ALLOC (disk_desc, sizeof (*disk_desc)); - if (disk_desc == NULL) { - CERROR ("Can't allocate disk_desc\n"); - RETURN (-ENOMEM); - } - - *disk_desc = 
*desc; - cpu_to_le_lov_desc (disk_desc); - - rc = 0; - push_ctxt(&saved, &mds->mds_ctxt, NULL); - - /* Bug 1186: FIXME: if there is an existing LOVDESC, verify new - * tgt_count > old */ - f = filp_open("LOVDESC", O_CREAT|O_RDWR, 0644); - if (IS_ERR(f)) { - CERROR("Cannot open/create LOVDESC file\n"); - GOTO(out, rc = PTR_ERR(f)); - } - - rc = lustre_fwrite(f, (char *)disk_desc, sizeof(*disk_desc), &f->f_pos); - if (filp_close(f, 0)) - CERROR("Error closing LOVDESC file\n"); - if (rc != sizeof(*desc)) { - CERROR("Cannot open/create LOVDESC file\n"); - if (rc >= 0) - rc = -EIO; - GOTO(out, rc); - } - - /* Bug 1186: FIXME: if there is an existing LOVTGTS, verify - * existing UUIDs same */ - f = filp_open("LOVTGTS", O_CREAT|O_RDWR, 0644); - if (IS_ERR(f)) { - CERROR("Cannot open/create LOVTGTS file\n"); - GOTO(out, rc = PTR_ERR(f)); - } - - rc = 0; - for (i = 0; i < tgt_count ; i++) { - rc = lustre_fwrite(f, uuidarray[i].uuid, - sizeof(uuidarray[i]), &f->f_pos); - if (rc != sizeof(uuidarray[i])) { - CERROR("cannot write LOV UUID %s (%d)\n", - uuidarray[i].uuid, i); - if (rc >= 0) - rc = -EIO; - break; - } - rc = 0; - } - if (filp_close(f, 0)) - CERROR("Error closing LOVTGTS file\n"); - - memcpy(&mds->mds_lov_desc, desc, sizeof *desc); - mds->mds_has_lov_desc = 1; - /* XXX the MDS should not really know about this */ - mds->mds_max_mdsize = lov_mds_md_size(desc->ld_tgt_count); - -out: - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - OBD_FREE (disk_desc, sizeof (*disk_desc)); - - RETURN(rc); -} - -int mds_get_lovdesc(struct mds_obd *mds, struct lov_desc *desc) -{ - struct obd_run_ctxt saved; - struct file *f; - int rc; - ENTRY; - - push_ctxt(&saved, &mds->mds_ctxt, NULL); - f = filp_open("LOVDESC", O_RDONLY, 0644); - if (IS_ERR(f)) { - CERROR("Cannot open LOVDESC file\n"); - GOTO(out, rc = PTR_ERR(f)); - } - - rc = lustre_fread(f, (char *)desc, sizeof(*desc), &f->f_pos); - if (filp_close(f, 0)) - CERROR("Error closing LOVDESC file\n"); - - if (rc != sizeof(*desc)) { - 
CERROR("Cannot read LOVDESC file: rc = %d\n", rc); - GOTO(out, rc = -EIO); - } else - rc = 0; - - le_lov_desc_to_cpu (desc); /* convert to my byte order */ - - EXIT; -out: - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - - return rc; -} - -int mds_get_lovtgts(struct mds_obd *mds, int tgt_count,struct obd_uuid *uuidarray) -{ - struct obd_run_ctxt saved; - struct file *f; - int rc; - int rc2; - - push_ctxt(&saved, &mds->mds_ctxt, NULL); - f = filp_open("LOVTGTS", O_RDONLY, 0644); - if (IS_ERR(f)) { - CERROR("Cannot open LOVTGTS file\n"); - GOTO(out, rc = PTR_ERR(f)); - } - - rc = lustre_fread(f, (char *)uuidarray, tgt_count * sizeof(*uuidarray), - &f->f_pos); - rc2 = filp_close(f, 0); - if (rc2) - CERROR("Error closing LOVTGTS file: rc = %d\n", rc2); - - if (rc != tgt_count * sizeof(*uuidarray)) { - CERROR("Error reading LOVTGTS file: rc = %d\n", rc); - if (rc >= 0) - rc = -EIO; - GOTO(out, rc); - } else - rc = 0; - EXIT; -out: - pop_ctxt(&saved, &mds->mds_ctxt, NULL); - - RETURN(rc); -} - -int mds_iocontrol(unsigned int cmd, struct lustre_handle *conn, - int len, void *karg, void *uarg) -{ - struct obd_device *obd = class_conn2obd(conn); - struct obd_ioctl_data *data = karg; - struct lov_desc *desc; - struct obd_uuid *uuidarray; - int count; - int rc; - - switch (cmd) { - case OBD_IOC_LOV_SET_CONFIG: - desc = (struct lov_desc *)data->ioc_inlbuf1; - if (sizeof(*desc) > data->ioc_inllen1) { - CERROR("descriptor size wrong\n"); - RETURN(-EINVAL); - } - - count = desc->ld_tgt_count; - uuidarray = (struct obd_uuid *)data->ioc_inlbuf2; - if (sizeof(*uuidarray) * count != data->ioc_inllen2) { - CERROR("UUID array size wrong\n"); - RETURN(-EINVAL); - } - rc = mds_set_lovdesc(obd, desc, uuidarray); - - RETURN(rc); - case OBD_IOC_LOV_GET_CONFIG: - desc = (struct lov_desc *)data->ioc_inlbuf1; - if (sizeof(*desc) > data->ioc_inllen1) { - CERROR("descriptor size wrong\n"); - RETURN(-EINVAL); - } - - count = desc->ld_tgt_count; - uuidarray = (struct obd_uuid *)data->ioc_inlbuf2; - if 
(sizeof(*uuidarray) * count != data->ioc_inllen2) { - CERROR("UUID array size wrong\n"); - RETURN(-EINVAL); - } - rc = mds_get_lovdesc(&obd->u.mds, desc); - if (desc->ld_tgt_count > count) { - CERROR("UUID array size too small\n"); - RETURN(-ENOSPC); - } - rc = mds_get_lovtgts(&obd->u.mds, desc->ld_tgt_count, - uuidarray); - - RETURN(rc); - - case OBD_IOC_SET_READONLY: - CERROR("setting device %s read-only\n", - ll_bdevname(obd->u.mds.mds_sb->s_dev)); -#ifdef CONFIG_DEV_RDONLY - dev_set_rdonly(obd->u.mds.mds_sb->s_dev, 2); -#endif - RETURN(0); - - case OBD_IOC_ABORT_RECOVERY: - CERROR("aborting recovery for device %s\n", obd->obd_name); - target_abort_recovery(obd); - RETURN(0); - - default: - RETURN(-EINVAL); - } - RETURN(0); -} diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c deleted file mode 100644 index 04d6ee9..0000000 --- a/lustre/mds/mds_open.c +++ /dev/null @@ -1,429 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Mike Shaver <shaver@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_MDS - -#include <linux/module.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_dlm.h> -#include <linux/init.h> -#include <linux/obd_class.h> -#include <linux/random.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -# include <linux/buffer_head.h> -# include <linux/workqueue.h> -#else -# include <linux/locks.h> -#endif -#include <linux/obd_lov.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_fsfilt.h> -#include <linux/lprocfs_status.h> - -#include "mds_internal.h" - -extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req); -int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, - struct ptlrpc_request *req, int rc, __u32 op_data); -extern int enqueue_ordered_locks(int lock_mode, struct obd_device *obd, - struct ldlm_res_id *p1_res_id, - struct ldlm_res_id *p2_res_id, - struct ldlm_res_id *c1_res_id, - struct ldlm_res_id *c2_res_id, - struct lustre_handle *p1_lockh, - struct lustre_handle *p2_lockh, - struct lustre_handle *c1_lockh, - struct lustre_handle *c2_lockh); - -struct mds_file_data *mds_dentry_open(struct dentry *dentry, - struct vfsmount *mnt, - int flags, - struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct inode *inode; - int mode; - struct mds_file_data *mfd; - int error; - - mfd = mds_mfd_new(); - if (!mfd) { - CERROR("mds: out of memory\n"); - GOTO(cleanup_dentry, error = -ENOMEM); - } - - mode = (flags+1) & O_ACCMODE; - inode = dentry->d_inode; - - if (mode & FMODE_WRITE) { - error = get_write_access(inode); - if (error) - goto cleanup_mfd; - } - - mfd->mfd_mode = mode; - mfd->mfd_dentry = dentry; - mfd->mfd_xid = req->rq_xid; - - spin_lock(&med->med_open_lock); - list_add(&mfd->mfd_list, &med->med_open_head); - spin_unlock(&med->med_open_lock); - mds_mfd_put(mfd); - return mfd; - -cleanup_mfd: - mds_mfd_put(mfd); - mds_mfd_destroy(mfd); -cleanup_dentry: - dput(dentry); - mntput(mnt); - 
return ERR_PTR(error); -} - -void reconstruct_open(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *child_lockh) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - struct mds_obd *mds = mds_req2mds(req); - struct mds_file_data *mfd; - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *parent, *child; - struct ldlm_reply *rep; - struct mds_body *body; - int disp, rc; - struct list_head *t; - int put_child = 1; - ENTRY; - - LASSERT(offset == 2); /* only called via intent */ - rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); - body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body)); - - /* copy rc, transno and disp; steal locks */ - req->rq_transno = mcd->mcd_last_transno; - req->rq_status = mcd->mcd_last_result; - disp = rep->lock_policy_res1 = mcd->mcd_last_data; - - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); - - /* We never care about these. */ - disp &= ~(IT_OPEN_LOOKUP | IT_OPEN_POS | IT_OPEN_NEG); - if (!disp) { - EXIT; - return; /* error looking up parent or child */ - } - - parent = mds_fid2dentry(mds, rec->ur_fid1, NULL); - LASSERT(!IS_ERR(parent)); - - child = ll_lookup_one_len(rec->ur_name, parent, rec->ur_namelen - 1); - LASSERT(!IS_ERR(child)); - - if (!child->d_inode) { - GOTO(out_dput, 0); /* child not present to open */ - } - - /* At this point, we know we have a child, which means that we'll send - * it back _unless_ it was open failed, _and_ we didn't create the file. - * I love you guys. No, really. 
- */ - if (((disp & (IT_OPEN_OPEN | IT_OPEN_CREATE)) == IT_OPEN_OPEN) && - req->rq_status) { - GOTO(out_dput, 0); - } - - /* get lock (write for O_CREAT, read otherwise) */ - - mds_pack_inode2fid(&body->fid1, child->d_inode); - mds_pack_inode2body(body, child->d_inode); - if (S_ISREG(child->d_inode->i_mode)) { - rc = mds_pack_md(obd, req->rq_repmsg, 2, body, - child->d_inode); - if (rc) - LASSERT(rc == req->rq_status); - } else { - /* XXX need to check this case */ - } - - /* If we're opening a file without an EA, change to a write - lock (unless we already have one). */ - - /* If we have -EEXIST as the status, and we were asked to create - * exclusively, we can tell we failed because the file already existed. - */ - if (req->rq_status == -EEXIST && - ((rec->ur_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) { - GOTO(out_dput, 0); - } - - /* If we didn't get as far as trying to open, then some locking thing - * probably went wrong, and we'll just bail here. - */ - if ((disp & IT_OPEN_OPEN) == 0) - GOTO(out_dput, 0); - - /* If we failed, then we must have failed opening, so don't look for - * file descriptor or anything, just give the client the bad news. 
- */ - if (req->rq_status) - GOTO(out_dput, 0); - - mfd = NULL; - list_for_each(t, &med->med_open_head) { - mfd = list_entry(t, struct mds_file_data, mfd_list); - if (mfd->mfd_xid == req->rq_xid) - break; - mfd = NULL; - } - - if (req->rq_export->exp_outstanding_reply) { - /* if we're not recovering, it had better be found */ - LASSERT(mfd); - } else if (mfd == NULL) { - mntget(mds->mds_vfsmnt); - CERROR("Re-opened file \n"); - mfd = mds_dentry_open(child, mds->mds_vfsmnt, - rec->ur_flags & ~(O_DIRECT | O_TRUNC), req); - if (!mfd) { - CERROR("mds: out of memory\n"); - GOTO(out_dput, req->rq_status = -ENOMEM); - } - put_child = 0; - } - - body->handle.cookie = mfd->mfd_handle.h_cookie; - - out_dput: - if (put_child) - l_dput(child); - l_dput(parent); - EXIT; -} - -int mds_open(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, struct lustre_handle *child_lockh) -{ - static const char acc_table [] = {[O_RDONLY] MAY_READ, - [O_WRONLY] MAY_WRITE, - [O_RDWR] MAY_READ | MAY_WRITE}; - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct ldlm_reply *rep; - struct mds_body *body; - struct dentry *dchild = NULL, *parent; - struct mds_export_data *med; - struct mds_file_data *mfd = NULL; - struct ldlm_res_id child_res_id = { .name = {0} }; - struct lustre_handle parent_lockh; - int rc = 0, parent_mode, child_mode = LCK_PR, lock_flags, created = 0; - int cleanup_phase = 0; - void *handle = NULL; - int acc_mode; - ENTRY; - - LASSERT(offset == 2); /* only called via intent */ - rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rep)); - body = lustre_msg_buf(req->rq_repmsg, 1, sizeof (*body)); - - MDS_CHECK_RESENT(req, reconstruct_open(rec, offset, req, child_lockh)); - - med = &req->rq_export->exp_mds_data; - rep->lock_policy_res1 |= IT_OPEN_LOOKUP; - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_OPEN_PACK)) { - CERROR("test case OBD_FAIL_MDS_OPEN_PACK\n"); - req->rq_status = -ENOMEM; - RETURN(-ENOMEM); - } - - if 
((rec->ur_flags & O_ACCMODE) >= sizeof (acc_table)) - RETURN(-EINVAL); - acc_mode = acc_table [rec->ur_flags & O_ACCMODE]; - if ((rec->ur_flags & O_TRUNC) != 0) - acc_mode |= MAY_WRITE; - - /* Step 1: Find and lock the parent */ - parent_mode = (rec->ur_flags & O_CREAT) ? LCK_PW : LCK_PR; - parent = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, parent_mode, - &parent_lockh); - if (IS_ERR(parent)) { - rc = PTR_ERR(parent); - CERROR("parent lookup error %d\n", rc); - GOTO(cleanup, rc); - } - LASSERT(parent->d_inode); - - cleanup_phase = 1; /* parent dentry and lock */ - - /* Step 2: Lookup the child */ - dchild = ll_lookup_one_len(rec->ur_name, parent, rec->ur_namelen - 1); - if (IS_ERR(dchild)) - GOTO(cleanup, rc = PTR_ERR(dchild)); - - cleanup_phase = 2; /* child dentry */ - - if (dchild->d_inode) - rep->lock_policy_res1 |= IT_OPEN_POS; - else - rep->lock_policy_res1 |= IT_OPEN_NEG; - - /* Step 3: If the child was negative, and we're supposed to, - * create it. */ - if (!dchild->d_inode) { - if (!(rec->ur_flags & O_CREAT)) { - /* It's negative and we weren't supposed to create it */ - GOTO(cleanup, rc = -ENOENT); - } - - rep->lock_policy_res1 |= IT_OPEN_CREATE; - handle = fsfilt_start(obd, parent->d_inode, FSFILT_OP_CREATE); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - handle = NULL; - GOTO(cleanup, rc); - } - rc = vfs_create(parent->d_inode, dchild, rec->ur_mode); - if (rc) - GOTO(cleanup, rc); - created = 1; - child_mode = LCK_PW; - acc_mode = 0; /* Don't check for permissions */ - } - - /* Step 4: It's positive, so lock the child */ - child_res_id.name[0] = dchild->d_inode->i_ino; - child_res_id.name[1] = dchild->d_inode->i_generation; - reacquire: - lock_flags = 0; - rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, - child_res_id, LDLM_PLAIN, NULL, 0, child_mode, - &lock_flags, ldlm_completion_ast, - mds_blocking_ast, NULL, child_lockh); - if (rc != ELDLM_OK) { - CERROR("ldlm_cli_enqueue: %d\n", rc); - GOTO(cleanup, rc = -EIO); - } - - 
cleanup_phase = 3; /* child lock */ - - mds_pack_inode2fid(&body->fid1, dchild->d_inode); - mds_pack_inode2body(body, dchild->d_inode); - - if (S_ISREG(dchild->d_inode->i_mode)) { - /* Check permissions etc */ - rc = permission(dchild->d_inode, acc_mode); - if (rc != 0) - GOTO(cleanup, rc); - - /* Can't write to a read-only file */ - if (IS_RDONLY(dchild->d_inode) && (acc_mode & MAY_WRITE) != 0) - GOTO(cleanup, rc = -EPERM); - - /* An append-only file must be opened in append mode for - * writing */ - if (IS_APPEND(dchild->d_inode) && - (acc_mode & MAY_WRITE) != 0 && - ((rec->ur_flags & O_APPEND) == 0 || - (rec->ur_flags & O_TRUNC) != 0)) - GOTO (cleanup, rc = -EPERM); - - rc = mds_pack_md(obd, req->rq_repmsg, 2, body, dchild->d_inode); - if (rc) - GOTO(cleanup, rc); - } - - if (!created && (rec->ur_flags & O_CREAT) && - (rec->ur_flags & O_EXCL)) { - /* File already exists, we didn't just create it, and we - * were passed O_EXCL; err-or. */ - GOTO(cleanup, rc = -EEXIST); // returns a lock to the client - } - - /* If we're opening a file without an EA, the client needs a write - * lock. 
*/ - if (S_ISREG(dchild->d_inode->i_mode) && - child_mode != LCK_PW && !(body->valid & OBD_MD_FLEASIZE)) { - ldlm_lock_decref(child_lockh, child_mode); - child_mode = LCK_PW; - goto reacquire; - } - - /* if we are following a symlink, don't open */ - if (S_ISLNK(dchild->d_inode->i_mode)) - GOTO(cleanup, rc = 0); - - if ((rec->ur_flags & O_DIRECTORY) && !S_ISDIR(dchild->d_inode->i_mode)) - GOTO(cleanup, rc = -ENOTDIR); - - /* Step 5: mds_open it */ - rep->lock_policy_res1 |= IT_OPEN_OPEN; - - /* dentry_open does a dput(de) and mntput(mds->mds_vfsmnt) on error */ - mfd = mds_dentry_open(dchild, mds->mds_vfsmnt, - rec->ur_flags & ~(O_DIRECT | O_TRUNC), req); - if (!mfd) { - CERROR("mds: out of memory\n"); - dchild = NULL; /* prevent a double dput in step 2 */ - GOTO(cleanup, rc = -ENOMEM); - } - - cleanup_phase = 4; /* mfd allocated */ - body->handle.cookie = mfd->mfd_handle.h_cookie; - CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd, - mfd->mfd_handle.h_cookie); - GOTO(cleanup, rc = 0); /* returns a lock to the client */ - - cleanup: - rc = mds_finish_transno(mds, dchild ? dchild->d_inode : NULL, handle, - req, rc, rep->lock_policy_res1); - switch (cleanup_phase) { - case 4: - if (rc && !S_ISLNK(dchild->d_inode->i_mode)) - mds_mfd_destroy(mfd); - case 3: - /* This is the same logic as in the IT_OPEN part of - * ldlm_intent_policy: if we found the dentry, or we tried to - * open it (meaning that we created, if it wasn't found), then - * we return the lock to the caller and client. 
*/ - if (!(rep->lock_policy_res1 & (IT_OPEN_OPEN | IT_OPEN_POS))) - ldlm_lock_decref(child_lockh, child_mode); - case 2: - if (rc || S_ISLNK(dchild->d_inode->i_mode)) - l_dput(dchild); - case 1: - l_dput(parent); - if (rc) { - ldlm_lock_decref(&parent_lockh, parent_mode); - } else { - memcpy(&req->rq_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - req->rq_ack_locks[0].mode = parent_mode; - } - } - RETURN(rc); -} diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c deleted file mode 100644 index 50949dd..0000000 --- a/lustre/mds/mds_reint.c +++ /dev/null @@ -1,1148 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/mds/mds_reint.c - * Lustre Metadata Server (mds) reintegration routines - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_MDS - -#include <linux/fs.h> -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/obd.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_idl.h> -#include <linux/lustre_mds.h> -#include <linux/lustre_dlm.h> -#include <linux/lustre_fsfilt.h> -#include "mds_internal.h" - -extern inline struct mds_obd *mds_req2mds(struct ptlrpc_request *req); - -static void mds_commit_cb(struct obd_device *obd, __u64 transno, int error) -{ - obd_transno_commit_cb(obd, transno, error); -} - -/* Assumes caller has already pushed us into the kernel context. */ -int mds_finish_transno(struct mds_obd *mds, struct inode *i, void *handle, - struct ptlrpc_request *req, int rc, - __u32 op_data) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - struct obd_device *obd = req->rq_export->exp_obd; - int err; - __u64 transno; - loff_t off; - ssize_t written; - ENTRY; - - /* if the export has already been failed, we have no last_rcvd slot */ - if (req->rq_export->exp_failed) { - CERROR("committing transaction for disconnected client\n"); - if (handle) - GOTO(commit, rc); - GOTO(out, rc); - } - - if (!handle) { - /* if we're starting our own xaction, use our own inode */ - i = mds->mds_rcvd_filp->f_dentry->d_inode; - handle = fsfilt_start(obd, i, FSFILT_OP_SETATTR); - if (IS_ERR(handle)) { - CERROR("fsfilt_start: %ld\n", PTR_ERR(handle)); - GOTO(out, rc = PTR_ERR(handle)); - } - } - - off = MDS_LR_CLIENT + med->med_off * MDS_LR_SIZE; - - transno = req->rq_reqmsg->transno; - if (transno == 0) { - spin_lock(&mds->mds_transno_lock); - transno = ++mds->mds_last_transno; - spin_unlock(&mds->mds_transno_lock); - } - req->rq_repmsg->transno = req->rq_transno = transno; - mcd->mcd_last_transno = cpu_to_le64(transno); - mcd->mcd_mount_count = cpu_to_le64(mds->mds_mount_count); - mcd->mcd_last_xid = cpu_to_le64(req->rq_xid); - mcd->mcd_last_result = 
cpu_to_le32(rc); - mcd->mcd_last_data = cpu_to_le32(op_data); - - fsfilt_set_last_rcvd(req->rq_export->exp_obd, transno, handle, - mds_commit_cb); - written = lustre_fwrite(mds->mds_rcvd_filp, mcd, sizeof(*mcd), &off); - CDEBUG(D_INODE, "wrote trans "LPU64" client %s at #%u: written = " - LPSZ"\n", transno, mcd->mcd_uuid, med->med_off, written); - - if (written != sizeof(*mcd)) { - CERROR("error writing to last_rcvd: rc = "LPSZ"\n", written); - if (rc == 0) { - if (written < 0) - rc = written; - else - rc = -EIO; - } - } - -commit: - err = fsfilt_commit(obd, i, handle, 0); - if (err) { - CERROR("error committing transaction: %d\n", err); - if (!rc) - rc = err; - } - - EXIT; - out: - return rc; -} - -/* this gives the same functionality as the code between - * sys_chmod and inode_setattr - * chown_common and inode_setattr - * utimes and inode_setattr - */ -int mds_fix_attr(struct inode *inode, struct mds_update_record *rec) -{ - time_t now = LTIME_S(CURRENT_TIME); - struct iattr *attr = &rec->ur_iattr; - unsigned int ia_valid = attr->ia_valid; - int error; - ENTRY; - - /* only fix up attrs if the client VFS didn't already */ - if (!(ia_valid & ATTR_RAW)) - RETURN(0); - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - RETURN(-EPERM); - - LTIME_S(attr->ia_ctime) = now; - if (!(ia_valid & ATTR_ATIME_SET)) - LTIME_S(attr->ia_atime) = now; - if (!(ia_valid & ATTR_MTIME_SET)) - LTIME_S(attr->ia_mtime) = now; - - /* times */ - if ((ia_valid & (ATTR_MTIME|ATTR_ATIME))==(ATTR_MTIME|ATTR_ATIME) && - !(ia_valid & ATTR_ATIME_SET)) { - if (rec->ur_fsuid != inode->i_uid && - (error = permission(inode,MAY_WRITE)) != 0) - RETURN(error); - } else if (ia_valid & ATTR_UID) { - /* chown */ - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - RETURN(-EPERM); - if (attr->ia_uid == (uid_t) -1) - attr->ia_uid = inode->i_uid; - if (attr->ia_gid == (gid_t) -1) - attr->ia_gid = inode->i_gid; - attr->ia_mode = inode->i_mode; - attr->ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; - 
/* - * If the user or group of a non-directory has been - * changed by a non-root user, remove the setuid bit. - * 19981026 David C Niemi <niemi@tux.org> - * - * Changed this to apply to all users, including root, - * to avoid some races. This is the behavior we had in - * 2.0. The check for non-root was definitely wrong - * for 2.2 anyway, as it should have been using - * CAP_FSETID rather than fsuid -- 19990830 SD. - */ - if ((inode->i_mode & S_ISUID) == S_ISUID && - !S_ISDIR(inode->i_mode)) { - attr->ia_mode &= ~S_ISUID; - attr->ia_valid |= ATTR_MODE; - } - /* - * Likewise, if the user or group of a non-directory - * has been changed by a non-root user, remove the - * setgid bit UNLESS there is no group execute bit - * (this would be a file marked for mandatory - * locking). 19981026 David C Niemi <niemi@tux.org> - * - * Removed the fsuid check (see the comment above) -- - * 19990830 SD. - */ - if (((inode->i_mode & (S_ISGID | S_IXGRP)) == - (S_ISGID | S_IXGRP)) && !S_ISDIR(inode->i_mode)) { - attr->ia_mode &= ~S_ISGID; - attr->ia_valid |= ATTR_MODE; - } - } else if (ia_valid & ATTR_MODE) { - int mode = attr->ia_mode; - /* chmod */ - if (attr->ia_mode == (mode_t) -1) - attr->ia_mode = inode->i_mode; - attr->ia_mode = - (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); - } - RETURN(0); -} - -static void reconstruct_reint_setattr(struct mds_update_record *rec, - int offset, struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - struct mds_obd *obd = &req->rq_export->exp_obd->u.mds; - struct dentry *de; - struct mds_body *body; - - req->rq_transno = mcd->mcd_last_transno; - req->rq_status = mcd->mcd_last_result; - - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); - - de = mds_fid2dentry(obd, rec->ur_fid1, NULL); - if (IS_ERR(de)) { - LASSERT(PTR_ERR(de) == req->rq_status); - return; - } - - body = lustre_msg_buf(req->rq_repmsg, 0, sizeof 
(*body)); - mds_pack_inode2fid(&body->fid1, de->d_inode); - mds_pack_inode2body(body, de->d_inode); - - l_dput(de); -} - -/* In the raw-setattr case, we lock the child inode. - * In the write-back case or if being called from open, the client holds a lock - * already. - * - * We use the ATTR_FROM_OPEN flag to tell these cases apart. */ -static int mds_reint_setattr(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lh) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_body *body; - struct dentry *de; - struct inode *inode = NULL; - struct lustre_handle lockh; - void *handle = NULL; - int rc = 0, cleanup_phase = 0, err, locked = 0; - ENTRY; - - LASSERT(offset == 0); - - MDS_CHECK_RESENT(req, reconstruct_reint_setattr(rec, offset, req)); - - if (rec->ur_iattr.ia_valid & ATTR_FROM_OPEN) { - de = mds_fid2dentry(mds, rec->ur_fid1, NULL); - if (IS_ERR(de)) - GOTO(cleanup, rc = PTR_ERR(de)); - } else { - de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, - &lockh); - if (IS_ERR(de)) - GOTO(cleanup, rc = PTR_ERR(de)); - locked = 1; - } - - cleanup_phase = 1; - inode = de->d_inode; - LASSERT(inode); - - CDEBUG(D_INODE, "ino %lu\n", inode->i_ino); - - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_SETATTR_WRITE, - to_kdev_t(inode->i_sb->s_dev)); - - handle = fsfilt_start(obd, inode, FSFILT_OP_SETATTR); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - handle = NULL; - GOTO(cleanup, rc); - } - - rc = mds_fix_attr(inode, rec); - if (rc) - GOTO(cleanup, rc); - - rc = fsfilt_setattr(obd, de, handle, &rec->ur_iattr, 0); - if (rc == 0 && - S_ISREG(inode->i_mode) && - rec->ur_eadata != NULL) { - rc = fsfilt_set_md(obd, inode, handle, - rec->ur_eadata, rec->ur_eadatalen); - } - - body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); - mds_pack_inode2fid(&body->fid1, inode); - mds_pack_inode2body(body, inode); - - EXIT; - cleanup: - err = mds_finish_transno(mds, inode, handle, 
req, rc, 0); - switch(cleanup_phase) { - case 1: - l_dput(de); - if (locked) { - if (rc) { - ldlm_lock_decref(&lockh, LCK_PW); - } else { - memcpy(&req->rq_ack_locks[0].lock, &lockh, - sizeof(lockh)); - req->rq_ack_locks[0].mode = LCK_PW; - } - } - case 0: - break; - default: - LBUG(); - } - if (err && !rc) - rc = err; - - req->rq_status = rc; - return 0; -} - -static void reconstruct_reint_create(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - struct mds_obd *obd = &req->rq_export->exp_obd->u.mds; - struct dentry *parent, *child; - struct mds_body *body; - - req->rq_transno = mcd->mcd_last_transno; - req->rq_status = mcd->mcd_last_result; - - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); - - if (req->rq_status) - return; - - parent = mds_fid2dentry(obd, rec->ur_fid1, NULL); - LASSERT(!IS_ERR(parent)); - child = ll_lookup_one_len(rec->ur_name, parent, rec->ur_namelen - 1); - LASSERT(!IS_ERR(child)); - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body)); - mds_pack_inode2fid(&body->fid1, child->d_inode); - mds_pack_inode2body(body, child->d_inode); - l_dput(parent); - l_dput(child); -} - -static int mds_reint_create(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lh) -{ - struct dentry *de = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *dchild = NULL; - struct inode *dir = NULL; - void *handle = NULL; - struct lustre_handle lockh; - int rc = 0, err, type = rec->ur_mode & S_IFMT, cleanup_phase = 0; - int created = 0; - ENTRY; - - LASSERT(offset == 0); - LASSERT(!strcmp(req->rq_export->exp_obd->obd_type->typ_name, "mds")); - - MDS_CHECK_RESENT(req, reconstruct_reint_create(rec, offset, req)); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) - GOTO(cleanup, rc = 
-ESTALE); - - de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, &lockh); - if (IS_ERR(de)) { - rc = PTR_ERR(de); - CERROR("parent lookup error %d\n", rc); - GOTO(cleanup, rc); - } - cleanup_phase = 1; /* locked parent dentry */ - dir = de->d_inode; - LASSERT(dir); - CDEBUG(D_INODE, "parent ino %lu creating name %s mode %o\n", - dir->i_ino, rec->ur_name, rec->ur_mode); - - ldlm_lock_dump_handle(D_OTHER, &lockh); - - dchild = ll_lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1); - if (IS_ERR(dchild)) { - rc = PTR_ERR(dchild); - CERROR("child lookup error %d\n", rc); - GOTO(cleanup, rc); - } - - cleanup_phase = 2; /* child dentry */ - - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, - to_kdev_t(dir->i_sb->s_dev)); - - if (dir->i_mode & S_ISGID) { - rec->ur_gid = dir->i_gid; - if (S_ISDIR(rec->ur_mode)) - rec->ur_mode |= S_ISGID; - } - - if (rec->ur_fid2->id) - dchild->d_fsdata = (void *)(unsigned long)rec->ur_fid2->id; - - switch (type) { - case S_IFREG:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_CREATE); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_create(dir, dchild, rec->ur_mode); - EXIT; - break; - } - case S_IFDIR:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_MKDIR); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - rc = vfs_mkdir(dir, dchild, rec->ur_mode); - EXIT; - break; - } - case S_IFLNK:{ - handle = fsfilt_start(obd, dir, FSFILT_OP_SYMLINK); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - if (rec->ur_tgt == NULL) /* no target supplied */ - rc = -EINVAL; /* -EPROTO? 
*/ - else - rc = vfs_symlink(dir, dchild, rec->ur_tgt); - EXIT; - break; - } - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK:{ - int rdev = rec->ur_rdev; - handle = fsfilt_start(obd, dir, FSFILT_OP_MKNOD); - if (IS_ERR(handle)) - GOTO(cleanup, (handle = NULL, rc = PTR_ERR(handle))); - rc = vfs_mknod(dir, dchild, rec->ur_mode, rdev); - EXIT; - break; - } - default: - CERROR("bad file type %o creating %s\n", type, rec->ur_name); - GOTO(cleanup, rc = -EINVAL); - } - - /* In case we stored the desired inum in here, we want to clean up. - * We also do this in the cleanup block, for the error cases. - */ - dchild->d_fsdata = NULL; - - if (rc) { - CDEBUG(D_INODE, "error during create: %d\n", rc); - GOTO(cleanup, rc); - } else { - struct iattr iattr; - struct inode *inode = dchild->d_inode; - struct mds_body *body; - - created = 1; - LTIME_S(iattr.ia_atime) = rec->ur_time; - LTIME_S(iattr.ia_ctime) = rec->ur_time; - LTIME_S(iattr.ia_mtime) = rec->ur_time; - iattr.ia_uid = rec->ur_uid; - iattr.ia_gid = rec->ur_gid; - iattr.ia_valid = ATTR_UID | ATTR_GID | ATTR_ATIME | - ATTR_MTIME | ATTR_CTIME; - - if (rec->ur_fid2->id) { - LASSERT(rec->ur_fid2->id == inode->i_ino); - inode->i_generation = rec->ur_fid2->generation; - /* Dirtied and committed by the upcoming setattr. */ - CDEBUG(D_INODE, "recreated ino %lu with gen %x\n", - inode->i_ino, inode->i_generation); - } else { - CDEBUG(D_INODE, "created ino %lu with gen %x\n", - inode->i_ino, inode->i_generation); - } - - rc = fsfilt_setattr(obd, dchild, handle, &iattr, 0); - if (rc) { - CERROR("error on setattr: rc = %d\n", rc); - /* XXX should we abort here in case of error? */ - } - - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body)); - mds_pack_inode2fid(&body->fid1, inode); - mds_pack_inode2body(body, inode); - } - EXIT; - -cleanup: - err = mds_finish_transno(mds, dir, handle, req, rc, 0); - - if (rc && created) { - /* Destroy the file we just created. 
This should not need - * extra journal credits, as we have already modified all of - * the blocks needed in order to create the file in the first - * place. - */ - switch (type) { - case S_IFDIR: - err = vfs_rmdir(dir, dchild); - if (err) - CERROR("rmdir in error path: %d\n", err); - break; - default: - err = vfs_unlink(dir, dchild); - if (err) - CERROR("unlink in error path: %d\n", err); - break; - } - } else { - rc = err; - } - switch (cleanup_phase) { - case 2: /* child dentry */ - dchild->d_fsdata = NULL; - l_dput(dchild); - case 1: /* locked parent dentry */ - if (rc) { - ldlm_lock_decref(&lockh, LCK_PW); - } else { - memcpy(&req->rq_ack_locks[0].lock, &lockh, - sizeof(lockh)); - req->rq_ack_locks[0].mode = LCK_PW; - } - l_dput(de); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - return 0; -} - -/* This function doesn't use ldlm_match_or_enqueue because we're always called - * with EX or PW locks, and the MDS is no longer allowed to match write locks, - * because they take the place of local semaphores. 
- * - * Two locks are taken in numerical order */ -int enqueue_ordered_locks(int lock_mode, struct obd_device *obd, - struct ldlm_res_id *p1_res_id, - struct ldlm_res_id *p2_res_id, - struct lustre_handle *p1_lockh, - struct lustre_handle *p2_lockh) -{ - struct ldlm_res_id res_id[2]; - struct lustre_handle *handles[2] = {p1_lockh, p2_lockh}; - int rc, flags; - ENTRY; - - LASSERT(p1_res_id != NULL && p2_res_id != NULL); - - CDEBUG(D_INFO, "locks before: "LPU64"/"LPU64"\n", - p1_res_id[0].name[0], p2_res_id[0].name[0]); - - if (p1_res_id->name[0] < p2_res_id->name[0]) { - handles[0] = p1_lockh; - handles[1] = p2_lockh; - res_id[0] = *p1_res_id; - res_id[1] = *p2_res_id; - } else { - handles[1] = p1_lockh; - handles[0] = p2_lockh; - res_id[1] = *p1_res_id; - res_id[0] = *p2_res_id; - } - - CDEBUG(D_INFO, "lock order: "LPU64"/"LPU64"\n", - p1_res_id[0].name[0], p2_res_id[0].name[0]); - - flags = LDLM_FL_LOCAL_ONLY; - rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, res_id[0], - LDLM_PLAIN, NULL, 0, lock_mode, &flags, - ldlm_completion_ast, mds_blocking_ast, NULL, - handles[0]); - if (rc != ELDLM_OK) - RETURN(-EIO); - ldlm_lock_dump_handle(D_OTHER, handles[0]); - - if (memcmp(&res_id[0], &res_id[1], sizeof(res_id[0])) == 0) { - memcpy(handles[1], handles[0], sizeof(*(handles[1]))); - ldlm_lock_addref(handles[1], lock_mode); - } else { - flags = LDLM_FL_LOCAL_ONLY; - rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, - res_id[1], LDLM_PLAIN, NULL, 0, lock_mode, - &flags, ldlm_completion_ast, - mds_blocking_ast, NULL, handles[1]); - if (rc != ELDLM_OK) { - ldlm_lock_decref(handles[0], lock_mode); - RETURN(-EIO); - } - } - ldlm_lock_dump_handle(D_OTHER, handles[1]); - - RETURN(0); -} - -static void reconstruct_reint_unlink(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *child_lockh) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - - 
req->rq_transno = mcd->mcd_last_transno; - req->rq_status = mcd->mcd_last_result; - - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); - - DEBUG_REQ(D_ERROR, req, - "can't get EA for reconstructed unlink, leaking OST inodes"); -} - -static int mds_reint_unlink(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *child_lockh) -{ - struct dentry *dir_de = NULL; - struct dentry *dchild = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct obd_device *obd = req->rq_export->exp_obd; - struct mds_body *body = NULL; - struct inode *dir_inode = NULL, *child_inode; - struct lustre_handle parent_lockh; - void *handle = NULL; - struct ldlm_res_id child_res_id = { .name = {0} }; - int rc = 0, flags = 0, return_lock = 0; - int cleanup_phase = 0; - ENTRY; - - LASSERT(offset == 0 || offset == 2); - - MDS_CHECK_RESENT(req, reconstruct_reint_unlink(rec, offset, req, - child_lockh)); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_UNLINK)) - GOTO(cleanup, rc = -ENOENT); - - /* Step 1: Lookup the parent by FID */ - dir_de = mds_fid2locked_dentry(obd, rec->ur_fid1, NULL, LCK_PW, - &parent_lockh); - if (IS_ERR(dir_de)) - GOTO(cleanup, rc = PTR_ERR(dir_de)); - dir_inode = dir_de->d_inode; - LASSERT(dir_inode); - - cleanup_phase = 1; /* Have parent dentry lock */ - - /* Step 2: Lookup the child */ - dchild = ll_lookup_one_len(rec->ur_name, dir_de, rec->ur_namelen - 1); - if (IS_ERR(dchild)) - GOTO(cleanup, rc = PTR_ERR(dchild)); - - cleanup_phase = 2; /* child dentry */ - - child_inode = dchild->d_inode; - if (child_inode == NULL) { - CDEBUG(D_INODE, - "child doesn't exist (dir %lu, name %s)\n", - dir_inode->i_ino, rec->ur_name); - rc = -ENOENT; - GOTO(cleanup, rc); - } - - DEBUG_REQ(D_INODE, req, "parent ino %lu, child ino %lu", - dir_inode->i_ino, child_inode->i_ino); - - /* Step 3: Get a lock on the child */ - child_res_id.name[0] = child_inode->i_ino; - child_res_id.name[1] = child_inode->i_generation; - 
- rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, - child_res_id, LDLM_PLAIN, NULL, 0, LCK_EX, - &flags, ldlm_completion_ast, mds_blocking_ast, - NULL, child_lockh); - if (rc != ELDLM_OK) - GOTO(cleanup, rc); - - cleanup_phase = 3; /* child lock */ - - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_UNLINK_WRITE, - to_kdev_t(dir_inode->i_sb->s_dev)); - - /* ldlm_reply in buf[0] if called via intent */ - if (offset) - offset = 1; - - body = lustre_msg_buf(req->rq_repmsg, offset, sizeof (*body)); - LASSERT(body != NULL); - - /* Step 4: Do the unlink: client decides between rmdir/unlink! - * (bug 72) */ - switch (rec->ur_mode & S_IFMT) { - case S_IFDIR: - /* Drop any lingering child directories before we start our - * transaction, to avoid doing multiple inode dirty/delete - * in our compound transaction (bug 1321). - */ - shrink_dcache_parent(dchild); - handle = fsfilt_start(obd, dir_inode, FSFILT_OP_RMDIR); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - cleanup_phase = 4; - rc = vfs_rmdir(dir_inode, dchild); - break; - case S_IFREG: - /* If this is the last reference to this inode, get the OBD EA - * data first so the client can destroy OST objects */ - if (S_ISREG(child_inode->i_mode) && child_inode->i_nlink == 1) { - mds_pack_inode2fid(&body->fid1, child_inode); - mds_pack_inode2body(body, child_inode); - mds_pack_md(obd, req->rq_repmsg, offset + 1, - body, child_inode); - if (body->valid & OBD_MD_FLEASIZE) - return_lock = 1; - } - /* no break */ - case S_IFLNK: - case S_IFCHR: - case S_IFBLK: - case S_IFIFO: - case S_IFSOCK: - handle = fsfilt_start(obd, dir_inode, FSFILT_OP_UNLINK); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - cleanup_phase = 4; - rc = vfs_unlink(dir_inode, dchild); - break; - default: - CERROR("bad file type %o unlinking %s\n", rec->ur_mode, - rec->ur_name); - LBUG(); - GOTO(cleanup, rc = -EINVAL); - } - - cleanup: - switch(cleanup_phase) { - case 4: - rc = mds_finish_transno(mds, dir_inode, handle, req, rc, 0); - 
if (rc && body) { - /* Don't unlink the OST objects if the MDS unlink failed */ - body->valid = 0; - } - case 3: /* child lock */ - if (rc != 0 || return_lock == 0) - ldlm_lock_decref(child_lockh, LCK_EX); - case 2: /* child dentry */ - l_dput(dchild); - case 1: /* parent dentry and lock */ - if (rc) { - ldlm_lock_decref(&parent_lockh, LCK_EX); - } else { - memcpy(&req->rq_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - req->rq_ack_locks[0].mode = LCK_EX; - } - l_dput(dir_de); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - return 0; -} - -static void reconstruct_reint_link(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - - req->rq_transno = mcd->mcd_last_transno; - req->rq_status = mcd->mcd_last_result; - - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); -} - -static int mds_reint_link(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lh) -{ - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *de_src = NULL; - struct dentry *de_tgt_dir = NULL; - struct dentry *dchild = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct lustre_handle *handle = NULL, tgt_dir_lockh, src_lockh; - struct ldlm_res_id src_res_id = { .name = {0} }; - struct ldlm_res_id tgt_dir_res_id = { .name = {0} }; - int lock_mode = 0, rc = 0, cleanup_phase = 0; - ENTRY; - - LASSERT(offset == 0); - - MDS_CHECK_RESENT(req, reconstruct_reint_link(rec, offset, req)); - - if (OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_LINK)) - GOTO(cleanup, rc = -ENOENT); - - /* Step 1: Lookup the source inode and target directory by FID */ - de_src = mds_fid2dentry(mds, rec->ur_fid1, NULL); - if (IS_ERR(de_src)) - GOTO(cleanup, rc = PTR_ERR(de_src)); - - cleanup_phase = 1; /* source dentry */ - - 
de_tgt_dir = mds_fid2dentry(mds, rec->ur_fid2, NULL); - if (IS_ERR(de_tgt_dir)) - GOTO(cleanup, rc = PTR_ERR(de_tgt_dir)); - - cleanup_phase = 2; /* target directory dentry */ - - CDEBUG(D_INODE, "linking %*s/%s to inode %lu\n", - de_tgt_dir->d_name.len, de_tgt_dir->d_name.name, rec->ur_name, - de_src->d_inode->i_ino); - - /* Step 2: Take the two locks */ - lock_mode = LCK_EX; - src_res_id.name[0] = de_src->d_inode->i_ino; - src_res_id.name[1] = de_src->d_inode->i_generation; - tgt_dir_res_id.name[0] = de_tgt_dir->d_inode->i_ino; - tgt_dir_res_id.name[1] = de_tgt_dir->d_inode->i_generation; - - rc = enqueue_ordered_locks(LCK_EX, obd, &src_res_id, &tgt_dir_res_id, - &src_lockh, &tgt_dir_lockh); - if (rc != ELDLM_OK) - GOTO(cleanup, rc = -EIO); - - cleanup_phase = 3; /* locks */ - - /* Step 3: Lookup the child */ - dchild = ll_lookup_one_len(rec->ur_name, de_tgt_dir, rec->ur_namelen-1); - if (IS_ERR(dchild)) { - CERROR("child lookup error %ld\n", PTR_ERR(dchild)); - GOTO(cleanup, rc = PTR_ERR(dchild)); - } - - cleanup_phase = 4; /* child dentry */ - - if (dchild->d_inode) { - CDEBUG(D_INODE, "child exists (dir %lu, name %s)\n", - de_tgt_dir->d_inode->i_ino, rec->ur_name); - rc = -EEXIST; - GOTO(cleanup, rc); - } - - /* Step 4: Do it. */ - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, - to_kdev_t(de_src->d_inode->i_sb->s_dev)); - - handle = fsfilt_start(obd, de_tgt_dir->d_inode, FSFILT_OP_LINK); - if (IS_ERR(handle)) { - rc = PTR_ERR(handle); - GOTO(cleanup, rc); - } - - rc = vfs_link(de_src, de_tgt_dir->d_inode, dchild); - if (rc) - CERROR("link error %d\n", rc); -cleanup: - rc = mds_finish_transno(mds, de_tgt_dir ? 
de_tgt_dir->d_inode : NULL, - handle, req, rc, 0); - EXIT; - - switch (cleanup_phase) { - case 4: /* child dentry */ - l_dput(dchild); - case 3: /* locks */ - if (rc) { - ldlm_lock_decref(&src_lockh, lock_mode); - ldlm_lock_decref(&tgt_dir_lockh, lock_mode); - } else { - memcpy(&req->rq_ack_locks[0].lock, &src_lockh, - sizeof(src_lockh)); - memcpy(&req->rq_ack_locks[1].lock, &tgt_dir_lockh, - sizeof(tgt_dir_lockh)); - req->rq_ack_locks[0].mode = lock_mode; - req->rq_ack_locks[1].mode = lock_mode; - } - case 2: /* target dentry */ - l_dput(de_tgt_dir); - case 1: /* source dentry */ - l_dput(de_src); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - return 0; -} - -static void reconstruct_reint_rename(struct mds_update_record *rec, - int offset, struct ptlrpc_request *req) -{ - struct mds_export_data *med = &req->rq_export->exp_mds_data; - struct mds_client_data *mcd = med->med_mcd; - - req->rq_transno = mcd->mcd_last_transno; - req->rq_status = mcd->mcd_last_result; - - if (req->rq_export->exp_outstanding_reply) - mds_steal_ack_locks(req->rq_export, req); - else - LBUG(); /* don't support it yet, but it'll be fun! 
*/ - -} - -static int mds_reint_rename(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, - struct lustre_handle *lockh) -{ - struct obd_device *obd = req->rq_export->exp_obd; - struct dentry *de_srcdir = NULL; - struct dentry *de_tgtdir = NULL; - struct dentry *de_old = NULL; - struct dentry *de_new = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct lustre_handle dlm_handles[4]; - struct ldlm_res_id p1_res_id = { .name = {0} }; - struct ldlm_res_id p2_res_id = { .name = {0} }; - struct ldlm_res_id c1_res_id = { .name = {0} }; - struct ldlm_res_id c2_res_id = { .name = {0} }; - int rc = 0, lock_count = 3, flags = LDLM_FL_LOCAL_ONLY; - int cleanup_phase = 0; - void *handle = NULL; - ENTRY; - - LASSERT(offset == 0); - - MDS_CHECK_RESENT(req, reconstruct_reint_rename(rec, offset, req)); - - de_srcdir = mds_fid2dentry(mds, rec->ur_fid1, NULL); - if (IS_ERR(de_srcdir)) - GOTO(cleanup, rc = PTR_ERR(de_srcdir)); - - cleanup_phase = 1; /* source directory dentry */ - - de_tgtdir = mds_fid2dentry(mds, rec->ur_fid2, NULL); - if (IS_ERR(de_tgtdir)) - GOTO(cleanup, rc = PTR_ERR(de_tgtdir)); - - cleanup_phase = 2; /* target directory dentry */ - - /* The idea here is that we need to get four locks in the end: - * one on each parent directory, one on each child. We need to take - * these locks in some kind of order (to avoid deadlocks), and the order - * I selected is "increasing resource number" order. We need to take - * the locks on the parent directories, however, before we can lookup - * the children. Thus the following plan: - * - * 1. Take locks on the parent(s), in order - * 2. Lookup the children - * 3. Take locks on the children, in order - * 4. 
Execute the rename - */ - - /* Step 1: Take locks on the parent(s), in order */ - p1_res_id.name[0] = de_srcdir->d_inode->i_ino; - p1_res_id.name[1] = de_srcdir->d_inode->i_generation; - - p2_res_id.name[0] = de_tgtdir->d_inode->i_ino; - p2_res_id.name[1] = de_tgtdir->d_inode->i_generation; - - rc = enqueue_ordered_locks(LCK_EX, obd, &p1_res_id, &p2_res_id, - &(dlm_handles[0]), &(dlm_handles[1])); - if (rc != ELDLM_OK) - GOTO(cleanup, rc); - - cleanup_phase = 3; /* parent locks */ - - /* Step 2: Lookup the children */ - de_old = ll_lookup_one_len(rec->ur_name, de_srcdir, rec->ur_namelen-1); - if (IS_ERR(de_old)) { - CERROR("old child lookup error (%*s): %ld\n", - rec->ur_namelen - 1, rec->ur_name, PTR_ERR(de_old)); - GOTO(cleanup, rc = PTR_ERR(de_old)); - } - - cleanup_phase = 4; /* original name dentry */ - - if (de_old->d_inode == NULL) - GOTO(cleanup, rc = -ENOENT); - - /* sanity check for src inode */ - if (de_old->d_inode->i_ino == de_srcdir->d_inode->i_ino || - de_old->d_inode->i_ino == de_tgtdir->d_inode->i_ino) - GOTO(cleanup, rc = -EINVAL); - - de_new = ll_lookup_one_len(rec->ur_tgt, de_tgtdir, rec->ur_tgtlen - 1); - if (IS_ERR(de_new)) { - CERROR("new child lookup error (%*s): %ld\n", - rec->ur_tgtlen - 1, rec->ur_tgt, PTR_ERR(de_new)); - GOTO(cleanup, rc = PTR_ERR(de_new)); - } - - cleanup_phase = 5; /* target dentry */ - - /* sanity check for dest inode */ - if (de_new->d_inode && - (de_new->d_inode->i_ino == de_srcdir->d_inode->i_ino || - de_new->d_inode->i_ino == de_tgtdir->d_inode->i_ino)) - GOTO(cleanup, rc = -EINVAL); - - /* Step 3: Take locks on the children */ - c1_res_id.name[0] = de_old->d_inode->i_ino; - c1_res_id.name[1] = de_old->d_inode->i_generation; - if (de_new->d_inode == NULL) { - flags = LDLM_FL_LOCAL_ONLY; - rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, - c1_res_id, LDLM_PLAIN, NULL, 0, LCK_EX, - &flags, ldlm_completion_ast, - mds_blocking_ast, NULL, - &(dlm_handles[2])); - lock_count = 3; - } else { - c2_res_id.name[0] 
= de_new->d_inode->i_ino; - c2_res_id.name[1] = de_new->d_inode->i_generation; - rc = enqueue_ordered_locks(LCK_EX, obd, &c1_res_id, &c2_res_id, - &(dlm_handles[2]), - &(dlm_handles[3])); - lock_count = 4; - } - if (rc != ELDLM_OK) - GOTO(cleanup, rc); - - cleanup_phase = 6; /* child locks */ - - /* Step 4: Execute the rename */ - OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_RENAME_WRITE, - to_kdev_t(de_srcdir->d_inode->i_sb->s_dev)); - - handle = fsfilt_start(obd, de_tgtdir->d_inode, FSFILT_OP_RENAME); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - - lock_kernel(); - rc = vfs_rename(de_srcdir->d_inode, de_old, de_tgtdir->d_inode, de_new, - NULL); - unlock_kernel(); - - EXIT; -cleanup: - rc = mds_finish_transno(mds, de_tgtdir ? de_tgtdir->d_inode : NULL, - handle, req, rc, 0); - switch (cleanup_phase) { - case 6: /* child locks */ - if (rc) { - ldlm_lock_decref(&(dlm_handles[2]), LCK_EX); - if (lock_count == 4) - ldlm_lock_decref(&(dlm_handles[3]), LCK_EX); - } else { - memcpy(&req->rq_ack_locks[2].lock, &(dlm_handles[2]), - sizeof(dlm_handles[2])); - req->rq_ack_locks[2].mode = LCK_EX; - if (lock_count == 4) { - memcpy(&req->rq_ack_locks[3].lock, - &dlm_handles[3], sizeof(dlm_handles[3])); - req->rq_ack_locks[3].mode = LCK_EX; - } - } - case 5: /* target dentry */ - l_dput(de_new); - case 4: /* source dentry */ - l_dput(de_old); - case 3: /* parent locks */ - if (rc) { - ldlm_lock_decref(&(dlm_handles[0]), LCK_EX); - ldlm_lock_decref(&(dlm_handles[1]), LCK_EX); - } else { - memcpy(&req->rq_ack_locks[0].lock, &(dlm_handles[0]), - sizeof(dlm_handles[0])); - memcpy(&req->rq_ack_locks[1].lock, &(dlm_handles[1]), - sizeof(dlm_handles[1])); - req->rq_ack_locks[0].mode = LCK_EX; - req->rq_ack_locks[1].mode = LCK_EX; - } - case 2: /* target directory dentry */ - l_dput(de_tgtdir); - case 1: /* source directry dentry */ - l_dput(de_srcdir); - case 0: - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - req->rq_status = rc; - 
return 0; -} - -typedef int (*mds_reinter)(struct mds_update_record *, int offset, - struct ptlrpc_request *, struct lustre_handle *); - -static mds_reinter reinters[REINT_MAX + 1] = { - [REINT_SETATTR] mds_reint_setattr, - [REINT_CREATE] mds_reint_create, - [REINT_UNLINK] mds_reint_unlink, - [REINT_LINK] mds_reint_link, - [REINT_RENAME] mds_reint_rename, - [REINT_OPEN] mds_open -}; - -int mds_reint_rec(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req, struct lustre_handle *lockh) -{ - struct mds_obd *mds = mds_req2mds(req); - struct obd_run_ctxt saved; - int rc; - ENTRY; - - /* checked by unpacker */ - LASSERT(rec->ur_opcode <= REINT_MAX && - reinters[rec->ur_opcode] != NULL); - - push_ctxt(&saved, &mds->mds_ctxt, &rec->ur_uc); - rc = reinters[rec->ur_opcode] (rec, offset, req, lockh); - pop_ctxt(&saved, &mds->mds_ctxt, &rec->ur_uc); - - RETURN(rc); -} diff --git a/lustre/missing b/lustre/missing deleted file mode 100755 index 6a37006..0000000 --- a/lustre/missing +++ /dev/null @@ -1,336 +0,0 @@ -#! /bin/sh -# Common stub for a few missing GNU programs while installing. -# Copyright (C) 1996, 1997, 1999, 2000, 2002 Free Software Foundation, Inc. -# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. - -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. 
- -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -if test $# -eq 0; then - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 -fi - -run=: - -# In the cases where this matters, `missing' is being run in the -# srcdir already. -if test -f configure.ac; then - configure_ac=configure.ac -else - configure_ac=configure.in -fi - -case "$1" in ---run) - # Try to run requested program, and just exit if it succeeds. - run= - shift - "$@" && exit 0 - ;; -esac - -# If it does not exist, or fails to run (possibly an outdated version), -# try to emulate it. -case "$1" in - - -h|--h|--he|--hel|--help) - echo "\ -$0 [OPTION]... PROGRAM [ARGUMENT]... - -Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an -error status if there is no known handling for PROGRAM. 
- -Options: - -h, --help display this help and exit - -v, --version output version information and exit - --run try to run the given command, and emulate it if it fails - -Supported PROGRAM values: - aclocal touch file \`aclocal.m4' - autoconf touch file \`configure' - autoheader touch file \`config.h.in' - automake touch all \`Makefile.in' files - bison create \`y.tab.[ch]', if possible, from existing .[ch] - flex create \`lex.yy.c', if possible, from existing .c - help2man touch the output file - lex create \`lex.yy.c', if possible, from existing .c - makeinfo touch the output file - tar try tar, gnutar, gtar, then tar without non-portable flags - yacc create \`y.tab.[ch]', if possible, from existing .[ch]" - ;; - - -v|--v|--ve|--ver|--vers|--versi|--versio|--version) - echo "missing 0.4 - GNU automake" - ;; - - -*) - echo 1>&2 "$0: Unknown \`$1' option" - echo 1>&2 "Try \`$0 --help' for more information" - exit 1 - ;; - - aclocal*) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acinclude.m4' or \`${configure_ac}'. You might want - to install the \`Automake' and \`Perl' packages. Grab them from - any GNU archive site." - touch aclocal.m4 - ;; - - autoconf) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`${configure_ac}'. You might want to install the - \`Autoconf' and \`GNU m4' packages. Grab them from any GNU - archive site." - touch configure - ;; - - autoheader) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`acconfig.h' or \`${configure_ac}'. 
You might want - to install the \`Autoconf' and \`GNU m4' packages. Grab them - from any GNU archive site." - files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` - test -z "$files" && files="config.h" - touch_files= - for f in $files; do - case "$f" in - *:*) touch_files="$touch_files "`echo "$f" | - sed -e 's/^[^:]*://' -e 's/:.*//'`;; - *) touch_files="$touch_files $f.in";; - esac - done - touch $touch_files - ;; - - automake*) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. - You might want to install the \`Automake' and \`Perl' packages. - Grab them from any GNU archive site." - find . -type f -name Makefile.am -print | - sed 's/\.am$/.in/' | - while read f; do touch "$f"; done - ;; - - autom4te) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is needed, and you do not seem to have it handy on your - system. You might have modified some files without having the - proper tools for further handling them. - You can get \`$1Help2man' as part of \`Autoconf' from any GNU - archive site." - - file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` - test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` - if test -f "$file"; then - touch $file - else - test -z "$file" || exec >$file - echo "#! /bin/sh" - echo "# Created by GNU Automake missing as a replacement of" - echo "# $ $@" - echo "exit 0" - chmod +x $file - exit 1 - fi - ;; - - bison|yacc) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.y' file. You may need the \`Bison' package - in order for those modifications to take effect. You can get - \`Bison' from any GNU archive site." 
- rm -f y.tab.c y.tab.h - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.y) - SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.c - fi - SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" y.tab.h - fi - ;; - esac - fi - if [ ! -f y.tab.h ]; then - echo >y.tab.h - fi - if [ ! -f y.tab.c ]; then - echo 'main() { return 0; }' >y.tab.c - fi - ;; - - lex|flex) - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a \`.l' file. You may need the \`Flex' package - in order for those modifications to take effect. You can get - \`Flex' from any GNU archive site." - rm -f lex.yy.c - if [ $# -ne 1 ]; then - eval LASTARG="\${$#}" - case "$LASTARG" in - *.l) - SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` - if [ -f "$SRCFILE" ]; then - cp "$SRCFILE" lex.yy.c - fi - ;; - esac - fi - if [ ! -f lex.yy.c ]; then - echo 'main() { return 0; }' >lex.yy.c - fi - ;; - - help2man) - if test -z "$run" && ($1 --version) > /dev/null 2>&1; then - # We have it, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. You should only need it if - you modified a dependency of a manual page. You may need the - \`Help2man' package in order for those modifications to take - effect. You can get \`Help2man' from any GNU archive site." - - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` - fi - if [ -f "$file" ]; then - touch $file - else - test -z "$file" || exec >$file - echo ".ab help2man is required to generate this page" - exit 1 - fi - ;; - - makeinfo) - if test -z "$run" && (makeinfo --version) > /dev/null 2>&1; then - # We have makeinfo, but it failed. - exit 1 - fi - - echo 1>&2 "\ -WARNING: \`$1' is missing on your system. 
You should only need it if - you modified a \`.texi' or \`.texinfo' file, or any other file - indirectly affecting the aspect of the manual. The spurious - call might also be the consequence of using a buggy \`make' (AIX, - DU, IRIX). You might want to install the \`Texinfo' package or - the \`GNU make' package. Grab either from any GNU archive site." - file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` - if test -z "$file"; then - file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` - file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` - fi - touch $file - ;; - - tar) - shift - if test -n "$run"; then - echo 1>&2 "ERROR: \`tar' requires --run" - exit 1 - fi - - # We have already tried tar in the generic part. - # Look for gnutar/gtar before invocation to avoid ugly error - # messages. - if (gnutar --version > /dev/null 2>&1); then - gnutar "$@" && exit 0 - fi - if (gtar --version > /dev/null 2>&1); then - gtar "$@" && exit 0 - fi - firstarg="$1" - if shift; then - case "$firstarg" in - *o*) - firstarg=`echo "$firstarg" | sed s/o//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - case "$firstarg" in - *h*) - firstarg=`echo "$firstarg" | sed s/h//` - tar "$firstarg" "$@" && exit 0 - ;; - esac - fi - - echo 1>&2 "\ -WARNING: I can't seem to be able to run \`tar' with the given arguments. - You may want to install GNU tar or Free paxutils, or check the - command line arguments." - exit 1 - ;; - - *) - echo 1>&2 "\ -WARNING: \`$1' is needed, and you do not seem to have it handy on your - system. You might have modified some files without having the - proper tools for further handling them. Check the \`README' file, - it often tells you about the needed prerequirements for installing - this package. You may also peek at any GNU archive site, in case - some other package would contain this missing \`$1' program." 
- exit 1 - ;; -esac - -exit 0 diff --git a/lustre/mkinstalldirs b/lustre/mkinstalldirs deleted file mode 100755 index a08f3ca..0000000 --- a/lustre/mkinstalldirs +++ /dev/null @@ -1,40 +0,0 @@ -#! /bin/sh -# mkinstalldirs --- make directory hierarchy -# Author: Noah Friedman <friedman@prep.ai.mit.edu> -# Created: 1993-05-16 -# Public domain - -# $Id: mkinstalldirs,v 1.1 2002/05/27 16:48:11 pschwan Exp $ - -errstatus=0 - -for file -do - set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` - shift - - pathcomp= - for d - do - pathcomp="$pathcomp$d" - case "$pathcomp" in - -* ) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" - - mkdir "$pathcomp" || lasterr=$? - - if test ! -d "$pathcomp"; then - errstatus=$lasterr - fi - fi - - pathcomp="$pathcomp/" - done -done - -exit $errstatus - -# mkinstalldirs ends here diff --git a/lustre/nodist b/lustre/nodist deleted file mode 100644 index 91aecad..0000000 --- a/lustre/nodist +++ /dev/null @@ -1,21 +0,0 @@ -obd-*/obd-* -CVS -*~ -make.rules -config.* -*.o -*.orig -*.backup -.depfiles -ext2obd/dir.c -ext2obd/file.c -ext2obd/ialloc.c -ext2obd/inode.c -ext2obd/super.c -ext2obd/fsync.c -ext2obd/ioctl.c -ext2obd/balloc.c -ext2obd/acl.c -ext2obd/namei.c -ext2obd/symlink.c -ext2obd/bitmap.c diff --git a/lustre/obdclass/.cvsignore b/lustre/obdclass/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/obdclass/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/obdclass/Makefile.am b/lustre/obdclass/Makefile.am deleted file mode 100644 index 61f4bc2..0000000 --- a/lustre/obdclass/Makefile.am +++ /dev/null @@ -1,39 +0,0 @@ -# FIXME: we need to make it clear that obdclass.o depends on -# lustre_build_version, or 'make -j2' breaks! 
-DEFS= -MODULE = obdclass - -if EXTN -FSMOD = fsfilt_extN -else -FSMOD = fsfilt_ext3 -endif - -if LIBLUSTRE -lib_LIBRARIES = liblustreclass.a -liblustreclass_a_SOURCES = uuid.c statfs_pack.c genops.c debug.c class_obd.c lustre_handles.c lustre_peer.c lprocfs_status.c simple.c - -class_obd.o: lustre_version - -lustre_version: - echo '#define LUSTRE_VERSION 12' > $(top_builddir)/include/linux/lustre_build_version.h - echo '#define BUILD_VERSION "1"' >> $(top_builddir)/include/linux/lustre_build_version.h - -else -modulefs_DATA = lustre_build_version obdclass.o $(FSMOD).o fsfilt_reiserfs.o -EXTRA_PROGRAMS = obdclass $(FSMOD) fsfilt_reiserfs - -obdclass_SOURCES = class_obd.c debug.c genops.c sysctl.c uuid.c simple.c -obdclass_SOURCES += lprocfs_status.c lustre_handles.c lustre_peer.c -obdclass_SOURCES += fsfilt.c statfs_pack.c otree.c -endif - -include $(top_srcdir)/Rules - -# XXX I'm sure there's some automake mv-if-different helper for this. -lustre_build_version: - perl $(top_srcdir)/scripts/version_tag.pl $(top_srcdir) $(top_builddir) > tmpver - cmp -s $(top_builddir)/include/linux/lustre_build_version.h tmpver \ - 2> /dev/null && \ - $(RM) tmpver || \ - mv tmpver $(top_builddir)/include/linux/lustre_build_version.h diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c deleted file mode 100644 index b497aa3..0000000 --- a/lustre/obdclass/class_obd.c +++ /dev/null @@ -1,935 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Object Devices Class Driver - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices - */ - -#define DEBUG_SUBSYSTEM S_CLASS -#define EXPORT_SYMTAB -#ifdef __KERNEL__ -#include <linux/config.h> /* for CONFIG_PROC_FS */ -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/kernel.h> -#include <linux/major.h> -#include <linux/sched.h> -#include <linux/lp.h> -#include <linux/slab.h> -#include <linux/ioport.h> -#include <linux/fcntl.h> -#include <linux/delay.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <linux/fs.h> -#include <linux/poll.h> -#include <linux/init.h> -#include <linux/list.h> -#include <linux/highmem.h> -#include <asm/io.h> -#include <asm/ioctls.h> -#include <asm/system.h> -#include <asm/poll.h> -#include <asm/uaccess.h> -#include <linux/miscdevice.h> -#include <linux/smp_lock.h> -#else - -# include <liblustre.h> - -#endif - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_debug.h> -#include <linux/lprocfs_status.h> -#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */ -#include <linux/lustre_build_version.h> - -struct semaphore obd_conf_sem; /* serialize configuration commands */ -struct obd_device obd_dev[MAX_OBD_DEVICES]; -struct list_head obd_types; -atomic_t obd_memory; -int obd_memmax; - -/* Root for /proc/lustre */ -struct proc_dir_entry *proc_lustre_root = NULL; -int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data); -struct lprocfs_vars lprocfs_version[] = 
{{"version", obd_proc_read_version, NULL, NULL },{NULL,NULL,NULL,NULL}}; -int proc_version; - -/* The following are visible and mutable through /proc/sys/lustre/. */ -unsigned long obd_fail_loc; -unsigned long obd_timeout = 100; -unsigned long obd_bulk_timeout = 1; -char obd_lustre_upcall[128] = "/usr/lib/lustre/lustre_upcall"; -unsigned long obd_sync_filter; /* = 0, don't sync by default */ - -#ifdef __KERNEL__ -/* opening /dev/obd */ -static int obd_class_open(struct inode * inode, struct file * file) -{ - struct obd_class_user_state *ocus; - ENTRY; - - OBD_ALLOC(ocus, sizeof(*ocus)); - if (ocus == NULL) - return (-ENOMEM); - - INIT_LIST_HEAD(&ocus->ocus_conns); - file->private_data = ocus; - - PORTAL_MODULE_USE; - RETURN(0); -} - -/* closing /dev/obd */ -static int obd_class_release(struct inode * inode, struct file * file) -{ - struct obd_class_user_state *ocus = file->private_data; - struct obd_class_user_conn *c; - ENTRY; - - while (!list_empty (&ocus->ocus_conns)) { - c = list_entry (ocus->ocus_conns.next, - struct obd_class_user_conn, ocuc_chain); - list_del (&c->ocuc_chain); - - CDEBUG (D_IOCTL, "Auto-disconnect %p\n", &c->ocuc_conn); - - down (&obd_conf_sem); - obd_disconnect (&c->ocuc_conn, 0); - up (&obd_conf_sem); - - OBD_FREE (c, sizeof (*c)); - } - - OBD_FREE (ocus, sizeof (*ocus)); - - PORTAL_MODULE_UNUSE; - RETURN(0); -} -#endif - -static int -obd_class_add_user_conn (struct obd_class_user_state *ocus, - struct lustre_handle *conn) -{ - struct obd_class_user_conn *c; - - /* NB holding obd_conf_sem */ - - OBD_ALLOC (c, sizeof (*c)); - if (ocus == NULL) - return (-ENOMEM); - - c->ocuc_conn = *conn; - list_add (&c->ocuc_chain, &ocus->ocus_conns); - return (0); -} - -static void -obd_class_remove_user_conn (struct obd_class_user_state *ocus, - struct lustre_handle *conn) -{ - struct list_head *e; - struct obd_class_user_conn *c; - - /* NB holding obd_conf_sem or last reference */ - - list_for_each (e, &ocus->ocus_conns) { - c = list_entry (e, struct 
obd_class_user_conn, ocuc_chain); - if (conn->cookie == c->ocuc_conn.cookie) { - list_del (&c->ocuc_chain); - OBD_FREE (c, sizeof (*c)); - return; - } - } -} - -static inline void obd_data2conn(struct lustre_handle *conn, - struct obd_ioctl_data *data) -{ - memset(conn, 0, sizeof *conn); - conn->cookie = data->ioc_cookie; -} - -static inline void obd_conn2data(struct obd_ioctl_data *data, - struct lustre_handle *conn) -{ - data->ioc_cookie = conn->cookie; -} - -static void dump_exports(struct obd_device *obd) -{ - struct list_head *tmp, *n; - - list_for_each_safe(tmp, n, &obd->obd_exports) { - struct obd_export *exp = list_entry(tmp, struct obd_export, - exp_obd_chain); - CDEBUG(D_ERROR, "%s: %p %s %d %d %p\n", - obd->obd_name, exp, exp->exp_client_uuid.uuid, - atomic_read(&exp->exp_refcount), - exp->exp_failed, exp->exp_outstanding_reply ); - } -} - -int class_handle_ioctl(struct obd_class_user_state *ocus, unsigned int cmd, - unsigned long arg) -{ - char *buf = NULL; - struct obd_ioctl_data *data; - struct portals_debug_ioctl_data *debug_data; - struct obd_device *obd = ocus->ocus_current_obd; - struct lustre_handle conn; - int err = 0, len = 0, serialised = 0; - ENTRY; - - if ((cmd & 0xffffff00) == ((int)'T') << 8) /* ignore all tty ioctls */ - RETURN(err = -ENOTTY); - - /* only for debugging */ - if (cmd == PTL_IOC_DEBUG_MASK) { - debug_data = (struct portals_debug_ioctl_data*)arg; - portal_subsystem_debug = debug_data->subs; - portal_debug = debug_data->debug; - return 0; - } - - switch (cmd) { - case OBD_IOC_BRW_WRITE: - case OBD_IOC_BRW_READ: - case OBD_IOC_GETATTR: - case ECHO_IOC_ENQUEUE: - case ECHO_IOC_CANCEL: - break; - default: - down(&obd_conf_sem); - serialised = 1; - break; - } - - CDEBUG(D_IOCTL, "cmd = %x, obd = %p\n", cmd, obd); - if (!obd && cmd != OBD_IOC_DEVICE && - cmd != OBD_IOC_LIST && cmd != OBD_GET_VERSION && - cmd != OBD_IOC_NAME2DEV && cmd != OBD_IOC_UUID2DEV && - cmd != OBD_IOC_NEWDEV && cmd != OBD_IOC_ADD_UUID && - cmd != 
OBD_IOC_DEL_UUID && cmd != OBD_IOC_CLOSE_UUID) { - CERROR("OBD ioctl: No device\n"); - GOTO(out, err = -EINVAL); - } - if (obd_ioctl_getdata(&buf, &len, (void *)arg)) { - CERROR("OBD ioctl: data error\n"); - GOTO(out, err = -EINVAL); - } - data = (struct obd_ioctl_data *)buf; - - switch (cmd) { - case OBD_IOC_DEVICE: { - CDEBUG(D_IOCTL, "\n"); - if (data->ioc_dev >= MAX_OBD_DEVICES || data->ioc_dev < 0) { - CERROR("OBD ioctl: DEVICE invalid device %d\n", - data->ioc_dev); - GOTO(out, err = -EINVAL); - } - CDEBUG(D_IOCTL, "device %d\n", data->ioc_dev); - - ocus->ocus_current_obd = &obd_dev[data->ioc_dev]; - GOTO(out, err = 0); - } - - case OBD_IOC_LIST: { - int i; - char *buf2 = data->ioc_bulk; - int remains = data->ioc_inllen1; - - if (!data->ioc_inlbuf1) { - CERROR("No buffer passed!\n"); - GOTO(out, err = -EINVAL); - } - - - for (i = 0 ; i < MAX_OBD_DEVICES ; i++) { - int l; - char *status; - struct obd_device *obd = &obd_dev[i]; - - if (!obd->obd_type) - continue; - if (obd->obd_stopping) - status = "ST"; - else if (obd->obd_set_up) - status = "UP"; - else if (obd->obd_attached) - status = "AT"; - else - status = "-"; - l = snprintf(buf2, remains, "%2d %s %s %s %s %d\n", - i, status, obd->obd_type->typ_name, - obd->obd_name, obd->obd_uuid.uuid, - obd->obd_type->typ_refcnt); - buf2 +=l; - remains -=l; - if (remains <= 0) { - CERROR("not enough space for device listing\n"); - break; - } - } - - err = copy_to_user((void *)arg, data, len); - if (err) - err = -EFAULT; - GOTO(out, err); - } - - case OBD_GET_VERSION: - if (!data->ioc_inlbuf1) { - CERROR("No buffer passed in ioctl\n"); - GOTO(out, err = -EINVAL); - } - - if (strlen(BUILD_VERSION) + 1 > data->ioc_inllen1) { - CERROR("ioctl buffer too small to hold version\n"); - GOTO(out, err = -EINVAL); - } - - memcpy(data->ioc_bulk, BUILD_VERSION, - strlen(BUILD_VERSION) + 1); - - err = copy_to_user((void *)arg, data, len); - if (err) - err = -EFAULT; - GOTO(out, err); - - case OBD_IOC_NAME2DEV: { - /* Resolve a device 
name. This does not change the - * currently selected device. - */ - int dev; - - if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ) { - CERROR("No name passed,!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("Name not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - - CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1); - dev = class_name2dev(data->ioc_inlbuf1); - data->ioc_dev = dev; - if (dev == -1) { - CDEBUG(D_IOCTL, "No device for name %s!\n", - data->ioc_inlbuf1); - GOTO(out, err = -EINVAL); - } - - CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1, - dev); - err = copy_to_user((void *)arg, data, sizeof(*data)); - if (err) - err = -EFAULT; - GOTO(out, err); - } - - case OBD_IOC_UUID2DEV: { - /* Resolve a device uuid. This does not change the - * currently selected device. - */ - int dev; - struct obd_uuid uuid; - - if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { - CERROR("No UUID passed!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("UUID not NUL terminated!\n"); - GOTO(out, err = -EINVAL); - } - - CDEBUG(D_IOCTL, "device name %s\n", data->ioc_inlbuf1); - obd_str2uuid(&uuid, data->ioc_inlbuf1); - dev = class_uuid2dev(&uuid); - data->ioc_dev = dev; - if (dev == -1) { - CDEBUG(D_IOCTL, "No device for UUID %s!\n", - data->ioc_inlbuf1); - GOTO(out, err = -EINVAL); - } - - CDEBUG(D_IOCTL, "device name %s, dev %d\n", data->ioc_inlbuf1, - dev); - err = copy_to_user((void *)arg, data, sizeof(*data)); - if (err) - err = -EFAULT; - GOTO(out, err); - } - - - - case OBD_IOC_NEWDEV: { - int dev = -1; - int i; - - ocus->ocus_current_obd = NULL; - for (i = 0 ; i < MAX_OBD_DEVICES ; i++) { - struct obd_device *obd = &obd_dev[i]; - if (!obd->obd_type) { - ocus->ocus_current_obd = obd; - dev = i; - break; - } - } - - - data->ioc_dev = dev; - if (dev == -1) - GOTO(out, err = -EINVAL); - - err = copy_to_user((void *)arg, data, sizeof(*data)); - if (err) 
- err = -EFAULT; - GOTO(out, err); - } - - case OBD_IOC_ATTACH: { - struct obd_type *type; - int minor, len; - - /* have we attached a type to this device */ - if (obd->obd_attached|| obd->obd_type) { - CERROR("OBD: Device %d already typed as %s.\n", - obd->obd_minor, MKSTR(obd->obd_type->typ_name)); - GOTO(out, err = -EBUSY); - } - - if (!data->ioc_inllen1 || !data->ioc_inlbuf1) { - CERROR("No type passed!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf1[data->ioc_inllen1 - 1] != 0) { - CERROR("Type not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - if (!data->ioc_inllen2 || !data->ioc_inlbuf2) { - CERROR("No name passed!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf2[data->ioc_inllen2 - 1] != 0) { - CERROR("Name not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - if (!data->ioc_inllen3 || !data->ioc_inlbuf3) { - CERROR("No UUID passed!\n"); - GOTO(out, err = -EINVAL); - } - if (data->ioc_inlbuf3[data->ioc_inllen3 - 1] != 0) { - CERROR("UUID not nul terminated!\n"); - GOTO(out, err = -EINVAL); - } - - CDEBUG(D_IOCTL, "attach type %s name: %s uuid: %s\n", - MKSTR(data->ioc_inlbuf1), - MKSTR(data->ioc_inlbuf2), MKSTR(data->ioc_inlbuf3)); - - /* find the type */ - type = class_get_type(data->ioc_inlbuf1); - if (!type) { - CERROR("OBD: unknown type dev %d\n", obd->obd_minor); - GOTO(out, err = -EINVAL); - } - - minor = obd->obd_minor; - memset(obd, 0, sizeof(*obd)); - obd->obd_minor = minor; - obd->obd_type = type; - INIT_LIST_HEAD(&obd->obd_exports); - obd->obd_num_exports = 0; - INIT_LIST_HEAD(&obd->obd_imports); - spin_lock_init(&obd->obd_dev_lock); - init_waitqueue_head(&obd->obd_refcount_waitq); - - /* XXX belong ins setup not attach */ - /* recovery data */ - spin_lock_init(&obd->obd_processing_task_lock); - init_waitqueue_head(&obd->obd_next_transno_waitq); - INIT_LIST_HEAD(&obd->obd_recovery_queue); - INIT_LIST_HEAD(&obd->obd_delayed_reply_queue); - - init_waitqueue_head(&obd->obd_commit_waitq); - - len = 
strlen(data->ioc_inlbuf2) + 1; - OBD_ALLOC(obd->obd_name, len); - if (!obd->obd_name) { - class_put_type(obd->obd_type); - obd->obd_type = NULL; - GOTO(out, err = -ENOMEM); - } - memcpy(obd->obd_name, data->ioc_inlbuf2, len); - - len = strlen(data->ioc_inlbuf3); - if (len >= sizeof(obd->obd_uuid)) { - CERROR("uuid must be < "LPSZ" bytes long\n", - sizeof(obd->obd_uuid)); - if (obd->obd_name) - OBD_FREE(obd->obd_name, - strlen(obd->obd_name) + 1); - class_put_type(obd->obd_type); - obd->obd_type = NULL; - GOTO(out, err = -EINVAL); - } - memcpy(obd->obd_uuid.uuid, data->ioc_inlbuf3, len); - - /* do the attach */ - if (OBP(obd, attach)) - err = OBP(obd,attach)(obd, sizeof(*data), data); - if (err) { - if(data->ioc_inlbuf2) - OBD_FREE(obd->obd_name, - strlen(obd->obd_name) + 1); - class_put_type(obd->obd_type); - obd->obd_type = NULL; - } else { - obd->obd_attached = 1; - - type->typ_refcnt++; - CDEBUG(D_IOCTL, "OBD: dev %d attached type %s\n", - obd->obd_minor, data->ioc_inlbuf1); - } - - GOTO(out, err); - } - - case OBD_IOC_DETACH: { - ENTRY; - if (obd->obd_set_up) { - CERROR("OBD device %d still set up\n", obd->obd_minor); - GOTO(out, err = -EBUSY); - } - if (!obd->obd_attached) { - CERROR("OBD device %d not attached\n", obd->obd_minor); - GOTO(out, err = -ENODEV); - } - if (OBP(obd, detach)) - err = OBP(obd,detach)(obd); - - if (obd->obd_name) { - OBD_FREE(obd->obd_name, strlen(obd->obd_name)+1); - obd->obd_name = NULL; - } - - obd->obd_attached = 0; - obd->obd_type->typ_refcnt--; - class_put_type(obd->obd_type); - obd->obd_type = NULL; - GOTO(out, err = 0); - } - - case OBD_IOC_SETUP: { - /* have we attached a type to this device? */ - if (!obd->obd_attached) { - CERROR("Device %d not attached\n", obd->obd_minor); - GOTO(out, err = -ENODEV); - } - - /* has this been done already? 
*/ - if (obd->obd_set_up) { - CERROR("Device %d already setup (type %s)\n", - obd->obd_minor, obd->obd_type->typ_name); - GOTO(out, err = -EBUSY); - } - - atomic_set(&obd->obd_refcount, 0); - - if ( OBT(obd) && OBP(obd, setup) ) - err = obd_setup(obd, sizeof(*data), data); - - if (!err) { - obd->obd_type->typ_refcnt++; - obd->obd_set_up = 1; - atomic_inc(&obd->obd_refcount); - } - - GOTO(out, err); - } - case OBD_IOC_CLEANUP: { - int force = 0, failover = 0; - char * flag; - - if (!obd->obd_set_up) { - CERROR("Device %d not setup\n", obd->obd_minor); - GOTO(out, err = -ENODEV); - } - - if (data->ioc_inlbuf1) { - for (flag = data->ioc_inlbuf1; *flag != 0; flag++) - switch (*flag) { - case 'F': - force = 1; - break; - case 'A': - failover = 1; - break; - default: - CERROR("unrecognised flag '%c'\n", - *flag); - } - } - - if (atomic_read(&obd->obd_refcount) == 1 || force) { - /* this will stop new connections, and need to - do it before class_disconnect_exports() */ - obd->obd_stopping = 1; - } - - if (atomic_read(&obd->obd_refcount) > 1) { - struct l_wait_info lwi = LWI_TIMEOUT_INTR(60 * HZ, NULL, - NULL, NULL); - int rc; - - if (!force) { - CERROR("OBD device %d (%p) has refcount %d\n", - obd->obd_minor, obd, - atomic_read(&obd->obd_refcount)); - dump_exports(obd); - GOTO(out, err = -EBUSY); - } - class_disconnect_exports(obd, failover); - CDEBUG(D_IOCTL, - "%s: waiting for obd refs to go away: %d\n", - obd->obd_name, atomic_read(&obd->obd_refcount)); - - rc = l_wait_event(obd->obd_refcount_waitq, - atomic_read(&obd->obd_refcount) < 2, &lwi); - if (rc == 0) { - LASSERT(atomic_read(&obd->obd_refcount) == 1); - } else { - CERROR("wait cancelled cleaning anyway. 
" - "refcount: %d\n", - atomic_read(&obd->obd_refcount)); - dump_exports(obd); - } - CDEBUG(D_IOCTL, "%s: awake, now finishing cleanup\n", - obd->obd_name); - } - - if (OBT(obd) && OBP(obd, cleanup)) - err = obd_cleanup(obd, force, failover); - - if (!err) { - obd->obd_set_up = obd->obd_stopping = 0; - obd->obd_type->typ_refcnt--; - atomic_dec(&obd->obd_refcount); - /* XXX this should be an LASSERT */ - if (atomic_read(&obd->obd_refcount) > 0) - CERROR("%s still has refcount %d after " - "cleanup.\n", obd->obd_name, - atomic_read(&obd->obd_refcount)); - } - - GOTO(out, err); - } - - case OBD_IOC_CONNECT: { - struct obd_uuid cluuid = { "OBD_CLASS_UUID" }; - obd_data2conn(&conn, data); - - err = obd_connect(&conn, obd, &cluuid); - - CDEBUG(D_IOCTL, "assigned export "LPX64"\n", conn.cookie); - obd_conn2data(data, &conn); - if (err) - GOTO(out, err); - - err = obd_class_add_user_conn (ocus, &conn); - if (err != 0) { - obd_disconnect (&conn, 0); - GOTO (out, err); - } - - err = copy_to_user((void *)arg, data, sizeof(*data)); - if (err != 0) { - obd_class_remove_user_conn (ocus, &conn); - obd_disconnect (&conn, 0); - GOTO (out, err = -EFAULT); - } - GOTO(out, err); - } - - case OBD_IOC_DISCONNECT: { - obd_data2conn(&conn, data); - obd_class_remove_user_conn (ocus, &conn); - err = obd_disconnect(&conn, 0); - GOTO(out, err); - } - - case OBD_IOC_NO_TRANSNO: { - if (!obd->obd_attached) { - CERROR("Device %d not attached\n", obd->obd_minor); - GOTO(out, err = -ENODEV); - } - CDEBUG(D_IOCTL, - "disabling committed-transno notifications on %d\n", - obd->obd_minor); - obd->obd_no_transno = 1; - GOTO(out, err = 0); - } - - case OBD_IOC_CLOSE_UUID: { - struct lustre_peer peer; - CDEBUG(D_IOCTL, "closing all connections to uuid %s\n", - data->ioc_inlbuf1); - lustre_uuid_to_peer(data->ioc_inlbuf1, &peer); - GOTO(out, err = 0); - } - case OBD_IOC_ADD_UUID: { - CDEBUG(D_IOCTL, "adding mapping from uuid %s to nid "LPX64 - ", nal %d\n", data->ioc_inlbuf1, data->ioc_nid, - 
data->ioc_nal); - - err = class_add_uuid(data->ioc_inlbuf1, data->ioc_nid, - data->ioc_nal); - GOTO(out, err); - } - case OBD_IOC_DEL_UUID: { - CDEBUG(D_IOCTL, "removing mappings for uuid %s\n", - data->ioc_inlbuf1 == NULL ? "<all uuids>" : - data->ioc_inlbuf1); - - err = class_del_uuid(data->ioc_inlbuf1); - GOTO(out, err); - } - default: { - // obd_data2conn(&conn, data); - struct obd_class_user_conn *oconn = list_entry(ocus->ocus_conns.next, struct obd_class_user_conn, ocuc_chain); - err = obd_iocontrol(cmd, &oconn->ocuc_conn, len, data, NULL); - if (err) - GOTO(out, err); - - err = copy_to_user((void *)arg, data, len); - if (err) - err = -EFAULT; - GOTO(out, err); - } - } - - out: - if (buf) - obd_ioctl_freedata(buf, len); - if (serialised) - up(&obd_conf_sem); - RETURN(err); -} /* class_handle_ioctl */ - - - -#define OBD_MINOR 241 -#ifdef __KERNEL__ -/* to control /dev/obd */ -static int obd_class_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) -{ - return class_handle_ioctl(filp->private_data, cmd, arg); -} - -/* declare character device */ -static struct file_operations obd_psdev_fops = { - ioctl: obd_class_ioctl, /* ioctl */ - open: obd_class_open, /* open */ - release: obd_class_release, /* release */ -}; - -/* modules setup */ -static struct miscdevice obd_psdev = { - OBD_MINOR, - "obd_psdev", - &obd_psdev_fops -}; -#else -void *obd_psdev = NULL; -#endif - -EXPORT_SYMBOL(obd_dev); -EXPORT_SYMBOL(obdo_cachep); -EXPORT_SYMBOL(obd_memory); -EXPORT_SYMBOL(obd_memmax); -EXPORT_SYMBOL(obd_fail_loc); -EXPORT_SYMBOL(obd_timeout); -EXPORT_SYMBOL(obd_lustre_upcall); -EXPORT_SYMBOL(obd_sync_filter); -EXPORT_SYMBOL(ptlrpc_put_connection_superhack); -EXPORT_SYMBOL(ptlrpc_abort_inflight_superhack); -EXPORT_SYMBOL(proc_lustre_root); - -EXPORT_SYMBOL(lctl_fake_uuid); - -EXPORT_SYMBOL(class_register_type); -EXPORT_SYMBOL(class_unregister_type); -EXPORT_SYMBOL(class_get_type); -EXPORT_SYMBOL(class_put_type); 
-EXPORT_SYMBOL(class_name2dev); -EXPORT_SYMBOL(class_name2obd); -EXPORT_SYMBOL(class_uuid2dev); -EXPORT_SYMBOL(class_uuid2obd); -EXPORT_SYMBOL(class_export_get); -EXPORT_SYMBOL(class_export_put); -EXPORT_SYMBOL(class_new_export); -EXPORT_SYMBOL(class_unlink_export); -EXPORT_SYMBOL(class_import_get); -EXPORT_SYMBOL(class_import_put); -EXPORT_SYMBOL(class_new_import); -EXPORT_SYMBOL(class_destroy_import); -EXPORT_SYMBOL(class_connect); -EXPORT_SYMBOL(class_conn2export); -EXPORT_SYMBOL(class_conn2obd); -EXPORT_SYMBOL(class_conn2cliimp); -EXPORT_SYMBOL(class_conn2ldlmimp); -EXPORT_SYMBOL(class_disconnect); -EXPORT_SYMBOL(class_disconnect_exports); -EXPORT_SYMBOL(lustre_uuid_to_peer); - -/* uuid.c */ -EXPORT_SYMBOL(class_uuid_unparse); -EXPORT_SYMBOL(client_tgtuuid2obd); - -EXPORT_SYMBOL(class_handle_hash); -EXPORT_SYMBOL(class_handle_unhash); -EXPORT_SYMBOL(class_handle2object); - -#ifdef __KERNEL__ -static int __init init_obdclass(void) -#else -int init_obdclass(void) -#endif -{ - struct obd_device *obd; - int err; - int i; - - printk(KERN_INFO "OBD class driver Build Version: " BUILD_VERSION - ", info@clusterfs.com\n"); - - class_init_uuidlist(); - class_handle_init(); - - sema_init(&obd_conf_sem, 1); - INIT_LIST_HEAD(&obd_types); - - if ((err = misc_register(&obd_psdev))) { - CERROR("cannot register %d err %d\n", OBD_MINOR, err); - return err; - } - - /* This struct is already zerod for us (static global) */ - for (i = 0, obd = obd_dev; i < MAX_OBD_DEVICES; i++, obd++) - obd->obd_minor = i; - - err = obd_init_caches(); - if (err) - return err; - -#ifdef __KERNEL__ - obd_sysctl_init(); -#endif - -#ifdef LPROCFS - proc_lustre_root = proc_mkdir("lustre", proc_root_fs); - if (!proc_lustre_root) - printk(KERN_ERR "error registering /proc/fs/lustre\n"); - proc_version = lprocfs_add_vars(proc_lustre_root,lprocfs_version,NULL); -#else - proc_lustre_root = NULL; - proc_version = -1; -#endif - return 0; -} - -#ifdef LPROCFS -int obd_proc_read_version(char *page, char **start, 
off_t off, int count, int *eof, void *data) { - *eof = 1; - return snprintf(page, count, "%s\n", BUILD_VERSION); -} -#else -int obd_proc_read_version(char *page, char **start, off_t off, int count, int *eof, void *data) { return 0; } -#endif - -#ifdef __KERNEL__ -static void __exit cleanup_obdclass(void) -#else -static void cleanup_obdclass(void) -#endif -{ - int i; - ENTRY; - - misc_deregister(&obd_psdev); - for (i = 0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (obd->obd_type && obd->obd_set_up && - OBT(obd) && OBP(obd, detach)) { - /* XXX should this call generic detach otherwise? */ - OBP(obd, detach)(obd); - } - } - - obd_cleanup_caches(); -#ifdef __KERNEL__ - obd_sysctl_clean(); -#endif - if (proc_lustre_root) { - lprocfs_remove(proc_lustre_root); - proc_lustre_root = NULL; - } - - class_handle_cleanup(); - class_exit_uuidlist(); - - CERROR("obd mem max: %d leaked: %d\n", obd_memmax, - atomic_read(&obd_memory)); - EXIT; -} - -/* Check that we're building against the appropriate version of the Lustre - * kernel patch */ -#ifdef __KERNEL__ -#include <linux/lustre_version.h> -#define LUSTRE_MIN_VERSION 18 -#define LUSTRE_MAX_VERSION 19 -#if (LUSTRE_KERNEL_VERSION < LUSTRE_MIN_VERSION) -# error Cannot continue: Your Lustre kernel patch is older than the sources -#elif (LUSTRE_KERNEL_VERSION > LUSTRE_MAX_VERSION) -# error Cannot continue: Your Lustre sources are older than the kernel patch -#endif - #else -# warning "Lib Lustre - no versioning information" -#endif - -#ifdef __KERNEL__ -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Class Driver Build Version: " BUILD_VERSION); -MODULE_LICENSE("GPL"); - -module_init(init_obdclass); -module_exit(cleanup_obdclass); -#endif diff --git a/lustre/obdclass/debug.c b/lustre/obdclass/debug.c deleted file mode 100644 index f824b98..0000000 --- a/lustre/obdclass/debug.c +++ /dev/null @@ -1,173 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Helper routines for dumping data structs for debugging. - */ - -#define DEBUG_SUBSYSTEM D_OTHER - -#define EXPORT_SYMTAB -#ifndef __KERNEL__ -# include <liblustre.h> -#endif - -#include <linux/obd_ost.h> -#include <linux/obd_support.h> -#include <linux/lustre_debug.h> -#include <linux/lustre_net.h> - -int dump_ioo(struct obd_ioobj *ioo) -{ - CERROR("obd_ioobj: ioo_id="LPD64", ioo_gr="LPD64", ioo_type=%d, " - "ioo_bufct=%d\n", - ioo->ioo_id, ioo->ioo_gr, ioo->ioo_type, ioo->ioo_bufcnt); - return -EINVAL; -} - -int dump_lniobuf(struct niobuf_local *nb) -{ - CERROR("niobuf_local: offset="LPD64", len=%d, page=%p, rc=%d\n", - nb->offset, nb->len, nb->page, nb->rc); - CERROR("nb->page: index = %ld\n", nb->page ? 
nb->page->index : -1); - - return -EINVAL; -} - -int dump_rniobuf(struct niobuf_remote *nb) -{ - CERROR("niobuf_remote: offset="LPU64", len=%d, flags=%x\n", - nb->offset, nb->len, nb->flags); - - return -EINVAL; -} - -int dump_obdo(struct obdo *oa) -{ - __u32 valid = oa->o_valid; - - CERROR("obdo: o_valid = %08x\n", valid); - if (valid & OBD_MD_FLID) - CERROR("obdo: o_id = "LPD64"\n", oa->o_id); - if (valid & OBD_MD_FLATIME) - CERROR("obdo: o_atime = "LPD64"\n", oa->o_atime); - if (valid & OBD_MD_FLMTIME) - CERROR("obdo: o_mtime = "LPD64"\n", oa->o_mtime); - if (valid & OBD_MD_FLCTIME) - CERROR("obdo: o_ctime = "LPD64"\n", oa->o_ctime); - if (valid & OBD_MD_FLSIZE) - CERROR("obdo: o_size = "LPD64"\n", oa->o_size); - if (valid & OBD_MD_FLBLOCKS) /* allocation of space */ - CERROR("obdo: o_blocks = "LPD64"\n", oa->o_blocks); - if (valid & OBD_MD_FLBLKSZ) - CERROR("obdo: o_blksize = %d\n", oa->o_blksize); - if (valid & (OBD_MD_FLTYPE | OBD_MD_FLMODE)) - CERROR("obdo: o_mode = %o\n", - oa->o_mode & ((valid & OBD_MD_FLTYPE ? S_IFMT : 0) | - (valid & OBD_MD_FLMODE ? 
~S_IFMT : 0))); - if (valid & OBD_MD_FLUID) - CERROR("obdo: o_uid = %d\n", oa->o_uid); - if (valid & OBD_MD_FLGID) - CERROR("obdo: o_gid = %d\n", oa->o_gid); - if (valid & OBD_MD_FLFLAGS) - CERROR("obdo: o_flags = %x\n", oa->o_flags); - if (valid & OBD_MD_FLNLINK) - CERROR("obdo: o_nlink = %d\n", oa->o_nlink); - if (valid & OBD_MD_FLGENER) - CERROR("obdo: o_generation = %d\n", oa->o_generation); - - return -EINVAL; -} - -/* XXX assumes only a single page in request */ -/* -int dump_req(struct ptlrpc_request *req) -{ - struct ost_body *body = lustre_msg_buf(req->rq_reqmsg, 0); - struct obd_ioobj *ioo = lustre_msg_buf(req->rq_reqmsg, 1); - //struct niobuf *nb = lustre_msg_buf(req->rq_reqmsg, 2); - - dump_obdo(&body->oa); - //dump_niobuf(nb); - dump_ioo(ioo); - - return -EINVAL; -} -*/ - -#define LPDS sizeof(__u64) -int page_debug_setup(void *addr, int len, __u64 off, __u64 id) -{ - LASSERT(addr); - - off = cpu_to_le64 (off); - id = cpu_to_le64 (id); - memcpy(addr, (char *)&off, LPDS); - memcpy(addr + LPDS, (char *)&id, LPDS); - - addr += len - LPDS - LPDS; - memcpy(addr, (char *)&off, LPDS); - memcpy(addr + LPDS, (char *)&id, LPDS); - - return 0; -} - -int page_debug_check(char *who, void *addr, int end, __u64 off, __u64 id) -{ - __u64 ne_off; - int err = 0; - - LASSERT(addr); - - ne_off = le64_to_cpu (off); - id = le64_to_cpu (id); - if (memcmp(addr, (char *)&ne_off, LPDS)) { - CERROR("%s: id "LPX64" offset "LPU64" off: "LPX64" != " - LPX64"\n", who, id, off, *(__u64 *)addr, ne_off); - err = -EINVAL; - } - if (memcmp(addr + LPDS, (char *)&id, LPDS)) { - CERROR("%s: id "LPX64" offset "LPU64" id: "LPX64" != "LPX64"\n", - who, id, off, *(__u64 *)(addr + LPDS), id); - err = -EINVAL; - } - - addr += end - LPDS - LPDS; - if (memcmp(addr, (char *)&ne_off, LPDS)) { - CERROR("%s: id "LPX64" offset "LPU64" end off: "LPX64" != " - LPX64"\n", who, id, off, *(__u64 *)addr, ne_off); - err = -EINVAL; - } - if (memcmp(addr + LPDS, (char *)&id, LPDS)) { - CERROR("%s: id "LPX64" 
offset "LPU64" end id: "LPX64" != " - LPX64"\n", who, id, off, *(__u64 *)(addr + LPDS), id); - err = -EINVAL; - } - - return err; -} -#undef LPDS - -EXPORT_SYMBOL(dump_lniobuf); -EXPORT_SYMBOL(dump_rniobuf); -EXPORT_SYMBOL(dump_ioo); -//EXPORT_SYMBOL(dump_req); -EXPORT_SYMBOL(dump_obdo); -EXPORT_SYMBOL(page_debug_setup); -EXPORT_SYMBOL(page_debug_check); diff --git a/lustre/obdclass/fsfilt.c b/lustre/obdclass/fsfilt.c deleted file mode 100644 index 4357b79..0000000 --- a/lustre/obdclass/fsfilt.c +++ /dev/null @@ -1,107 +0,0 @@ -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_FILTER - -#include <linux/fs.h> -#include <linux/jbd.h> -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/slab.h> -#include <linux/kp30.h> -#include <linux/lustre_fsfilt.h> - -LIST_HEAD(fsfilt_types); - -static struct fsfilt_operations *fsfilt_search_type(const char *type) -{ - struct fsfilt_operations *found; - struct list_head *p; - - list_for_each(p, &fsfilt_types) { - found = list_entry(p, struct fsfilt_operations, fs_list); - if (!strcmp(found->fs_type, type)) { - return found; - } - } - return NULL; -} - -int fsfilt_register_ops(struct fsfilt_operations *fs_ops) -{ - struct fsfilt_operations *found; - - /* lock fsfilt_types list */ - if ((found = fsfilt_search_type(fs_ops->fs_type))) { - if (found != fs_ops) { - CERROR("different operations for type %s\n", - fs_ops->fs_type); - /* unlock fsfilt_types list */ - RETURN(-EEXIST); - } - } else { - PORTAL_MODULE_USE; - list_add(&fs_ops->fs_list, &fsfilt_types); - } - - /* unlock fsfilt_types list */ - return 0; -} - -void fsfilt_unregister_ops(struct fsfilt_operations *fs_ops) -{ - struct list_head *p; - - /* lock fsfilt_types list */ - list_for_each(p, &fsfilt_types) { - struct fsfilt_operations *found; - - found = list_entry(p, typeof(*found), fs_list); - if (found == fs_ops) { - list_del(p); - PORTAL_MODULE_UNUSE; - break; - } - } - /* unlock fsfilt_types list */ -} - -struct fsfilt_operations *fsfilt_get_ops(char *type) 
-{ - struct fsfilt_operations *fs_ops; - - /* lock fsfilt_types list */ - if (!(fs_ops = fsfilt_search_type(type))) { - char name[32]; - int rc; - - snprintf(name, sizeof(name) - 1, "fsfilt_%s", type); - name[sizeof(name) - 1] = '\0'; - - if ((rc = request_module(name))) { - fs_ops = fsfilt_search_type(type); - CDEBUG(D_INFO, "Loaded module '%s'\n", name); - if (!fs_ops) - rc = -ENOENT; - } - - if (rc) { - CERROR("Can't find fsfilt_%s interface\n", name); - RETURN(ERR_PTR(rc)); - /* unlock fsfilt_types list */ - } - } - __MOD_INC_USE_COUNT(fs_ops->fs_owner); - /* unlock fsfilt_types list */ - - return fs_ops; -} - -void fsfilt_put_ops(struct fsfilt_operations *fs_ops) -{ - __MOD_DEC_USE_COUNT(fs_ops->fs_owner); -} - - -EXPORT_SYMBOL(fsfilt_register_ops); -EXPORT_SYMBOL(fsfilt_unregister_ops); -EXPORT_SYMBOL(fsfilt_get_ops); -EXPORT_SYMBOL(fsfilt_put_ops); diff --git a/lustre/obdclass/fsfilt_ext3.c b/lustre/obdclass/fsfilt_ext3.c deleted file mode 100644 index 5f6322f..0000000 --- a/lustre/obdclass/fsfilt_ext3.c +++ /dev/null @@ -1,545 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lustre/lib/fsfilt_ext3.c - * Lustre filesystem abstraction routines - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_FILTER - -#include <linux/fs.h> -#include <linux/jbd.h> -#include <linux/slab.h> -#include <linux/pagemap.h> -#include <linux/quotaops.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> -#include <linux/ext3_xattr.h> -#include <linux/kp30.h> -#include <linux/lustre_fsfilt.h> -#include <linux/obd.h> -#include <linux/obd_class.h> -#include <linux/module.h> - -static kmem_cache_t *fcb_cache; -static atomic_t fcb_cache_count = ATOMIC_INIT(0); - -struct fsfilt_cb_data { - struct journal_callback cb_jcb; /* data private to jbd */ - fsfilt_cb_t cb_func; /* MDS/OBD completion function */ - struct obd_device *cb_obd; /* MDS/OBD completion device */ - __u64 cb_last_rcvd; /* MDS/OST last committed operation */ -}; - -#define EXT3_XATTR_INDEX_LUSTRE 5 -#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" - -/* - * We don't currently need any additional blocks for rmdir and - * unlink transactions because we are storing the OST oa_id inside - * the inode (which we will be changing anyways as part of this - * transaction). 
- */ -static void *fsfilt_ext3_start(struct inode *inode, int op) -{ - /* For updates to the last recieved file */ - int nblocks = EXT3_DATA_TRANS_BLOCKS; - void *handle; - - switch(op) { - case FSFILT_OP_RMDIR: - case FSFILT_OP_UNLINK: - nblocks += EXT3_DELETE_TRANS_BLOCKS; - break; - case FSFILT_OP_RENAME: - /* modify additional directory */ - nblocks += EXT3_DATA_TRANS_BLOCKS; - /* no break */ - case FSFILT_OP_SYMLINK: - /* additional block + block bitmap + GDT for long symlink */ - nblocks += 3; - /* no break */ - case FSFILT_OP_CREATE: - case FSFILT_OP_MKDIR: - case FSFILT_OP_MKNOD: - /* modify one inode + block bitmap + GDT */ - nblocks += 3; - /* no break */ - case FSFILT_OP_LINK: - /* modify parent directory */ - nblocks += EXT3_INDEX_EXTRA_TRANS_BLOCKS+EXT3_DATA_TRANS_BLOCKS; - break; - case FSFILT_OP_SETATTR: - /* Setattr on inode */ - nblocks += 1; - break; - default: CERROR("unknown transaction start op %d\n", op); - LBUG(); - } - - LASSERT(!current->journal_info); - lock_kernel(); - handle = journal_start(EXT3_JOURNAL(inode), nblocks); - unlock_kernel(); - - return handle; -} - -/* - * Calculate the number of buffer credits needed to write multiple pages in - * a single ext3 transaction. No, this shouldn't be here, but as yet ext3 - * doesn't have a nice API for calculating this sort of thing in advance. - * - * See comment above ext3_writepage_trans_blocks for details. We assume - * no data journaling is being done, but it does allow for all of the pages - * being non-contiguous. If we are guaranteed contiguous pages we could - * reduce the number of (d)indirect blocks a lot. 
- * - * With N blocks per page and P pages, for each inode we have at most: - * N*P indirect - * min(N*P, blocksize/4 + 1) dindirect blocks - * niocount tindirect - * - * For the entire filesystem, we have at most: - * min(sum(nindir + P), ngroups) bitmap blocks (from the above) - * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above) - * objcount inode blocks - * 1 superblock - * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quota files - * - * 1 EXT3_DATA_TRANS_BLOCKS for the last_rcvd update. - */ -static int fsfilt_ext3_credits_needed(int objcount, struct fsfilt_objinfo *fso) -{ - struct super_block *sb = fso->fso_dentry->d_inode->i_sb; - int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); - int addrpp = EXT3_ADDR_PER_BLOCK(sb) * blockpp; - int nbitmaps = 0; - int ngdblocks = 0; - int needed = objcount + 1; - int i; - - for (i = 0; i < objcount; i++, fso++) { - int nblocks = fso->fso_bufcnt * blockpp; - int ndindirect = min(nblocks, addrpp + 1); - int nindir = nblocks + ndindirect + 1; - - nbitmaps += nindir + nblocks; - ngdblocks += nindir + nblocks; - - needed += nindir; - } - - /* Assumes ext3 and ext3 have same sb_info layout at the start. */ - if (nbitmaps > EXT3_SB(sb)->s_groups_count) - nbitmaps = EXT3_SB(sb)->s_groups_count; - if (ngdblocks > EXT3_SB(sb)->s_gdb_count) - ngdblocks = EXT3_SB(sb)->s_gdb_count; - - needed += nbitmaps + ngdblocks; - - /* last_rcvd update */ - needed += EXT3_DATA_TRANS_BLOCKS; - -#ifdef CONFIG_QUOTA - /* We assume that there will be 1 bit set in s_dquot.flags for each - * quota file that is active. This is at least true for now. - */ - needed += hweight32(sb_any_quota_enabled(sb)) * - EXT3_SINGLEDATA_TRANS_BLOCKS; -#endif - - return needed; -} - -/* We have to start a huge journal transaction here to hold all of the - * metadata for the pages being written here. 
This is necessitated by - * the fact that we do lots of prepare_write operations before we do - * any of the matching commit_write operations, so even if we split - * up to use "smaller" transactions none of them could complete until - * all of them were opened. By having a single journal transaction, - * we eliminate duplicate reservations for common blocks like the - * superblock and group descriptors or bitmaps. - * - * We will start the transaction here, but each prepare_write will - * add a refcount to the transaction, and each commit_write will - * remove a refcount. The transaction will be closed when all of - * the pages have been written. - */ -static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb) -{ - journal_t *journal; - handle_t *handle; - int needed; - ENTRY; - - LASSERT(!current->journal_info); - journal = EXT3_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; - needed = fsfilt_ext3_credits_needed(objcount, fso); - - /* The number of blocks we could _possibly_ dirty can very large. - * We reduce our request if it is absurd (and we couldn't get that - * many credits for a single handle anyways). - * - * At some point we have to limit the size of I/Os sent at one time, - * increase the size of the journal, or we have to calculate the - * actual journal requirements more carefully by checking all of - * the blocks instead of being maximally pessimistic. It remains to - * be seen if this is a real problem or not. 
- */ - if (needed > journal->j_max_transaction_buffers) { - CERROR("want too many journal credits (%d) using %d instead\n", - needed, journal->j_max_transaction_buffers); - needed = journal->j_max_transaction_buffers; - } - - lock_kernel(); - handle = journal_start(journal, needed); - unlock_kernel(); - if (IS_ERR(handle)) - CERROR("can't get handle for %d credits: rc = %ld\n", needed, - PTR_ERR(handle)); - - RETURN(handle); -} - -static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync) -{ - int rc; - handle_t *handle = h; - - if (force_sync) - handle->h_sync = 1; /* recovery likes this */ - - lock_kernel(); - rc = journal_stop(handle); - unlock_kernel(); - - return rc; -} - -static int fsfilt_ext3_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr, int do_trunc) -{ - struct inode *inode = dentry->d_inode; - int rc; - - lock_kernel(); - - /* A _really_ horrible hack to avoid removing the data stored - * in the block pointers; this is really the "small" stripe MD data. - * We can avoid further hackery by virtue of the MDS file size being - * zero all the time (which doesn't invoke block truncate at unlink - * time), so we assert we never change the MDS file size from zero. 
- */ - if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { - /* ATTR_SIZE would invoke truncate: clear it */ - iattr->ia_valid &= ~ATTR_SIZE; - inode->i_size = iattr->ia_size; - - /* make sure _something_ gets set - so new inode - * goes to disk (probably won't work over XFS - */ - if (!iattr->ia_valid & ATTR_MODE) { - iattr->ia_valid |= ATTR_MODE; - iattr->ia_mode = inode->i_mode; - } - } - if (inode->i_op->setattr) - rc = inode->i_op->setattr(dentry, iattr); - else{ - rc = inode_change_ok(inode, iattr); - if (!rc) - rc = inode_setattr(inode, iattr); - } - - unlock_kernel(); - - return rc; -} - -static int fsfilt_ext3_set_md(struct inode *inode, void *handle, - void *lmm, int lmm_size) -{ - int rc; - - /* Nasty hack city - store stripe MD data in the block pointers if - * it will fit, because putting it in an EA currently kills the MDS - * performance. We'll fix this with "fast EAs" in the future. - */ - if (lmm_size <= sizeof(EXT3_I(inode)->i_data) - - sizeof(EXT3_I(inode)->i_data[0])) { - /* XXX old_size is debugging only */ - int old_size = EXT3_I(inode)->i_data[0]; - if (old_size != 0) { - LASSERT(old_size < sizeof(EXT3_I(inode)->i_data)); - CERROR("setting EA on %lu again... 
interesting\n", - inode->i_ino); - } - - EXT3_I(inode)->i_data[0] = cpu_to_le32(lmm_size); - memcpy(&EXT3_I(inode)->i_data[1], lmm, lmm_size); - mark_inode_dirty(inode); - return 0; - } else { - down(&inode->i_sem); - lock_kernel(); - rc = ext3_xattr_set(handle, inode, EXT3_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0); - unlock_kernel(); - up(&inode->i_sem); - } - - if (rc) - CERROR("error adding MD data to inode %lu: rc = %d\n", - inode->i_ino, rc); - return rc; -} - -static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) -{ - int rc; - - if (EXT3_I(inode)->i_data[0]) { - int size = le32_to_cpu(EXT3_I(inode)->i_data[0]); - LASSERT(size < sizeof(EXT3_I(inode)->i_data)); - if (lmm) { - if (size > lmm_size) - return -ERANGE; - memcpy(lmm, &EXT3_I(inode)->i_data[1], size); - } - return size; - } - - down(&inode->i_sem); - lock_kernel(); - rc = ext3_xattr_get(inode, EXT3_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size); - unlock_kernel(); - up(&inode->i_sem); - - /* This gives us the MD size */ - if (lmm == NULL) - return (rc == -ENODATA) ? 0 : rc; - - if (rc < 0) { - CDEBUG(D_INFO, "error getting EA %s from inode %lu: " - "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc); - memset(lmm, 0, lmm_size); - return (rc == -ENODATA) ? 
0 : rc; - } - - return rc; -} - -static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count, - loff_t *off) -{ - struct inode *inode = file->f_dentry->d_inode; - int rc = 0; - - if (S_ISREG(inode->i_mode)) - rc = file->f_op->read(file, buf, count, off); - else { - const int blkbits = inode->i_sb->s_blocksize_bits; - const int blksize = inode->i_sb->s_blocksize; - - CDEBUG(D_EXT2, "reading "LPSZ" at dir %lu+%llu\n", - count, inode->i_ino, *off); - while (count > 0) { - struct buffer_head *bh; - - bh = NULL; - if (*off < inode->i_size) { - int err = 0; - - bh = ext3_bread(NULL, inode, *off >> blkbits, - 0, &err); - - CDEBUG(D_EXT2, "read %u@%llu\n", blksize, *off); - - if (bh) { - memcpy(buf, bh->b_data, blksize); - brelse(bh); - } else if (err) { - /* XXX in theory we should just fake - * this buffer and continue like ext3, - * especially if this is a partial read - */ - CERROR("error read dir %lu+%llu: %d\n", - inode->i_ino, *off, err); - RETURN(err); - } - } - if (!bh) { - struct ext3_dir_entry_2 *fake = (void *)buf; - - CDEBUG(D_EXT2, "fake %u@%llu\n", blksize, *off); - memset(fake, 0, sizeof(*fake)); - fake->rec_len = cpu_to_le32(blksize); - } - count -= blksize; - buf += blksize; - *off += blksize; - rc += blksize; - } - } - - return rc; -} - -static void fsfilt_ext3_cb_func(struct journal_callback *jcb, int error) -{ - struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; - - fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error); - - OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb); - atomic_dec(&fcb_cache_count); -} - -static int fsfilt_ext3_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) -{ - struct fsfilt_cb_data *fcb; - - OBD_SLAB_ALLOC(fcb, fcb_cache, GFP_NOFS, sizeof *fcb); - if (fcb == NULL) - RETURN(-ENOMEM); - - atomic_inc(&fcb_cache_count); - fcb->cb_func = cb_func; - fcb->cb_obd = obd; - fcb->cb_last_rcvd = last_rcvd; - - CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); 
- lock_kernel(); - /* Note that an "incompatible pointer" warning here is OK for now */ - journal_callback_set(handle, fsfilt_ext3_cb_func, - (struct journal_callback *)fcb); - unlock_kernel(); - - return 0; -} - -static int fsfilt_ext3_journal_data(struct file *filp) -{ - struct inode *inode = filp->f_dentry->d_inode; - - EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; - - return 0; -} - -/* - * We need to hack the return value for the free inode counts because - * the current EA code requires one filesystem block per inode with EAs, - * so it is possible to run out of blocks before we run out of inodes. - * - * This can be removed when the ext3 EA code is fixed. - */ -static int fsfilt_ext3_statfs(struct super_block *sb, struct obd_statfs *osfs) -{ - struct statfs sfs; - int rc = vfs_statfs(sb, &sfs); - - if (!rc && sfs.f_bfree < sfs.f_ffree) { - sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree; - sfs.f_ffree = sfs.f_bfree; - } - - statfs_pack(osfs, &sfs); - return rc; -} - -static int fsfilt_ext3_sync(struct super_block *sb) -{ - return ext3_force_commit(sb); -} - -extern int ext3_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); -static int fsfilt_ext3_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize) -{ - return ext3_prep_san_write(inode, blocks, nblocks, newsize); -} - -static struct fsfilt_operations fsfilt_ext3_ops = { - fs_type: "ext3", - fs_owner: THIS_MODULE, - fs_start: fsfilt_ext3_start, - fs_brw_start: fsfilt_ext3_brw_start, - fs_commit: fsfilt_ext3_commit, - fs_setattr: fsfilt_ext3_setattr, - fs_set_md: fsfilt_ext3_set_md, - fs_get_md: fsfilt_ext3_get_md, - fs_readpage: fsfilt_ext3_readpage, - fs_journal_data: fsfilt_ext3_journal_data, - fs_set_last_rcvd: fsfilt_ext3_set_last_rcvd, - fs_statfs: fsfilt_ext3_statfs, - fs_sync: fsfilt_ext3_sync, - fs_prep_san_write: fsfilt_ext3_prep_san_write, -}; - -static int __init fsfilt_ext3_init(void) -{ - int rc; - - //rc = 
ext3_xattr_register(); - fcb_cache = kmem_cache_create("fsfilt_ext3_fcb", - sizeof(struct fsfilt_cb_data), 0, - 0, NULL, NULL); - if (!fcb_cache) { - CERROR("error allocating fsfilt journal callback cache\n"); - GOTO(out, rc = -ENOMEM); - } - - rc = fsfilt_register_ops(&fsfilt_ext3_ops); - - if (rc) - kmem_cache_destroy(fcb_cache); -out: - return rc; -} - -static void __exit fsfilt_ext3_exit(void) -{ - int rc; - - fsfilt_unregister_ops(&fsfilt_ext3_ops); - rc = kmem_cache_destroy(fcb_cache); - - if (rc || atomic_read(&fcb_cache_count)) { - CERROR("can't free fsfilt callback cache: count %d, rc = %d\n", - atomic_read(&fcb_cache_count), rc); - } - - //rc = ext3_xattr_unregister(); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre ext3 Filesystem Helper v0.1"); -MODULE_LICENSE("GPL"); - -module_init(fsfilt_ext3_init); -module_exit(fsfilt_ext3_exit); diff --git a/lustre/obdclass/fsfilt_extN.c b/lustre/obdclass/fsfilt_extN.c deleted file mode 100644 index 1fba0f4..0000000 --- a/lustre/obdclass/fsfilt_extN.c +++ /dev/null @@ -1,545 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lustre/lib/fsfilt_extN.c - * Lustre filesystem abstraction routines - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_FILTER - -#include <linux/fs.h> -#include <linux/jbd.h> -#include <linux/slab.h> -#include <linux/pagemap.h> -#include <linux/quotaops.h> -#include <linux/extN_fs.h> -#include <linux/extN_jbd.h> -#include <linux/extN_xattr.h> -#include <linux/kp30.h> -#include <linux/lustre_fsfilt.h> -#include <linux/obd.h> -#include <linux/obd_class.h> -#include <linux/module.h> - -static kmem_cache_t *fcb_cache; -static atomic_t fcb_cache_count = ATOMIC_INIT(0); - -struct fsfilt_cb_data { - struct journal_callback cb_jcb; /* data private to jbd */ - fsfilt_cb_t cb_func; /* MDS/OBD completion function */ - struct obd_device *cb_obd; /* MDS/OBD completion device */ - __u64 cb_last_rcvd; /* MDS/OST last committed operation */ -}; - -#define EXTN_XATTR_INDEX_LUSTRE 5 -#define XATTR_LUSTRE_MDS_OBJID "system.lustre_mds_objid" - -/* - * We don't currently need any additional blocks for rmdir and - * unlink transactions because we are storing the OST oa_id inside - * the inode (which we will be changing anyways as part of this - * transaction). 
- */ -static void *fsfilt_extN_start(struct inode *inode, int op) -{ - /* For updates to the last recieved file */ - int nblocks = EXTN_DATA_TRANS_BLOCKS; - void *handle; - - switch(op) { - case FSFILT_OP_RMDIR: - case FSFILT_OP_UNLINK: - nblocks += EXTN_DELETE_TRANS_BLOCKS; - break; - case FSFILT_OP_RENAME: - /* modify additional directory */ - nblocks += EXTN_DATA_TRANS_BLOCKS; - /* no break */ - case FSFILT_OP_SYMLINK: - /* additional block + block bitmap + GDT for long symlink */ - nblocks += 3; - /* no break */ - case FSFILT_OP_CREATE: - case FSFILT_OP_MKDIR: - case FSFILT_OP_MKNOD: - /* modify one inode + block bitmap + GDT */ - nblocks += 3; - /* no break */ - case FSFILT_OP_LINK: - /* modify parent directory */ - nblocks += EXTN_INDEX_EXTRA_TRANS_BLOCKS+EXTN_DATA_TRANS_BLOCKS; - break; - case FSFILT_OP_SETATTR: - /* Setattr on inode */ - nblocks += 1; - break; - default: CERROR("unknown transaction start op %d\n", op); - LBUG(); - } - - LASSERT(!current->journal_info); - lock_kernel(); - handle = journal_start(EXTN_JOURNAL(inode), nblocks); - unlock_kernel(); - - return handle; -} - -/* - * Calculate the number of buffer credits needed to write multiple pages in - * a single extN transaction. No, this shouldn't be here, but as yet extN - * doesn't have a nice API for calculating this sort of thing in advance. - * - * See comment above extN_writepage_trans_blocks for details. We assume - * no data journaling is being done, but it does allow for all of the pages - * being non-contiguous. If we are guaranteed contiguous pages we could - * reduce the number of (d)indirect blocks a lot. 
- * - * With N blocks per page and P pages, for each inode we have at most: - * N*P indirect - * min(N*P, blocksize/4 + 1) dindirect blocks - * niocount tindirect - * - * For the entire filesystem, we have at most: - * min(sum(nindir + P), ngroups) bitmap blocks (from the above) - * min(sum(nindir + P), gdblocks) group descriptor blocks (from the above) - * objcount inode blocks - * 1 superblock - * 2 * EXTN_SINGLEDATA_TRANS_BLOCKS for the quota files - * - * 1 EXTN_DATA_TRANS_BLOCKS for the last_rcvd update. - */ -static int fsfilt_extN_credits_needed(int objcount, struct fsfilt_objinfo *fso) -{ - struct super_block *sb = fso->fso_dentry->d_inode->i_sb; - int blockpp = 1 << (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); - int addrpp = EXTN_ADDR_PER_BLOCK(sb) * blockpp; - int nbitmaps = 0; - int ngdblocks = 0; - int needed = objcount + 1; - int i; - - for (i = 0; i < objcount; i++, fso++) { - int nblocks = fso->fso_bufcnt * blockpp; - int ndindirect = min(nblocks, addrpp + 1); - int nindir = nblocks + ndindirect + 1; - - nbitmaps += nindir + nblocks; - ngdblocks += nindir + nblocks; - - needed += nindir; - } - - /* Assumes extN and extN have same sb_info layout at the start. */ - if (nbitmaps > EXTN_SB(sb)->s_groups_count) - nbitmaps = EXTN_SB(sb)->s_groups_count; - if (ngdblocks > EXTN_SB(sb)->s_gdb_count) - ngdblocks = EXTN_SB(sb)->s_gdb_count; - - needed += nbitmaps + ngdblocks; - - /* last_rcvd update */ - needed += EXTN_DATA_TRANS_BLOCKS; - -#ifdef CONFIG_QUOTA - /* We assume that there will be 1 bit set in s_dquot.flags for each - * quota file that is active. This is at least true for now. - */ - needed += hweight32(sb_any_quota_enabled(sb)) * - EXTN_SINGLEDATA_TRANS_BLOCKS; -#endif - - return needed; -} - -/* We have to start a huge journal transaction here to hold all of the - * metadata for the pages being written here. 
This is necessitated by - * the fact that we do lots of prepare_write operations before we do - * any of the matching commit_write operations, so even if we split - * up to use "smaller" transactions none of them could complete until - * all of them were opened. By having a single journal transaction, - * we eliminate duplicate reservations for common blocks like the - * superblock and group descriptors or bitmaps. - * - * We will start the transaction here, but each prepare_write will - * add a refcount to the transaction, and each commit_write will - * remove a refcount. The transaction will be closed when all of - * the pages have been written. - */ -static void *fsfilt_extN_brw_start(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb) -{ - journal_t *journal; - handle_t *handle; - int needed; - ENTRY; - - LASSERT(!current->journal_info); - journal = EXTN_SB(fso->fso_dentry->d_inode->i_sb)->s_journal; - needed = fsfilt_extN_credits_needed(objcount, fso); - - /* The number of blocks we could _possibly_ dirty can very large. - * We reduce our request if it is absurd (and we couldn't get that - * many credits for a single handle anyways). - * - * At some point we have to limit the size of I/Os sent at one time, - * increase the size of the journal, or we have to calculate the - * actual journal requirements more carefully by checking all of - * the blocks instead of being maximally pessimistic. It remains to - * be seen if this is a real problem or not. 
- */ - if (needed > journal->j_max_transaction_buffers) { - CERROR("want too many journal credits (%d) using %d instead\n", - needed, journal->j_max_transaction_buffers); - needed = journal->j_max_transaction_buffers; - } - - lock_kernel(); - handle = journal_start(journal, needed); - unlock_kernel(); - if (IS_ERR(handle)) - CERROR("can't get handle for %d credits: rc = %ld\n", needed, - PTR_ERR(handle)); - - RETURN(handle); -} - -static int fsfilt_extN_commit(struct inode *inode, void *h, int force_sync) -{ - int rc; - handle_t *handle = h; - - if (force_sync) - handle->h_sync = 1; /* recovery likes this */ - - lock_kernel(); - rc = journal_stop(handle); - unlock_kernel(); - - return rc; -} - -static int fsfilt_extN_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr, int do_trunc) -{ - struct inode *inode = dentry->d_inode; - int rc; - - lock_kernel(); - - /* A _really_ horrible hack to avoid removing the data stored - * in the block pointers; this is really the "small" stripe MD data. - * We can avoid further hackery by virtue of the MDS file size being - * zero all the time (which doesn't invoke block truncate at unlink - * time), so we assert we never change the MDS file size from zero. 
- */ - if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { - /* ATTR_SIZE would invoke truncate: clear it */ - iattr->ia_valid &= ~ATTR_SIZE; - inode->i_size = iattr->ia_size; - - /* make sure _something_ gets set - so new inode - * goes to disk (probably won't work over XFS - */ - if (!iattr->ia_valid & ATTR_MODE) { - iattr->ia_valid |= ATTR_MODE; - iattr->ia_mode = inode->i_mode; - } - } - if (inode->i_op->setattr) - rc = inode->i_op->setattr(dentry, iattr); - else{ - rc = inode_change_ok(inode, iattr); - if (!rc) - rc = inode_setattr(inode, iattr); - } - - unlock_kernel(); - - return rc; -} - -static int fsfilt_extN_set_md(struct inode *inode, void *handle, - void *lmm, int lmm_size) -{ - int rc; - - /* Nasty hack city - store stripe MD data in the block pointers if - * it will fit, because putting it in an EA currently kills the MDS - * performance. We'll fix this with "fast EAs" in the future. - */ - if (lmm_size <= sizeof(EXTN_I(inode)->i_data) - - sizeof(EXTN_I(inode)->i_data[0])) { - /* XXX old_size is debugging only */ - int old_size = EXTN_I(inode)->i_data[0]; - if (old_size != 0) { - LASSERT(old_size < sizeof(EXTN_I(inode)->i_data)); - CERROR("setting EA on %lu again... 
interesting\n", - inode->i_ino); - } - - EXTN_I(inode)->i_data[0] = cpu_to_le32(lmm_size); - memcpy(&EXTN_I(inode)->i_data[1], lmm, lmm_size); - mark_inode_dirty(inode); - return 0; - } else { - down(&inode->i_sem); - lock_kernel(); - rc = extN_xattr_set(handle, inode, EXTN_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size, 0); - unlock_kernel(); - up(&inode->i_sem); - } - - if (rc) - CERROR("error adding MD data to inode %lu: rc = %d\n", - inode->i_ino, rc); - return rc; -} - -static int fsfilt_extN_get_md(struct inode *inode, void *lmm, int lmm_size) -{ - int rc; - - if (EXTN_I(inode)->i_data[0]) { - int size = le32_to_cpu(EXTN_I(inode)->i_data[0]); - LASSERT(size < sizeof(EXTN_I(inode)->i_data)); - if (lmm) { - if (size > lmm_size) - return -ERANGE; - memcpy(lmm, &EXTN_I(inode)->i_data[1], size); - } - return size; - } - - down(&inode->i_sem); - lock_kernel(); - rc = extN_xattr_get(inode, EXTN_XATTR_INDEX_LUSTRE, - XATTR_LUSTRE_MDS_OBJID, lmm, lmm_size); - unlock_kernel(); - up(&inode->i_sem); - - /* This gives us the MD size */ - if (lmm == NULL) - return (rc == -ENODATA) ? 0 : rc; - - if (rc < 0) { - CDEBUG(D_INFO, "error getting EA %s from inode %lu: " - "rc = %d\n", XATTR_LUSTRE_MDS_OBJID, inode->i_ino, rc); - memset(lmm, 0, lmm_size); - return (rc == -ENODATA) ? 
0 : rc; - } - - return rc; -} - -static ssize_t fsfilt_extN_readpage(struct file *file, char *buf, size_t count, - loff_t *off) -{ - struct inode *inode = file->f_dentry->d_inode; - int rc = 0; - - if (S_ISREG(inode->i_mode)) - rc = file->f_op->read(file, buf, count, off); - else { - const int blkbits = inode->i_sb->s_blocksize_bits; - const int blksize = inode->i_sb->s_blocksize; - - CDEBUG(D_EXT2, "reading "LPSZ" at dir %lu+%llu\n", - count, inode->i_ino, *off); - while (count > 0) { - struct buffer_head *bh; - - bh = NULL; - if (*off < inode->i_size) { - int err = 0; - - bh = extN_bread(NULL, inode, *off >> blkbits, - 0, &err); - - CDEBUG(D_EXT2, "read %u@%llu\n", blksize, *off); - - if (bh) { - memcpy(buf, bh->b_data, blksize); - brelse(bh); - } else if (err) { - /* XXX in theory we should just fake - * this buffer and continue like extN, - * especially if this is a partial read - */ - CERROR("error read dir %lu+%llu: %d\n", - inode->i_ino, *off, err); - RETURN(err); - } - } - if (!bh) { - struct extN_dir_entry_2 *fake = (void *)buf; - - CDEBUG(D_EXT2, "fake %u@%llu\n", blksize, *off); - memset(fake, 0, sizeof(*fake)); - fake->rec_len = cpu_to_le32(blksize); - } - count -= blksize; - buf += blksize; - *off += blksize; - rc += blksize; - } - } - - return rc; -} - -static void fsfilt_extN_cb_func(struct journal_callback *jcb, int error) -{ - struct fsfilt_cb_data *fcb = (struct fsfilt_cb_data *)jcb; - - fcb->cb_func(fcb->cb_obd, fcb->cb_last_rcvd, error); - - OBD_SLAB_FREE(fcb, fcb_cache, sizeof *fcb); - atomic_dec(&fcb_cache_count); -} - -static int fsfilt_extN_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) -{ - struct fsfilt_cb_data *fcb; - - OBD_SLAB_ALLOC(fcb, fcb_cache, GFP_NOFS, sizeof *fcb); - if (fcb == NULL) - RETURN(-ENOMEM); - - atomic_inc(&fcb_cache_count); - fcb->cb_func = cb_func; - fcb->cb_obd = obd; - fcb->cb_last_rcvd = last_rcvd; - - CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); 
- lock_kernel(); - /* Note that an "incompatible pointer" warning here is OK for now */ - journal_callback_set(handle, fsfilt_extN_cb_func, - (struct journal_callback *)fcb); - unlock_kernel(); - - return 0; -} - -static int fsfilt_extN_journal_data(struct file *filp) -{ - struct inode *inode = filp->f_dentry->d_inode; - - EXTN_I(inode)->i_flags |= EXTN_JOURNAL_DATA_FL; - - return 0; -} - -/* - * We need to hack the return value for the free inode counts because - * the current EA code requires one filesystem block per inode with EAs, - * so it is possible to run out of blocks before we run out of inodes. - * - * This can be removed when the extN EA code is fixed. - */ -static int fsfilt_extN_statfs(struct super_block *sb, struct obd_statfs *osfs) -{ - struct statfs sfs; - int rc = vfs_statfs(sb, &sfs); - - if (!rc && sfs.f_bfree < sfs.f_ffree) { - sfs.f_files = (sfs.f_files - sfs.f_ffree) + sfs.f_bfree; - sfs.f_ffree = sfs.f_bfree; - } - - statfs_pack(osfs, &sfs); - return rc; -} - -static int fsfilt_extN_sync(struct super_block *sb) -{ - return extN_force_commit(sb); -} - -extern int extN_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize); -static int fsfilt_extN_prep_san_write(struct inode *inode, long *blocks, - int nblocks, loff_t newsize) -{ - return extN_prep_san_write(inode, blocks, nblocks, newsize); -} - -static struct fsfilt_operations fsfilt_extN_ops = { - fs_type: "extN", - fs_owner: THIS_MODULE, - fs_start: fsfilt_extN_start, - fs_brw_start: fsfilt_extN_brw_start, - fs_commit: fsfilt_extN_commit, - fs_setattr: fsfilt_extN_setattr, - fs_set_md: fsfilt_extN_set_md, - fs_get_md: fsfilt_extN_get_md, - fs_readpage: fsfilt_extN_readpage, - fs_journal_data: fsfilt_extN_journal_data, - fs_set_last_rcvd: fsfilt_extN_set_last_rcvd, - fs_statfs: fsfilt_extN_statfs, - fs_sync: fsfilt_extN_sync, - fs_prep_san_write: fsfilt_extN_prep_san_write, -}; - -static int __init fsfilt_extN_init(void) -{ - int rc; - - //rc = 
extN_xattr_register(); - fcb_cache = kmem_cache_create("fsfilt_extN_fcb", - sizeof(struct fsfilt_cb_data), 0, - 0, NULL, NULL); - if (!fcb_cache) { - CERROR("error allocating fsfilt journal callback cache\n"); - GOTO(out, rc = -ENOMEM); - } - - rc = fsfilt_register_ops(&fsfilt_extN_ops); - - if (rc) - kmem_cache_destroy(fcb_cache); -out: - return rc; -} - -static void __exit fsfilt_extN_exit(void) -{ - int rc; - - fsfilt_unregister_ops(&fsfilt_extN_ops); - rc = kmem_cache_destroy(fcb_cache); - - if (rc || atomic_read(&fcb_cache_count)) { - CERROR("can't free fsfilt callback cache: count %d, rc = %d\n", - atomic_read(&fcb_cache_count), rc); - } - - //rc = extN_xattr_unregister(); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre extN Filesystem Helper v0.1"); -MODULE_LICENSE("GPL"); - -module_init(fsfilt_extN_init); -module_exit(fsfilt_extN_exit); diff --git a/lustre/obdclass/fsfilt_reiserfs.c b/lustre/obdclass/fsfilt_reiserfs.c deleted file mode 100644 index ccefb92..0000000 --- a/lustre/obdclass/fsfilt_reiserfs.c +++ /dev/null @@ -1,201 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lustre/lib/fsfilt_reiserfs.c - * Lustre filesystem abstraction routines - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * NOTE - According to Hans Reiser, this could actually be implemented more - * efficiently than creating a directory and putting ASCII objids in it. - * Instead, we should return the reiserfs object ID as the lustre objid - * (although I'm not sure what impact that would have on backup/restore). - */ - -#define DEBUG_SUBSYSTEM S_FILTER - -#include <linux/fs.h> -#include <linux/jbd.h> -#include <linux/slab.h> -#include <linux/pagemap.h> -#include <linux/quotaops.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include <linux/init.h> -#include <asm/statfs.h> -#endif -#include <linux/kp30.h> -#include <linux/lustre_fsfilt.h> -#include <linux/obd.h> -#include <linux/obd_class.h> -#include <linux/module.h> - -static void *fsfilt_reiserfs_start(struct inode *inode, int op) -{ - return (void *)0xf00f00be; -} - -static void *fsfilt_reiserfs_brw_start(int objcount, struct fsfilt_objinfo *fso, - int niocount, struct niobuf_remote *nb) -{ - return (void *)0xf00f00be; -} - -static int fsfilt_reiserfs_commit(struct inode *inode, void *handle, - int force_sync) -{ - if (handle != (void *)0xf00f00be) { - CERROR("bad handle %p", handle); - return -EINVAL; - } - - return 0; -} - -static int fsfilt_reiserfs_setattr(struct dentry *dentry, void *handle, - struct iattr *iattr, int do_trunc) -{ - struct inode *inode = dentry->d_inode; - int rc; - - lock_kernel(); - - /* A _really_ horrible hack to avoid removing the data stored - * in the block pointers; this is really the "small" stripe MD data. - * We can avoid further hackery by virtue of the MDS file size being - * zero all the time (which doesn't invoke block truncate at unlink - * time), so we assert we never change the MDS file size from zero. 
- */ - if (iattr->ia_valid & ATTR_SIZE && !do_trunc) { - /* ATTR_SIZE would invoke truncate: clear it */ - iattr->ia_valid &= ~ATTR_SIZE; - inode->i_size = iattr->ia_size; - - /* make sure _something_ gets set - so new inode - * goes to disk (probably won't work over XFS - */ - if (!iattr->ia_valid & ATTR_MODE) { - iattr->ia_valid |= ATTR_MODE; - iattr->ia_mode = inode->i_mode; - } - } - if (inode->i_op->setattr) - rc = inode->i_op->setattr(dentry, iattr); - else - rc = inode_setattr(inode, iattr); - - unlock_kernel(); - - return rc; -} - -static int fsfilt_reiserfs_set_md(struct inode *inode, void *handle, - void *lmm, int lmm_size) -{ - /* XXX write stripe data into MDS file itself */ - CERROR("not implemented yet\n"); - - return -ENOSYS; -} - -static int fsfilt_reiserfs_get_md(struct inode *inode, void *lmm, int lmm_size) -{ - if (lmm == NULL) - return inode->i_size; - - CERROR("not implemented yet\n"); - return -ENOSYS; -} - -static ssize_t fsfilt_reiserfs_readpage(struct file *file, char *buf, size_t count, - loff_t *offset) -{ - return file->f_op->read(file, buf, count, offset); -} - -static int fsfilt_reiserfs_set_last_rcvd(struct obd_device *obd, __u64 last_rcvd, - void *handle, fsfilt_cb_t cb_func) -{ - static long next = 0; - - if (time_after(jiffies, next)) { - CERROR("no journal callback kernel patch, faking it...\n"); - next = jiffies + 300 * HZ; - } - - cb_func(obd, last_rcvd, 0); - - return 0; -} - -static int fsfilt_reiserfs_journal_data(struct file *filp) -{ - CERROR("not implemented yet\n"); - return 0; -} - -static int fsfilt_reiserfs_statfs(struct super_block *sb, struct obd_statfs *osfs) -{ - struct statfs sfs; - int rc = vfs_statfs(sb, &sfs); - - statfs_pack(osfs, &sfs); - return rc; -} - -static int fsfilt_reiserfs_sync(struct super_block *sb) -{ - CERROR("not implemented yet\n"); - return -ENOSYS; -} - -static struct fsfilt_operations fsfilt_reiserfs_ops = { - fs_type: "reiserfs", - fs_owner: THIS_MODULE, - fs_start: fsfilt_reiserfs_start, - 
fs_brw_start: fsfilt_reiserfs_brw_start, - fs_commit: fsfilt_reiserfs_commit, - fs_setattr: fsfilt_reiserfs_setattr, - fs_set_md: fsfilt_reiserfs_set_md, - fs_get_md: fsfilt_reiserfs_get_md, - fs_readpage: fsfilt_reiserfs_readpage, - fs_journal_data: fsfilt_reiserfs_journal_data, - fs_set_last_rcvd: fsfilt_reiserfs_set_last_rcvd, - fs_statfs: fsfilt_reiserfs_statfs, - fs_sync: fsfilt_reiserfs_sync, -}; - -static int __init fsfilt_reiserfs_init(void) -{ - return fsfilt_register_ops(&fsfilt_reiserfs_ops); -} - -static void __exit fsfilt_reiserfs_exit(void) -{ - fsfilt_unregister_ops(&fsfilt_reiserfs_ops); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre reiserfs Filesystem Helper v0.1"); -MODULE_LICENSE("GPL"); - -module_init(fsfilt_reiserfs_init); -module_exit(fsfilt_reiserfs_exit); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c deleted file mode 100644 index cd6c856..0000000 --- a/lustre/obdclass/genops.c +++ /dev/null @@ -1,554 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * These are the only exported functions, they provide some generic - * infrastructure for managing object devices - */ - -#define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -#include <linux/kmod.h> /* for request_module() */ -#include <linux/module.h> -#include <linux/obd_class.h> -#include <linux/random.h> -#include <linux/slab.h> -#else -#include <liblustre.h> -#include <linux/obd_class.h> -#include <linux/obd.h> -#endif -#include <linux/lprocfs_status.h> - -extern struct list_head obd_types; -static spinlock_t obd_types_lock = SPIN_LOCK_UNLOCKED; -kmem_cache_t *obdo_cachep = NULL; -kmem_cache_t *import_cachep = NULL; - -int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); -void (*ptlrpc_abort_inflight_superhack)(struct obd_import *imp); - -struct obd_uuid lctl_fake_uuid = { .uuid = "OBD_CLASS_UUID" }; - -/* - * support functions: we could use inter-module communication, but this - * is more portable to other OS's - */ -static struct obd_type *class_search_type(char *name) -{ - struct list_head *tmp; - struct obd_type *type; - - spin_lock(&obd_types_lock); - list_for_each(tmp, &obd_types) { - type = list_entry(tmp, struct obd_type, typ_chain); - if (strlen(type->typ_name) == strlen(name) && - strcmp(type->typ_name, name) == 0) { - spin_unlock(&obd_types_lock); - return type; - } - } - spin_unlock(&obd_types_lock); - return NULL; -} - -struct obd_type *class_get_type(char *name) -{ - struct obd_type *type = class_search_type(name); - -#ifdef CONFIG_KMOD - if (!type) { - if (!request_module(name)) { - CDEBUG(D_INFO, "Loaded module '%s'\n", name); - type = class_search_type(name); - } else - CDEBUG(D_INFO, "Can't load module '%s'\n", name); - } -#endif - if (type) - __MOD_INC_USE_COUNT(type->typ_ops->o_owner); - return type; -} - -void class_put_type(struct obd_type *type) -{ - LASSERT(type); - __MOD_DEC_USE_COUNT(type->typ_ops->o_owner); -} - -int class_register_type(struct obd_ops *ops, struct lprocfs_vars *vars, - char *name) -{ - struct 
obd_type *type; - int rc = 0; - ENTRY; - - LASSERT(strnlen(name, 1024) < 1024); /* sanity check */ - - if (class_search_type(name)) { - CDEBUG(D_IOCTL, "Type %s already registered\n", name); - RETURN(-EEXIST); - } - - rc = -ENOMEM; - OBD_ALLOC(type, sizeof(*type)); - if (type == NULL) - RETURN(rc); - - OBD_ALLOC(type->typ_ops, sizeof(*type->typ_ops)); - OBD_ALLOC(type->typ_name, strlen(name) + 1); - if (type->typ_ops == NULL || type->typ_name == NULL) - GOTO (failed, rc); - - *(type->typ_ops) = *ops; - strcpy(type->typ_name, name); - - type->typ_procroot = lprocfs_register(type->typ_name, proc_lustre_root, - vars, type); - if (IS_ERR(type->typ_procroot)) { - rc = PTR_ERR(type->typ_procroot); - type->typ_procroot = NULL; - GOTO (failed, rc); - } - - spin_lock(&obd_types_lock); - list_add(&type->typ_chain, &obd_types); - spin_unlock(&obd_types_lock); - - RETURN (0); - - failed: - if (type->typ_ops != NULL) - OBD_FREE(type->typ_name, strlen(name) + 1); - if (type->typ_ops != NULL) - OBD_FREE (type->typ_ops, sizeof (*type->typ_ops)); - RETURN(rc); -} - -int class_unregister_type(char *name) -{ - struct obd_type *type = class_search_type(name); - ENTRY; - - if (!type) { - CERROR("unknown obd type\n"); - RETURN(-EINVAL); - } - - if (type->typ_refcnt) { - CERROR("type %s has refcount (%d)\n", name, type->typ_refcnt); - /* This is a bad situation, let's make the best of it */ - /* Remove ops, but leave the name for debugging */ - OBD_FREE(type->typ_ops, sizeof(*type->typ_ops)); - RETURN(-EBUSY); - } - - if (type->typ_procroot) { - lprocfs_remove(type->typ_procroot); - type->typ_procroot = NULL; - } - - spin_lock(&obd_types_lock); - list_del(&type->typ_chain); - spin_unlock(&obd_types_lock); - OBD_FREE(type->typ_name, strlen(name) + 1); - if (type->typ_ops != NULL) - OBD_FREE(type->typ_ops, sizeof(*type->typ_ops)); - OBD_FREE(type, sizeof(*type)); - RETURN(0); -} /* class_unregister_type */ - -int class_name2dev(char *name) -{ - int i; - - if (!name) - return -1; - - for (i 
= 0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (obd->obd_name && strcmp(name, obd->obd_name) == 0) - return i; - } - - return -1; -} - -struct obd_device *class_name2obd(char *name) -{ - int dev = class_name2dev(name); - if (dev < 0) - return NULL; - return &obd_dev[dev]; -} - -int class_uuid2dev(struct obd_uuid *uuid) -{ - int i; - - for (i = 0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (obd_uuid_equals(uuid, &obd->obd_uuid)) - return i; - } - - return -1; -} - -struct obd_device *class_uuid2obd(struct obd_uuid *uuid) -{ - int dev = class_uuid2dev(uuid); - if (dev < 0) - return NULL; - return &obd_dev[dev]; -} - -void obd_cleanup_caches(void) -{ - int rc; - ENTRY; - if (obdo_cachep) { - rc = kmem_cache_destroy(obdo_cachep); - if (rc) - CERROR("Cannot destory ll_obdo_cache\n"); - obdo_cachep = NULL; - } - if (import_cachep) { - rc = kmem_cache_destroy(import_cachep); - if (rc) - CERROR("Cannot destory ll_import_cache\n"); - import_cachep = NULL; - } - EXIT; -} - -int obd_init_caches(void) -{ - ENTRY; - LASSERT(obdo_cachep == NULL); - obdo_cachep = kmem_cache_create("ll_obdo_cache", sizeof(struct obdo), - 0, 0, NULL, NULL); - if (!obdo_cachep) - GOTO(out, -ENOMEM); - - LASSERT(import_cachep == NULL); - import_cachep = kmem_cache_create("ll_import_cache", - sizeof(struct obd_import), - 0, 0, NULL, NULL); - if (!import_cachep) - GOTO(out, -ENOMEM); - - RETURN(0); - out: - obd_cleanup_caches(); - RETURN(-ENOMEM); - -} - -/* map connection to client */ -struct obd_export *class_conn2export(struct lustre_handle *conn) -{ - struct obd_export *export; - ENTRY; - - if (!conn) { - CDEBUG(D_CACHE, "looking for null handle\n"); - RETURN(NULL); - } - - if (conn->cookie == -1) { /* this means assign a new connection */ - CDEBUG(D_CACHE, "want a new connection\n"); - RETURN(NULL); - } - - CDEBUG(D_IOCTL, "looking for export cookie "LPX64"\n", conn->cookie); - export = class_handle2object(conn->cookie); - 
RETURN(export); -} - -struct obd_device *class_conn2obd(struct lustre_handle *conn) -{ - struct obd_export *export; - export = class_conn2export(conn); - if (export) { - struct obd_device *obd = export->exp_obd; - class_export_put(export); - return obd; - } - return NULL; -} - -struct obd_import *class_conn2cliimp(struct lustre_handle *conn) -{ - return class_conn2obd(conn)->u.cli.cl_import; -} - -struct obd_import *class_conn2ldlmimp(struct lustre_handle *conn) -{ - struct obd_export *export; - export = class_conn2export(conn); - if (export) { - struct obd_import *imp = export->exp_ldlm_data.led_import; - class_export_put(export); - return imp; - } - fixme(); - return NULL; -} - -/* Export management functions */ -static void export_handle_addref(void *export) -{ - class_export_get(export); -} - -struct obd_export *class_export_get(struct obd_export *exp) -{ - atomic_inc(&exp->exp_refcount); - CDEBUG(D_INFO, "GETting export %p : new refcount %d\n", exp, - atomic_read(&exp->exp_refcount)); - return exp; -} - -void class_export_put(struct obd_export *exp) -{ - ENTRY; - - LASSERT(exp); - CDEBUG(D_INFO, "PUTting export %p : new refcount %d\n", exp, - atomic_read(&exp->exp_refcount) - 1); - LASSERT(atomic_read(&exp->exp_refcount) > 0); - LASSERT(atomic_read(&exp->exp_refcount) < 0x5a5a5a); - if (atomic_dec_and_test(&exp->exp_refcount)) { - struct obd_device *obd = exp->exp_obd; - CDEBUG(D_IOCTL, "destroying export %p/%s\n", exp, - exp->exp_client_uuid.uuid); - - LASSERT(obd != NULL); - - /* "Local" exports (lctl, LOV->{mdc,osc}) have no connection. 
*/ - if (exp->exp_connection) - ptlrpc_put_connection_superhack(exp->exp_connection); - - LASSERT(list_empty(&exp->exp_handle.h_link)); - - obd_destroy_export(exp); - - OBD_FREE(exp, sizeof(*exp)); - atomic_dec(&obd->obd_refcount); - wake_up(&obd->obd_refcount_waitq); - } - EXIT; -} - -struct obd_export *class_new_export(struct obd_device *obddev) -{ - struct obd_export *export; - - OBD_ALLOC(export, sizeof(*export)); - if (!export) { - CERROR("no memory! (minor %d)\n", obddev->obd_minor); - return NULL; - } - - atomic_set(&export->exp_refcount, 2); - export->exp_obd = obddev; - /* XXX this should be in LDLM init */ - INIT_LIST_HEAD(&export->exp_ldlm_data.led_held_locks); - - INIT_LIST_HEAD(&export->exp_handle.h_link); - class_handle_hash(&export->exp_handle, export_handle_addref); - spin_lock_init(&export->exp_lock); - - spin_lock(&obddev->obd_dev_lock); - LASSERT(!obddev->obd_stopping); /* shouldn't happen, but might race */ - atomic_inc(&obddev->obd_refcount); - list_add(&export->exp_obd_chain, &export->exp_obd->obd_exports); - export->exp_obd->obd_num_exports++; - spin_unlock(&obddev->obd_dev_lock); - return export; -} - -void class_unlink_export(struct obd_export *exp) -{ - class_handle_unhash(&exp->exp_handle); - - spin_lock(&exp->exp_obd->obd_dev_lock); - list_del_init(&exp->exp_obd_chain); - exp->exp_obd->obd_num_exports--; - spin_unlock(&exp->exp_obd->obd_dev_lock); - - class_export_put(exp); -} - -/* Import management functions */ -static void import_handle_addref(void *import) -{ - class_import_get(import); -} - -struct obd_import *class_import_get(struct obd_import *import) -{ - atomic_inc(&import->imp_refcount); - CDEBUG(D_IOCTL, "import %p refcount=%d\n", import, - atomic_read(&import->imp_refcount)); - return import; -} - -void class_import_put(struct obd_import *import) -{ - ENTRY; - - CDEBUG(D_IOCTL, "import %p refcount=%d\n", import, - atomic_read(&import->imp_refcount) - 1); - - LASSERT(atomic_read(&import->imp_refcount) > 0); - 
LASSERT(atomic_read(&import->imp_refcount) < 0x5a5a5a); - if (!atomic_dec_and_test(&import->imp_refcount)) { - EXIT; - return; - } - - CDEBUG(D_IOCTL, "destroying import %p\n", import); - - ptlrpc_put_connection_superhack(import->imp_connection); - - LASSERT(list_empty(&import->imp_handle.h_link)); - OBD_FREE(import, sizeof(*import)); - EXIT; -} - -struct obd_import *class_new_import(void) -{ - struct obd_import *imp; - - OBD_ALLOC(imp, sizeof(*imp)); - if (imp == NULL) - return NULL; - - INIT_LIST_HEAD(&imp->imp_replay_list); - INIT_LIST_HEAD(&imp->imp_sending_list); - INIT_LIST_HEAD(&imp->imp_delayed_list); - spin_lock_init(&imp->imp_lock); - imp->imp_max_transno = 0; - imp->imp_peer_committed_transno = 0; - - atomic_set(&imp->imp_refcount, 2); - INIT_LIST_HEAD(&imp->imp_handle.h_link); - class_handle_hash(&imp->imp_handle, import_handle_addref); - - return imp; -} - -void class_destroy_import(struct obd_import *import) -{ - LASSERT(import != NULL); - - class_handle_unhash(&import->imp_handle); - - /* Abort any inflight DLM requests and NULL out their (about to be - * freed) import. */ - /* Invalidate all requests on import, would be better to call - ptlrpc_set_import_active(imp, 0); */ - import->imp_generation++; - ptlrpc_abort_inflight_superhack(import); - - class_import_put(import); -} - -/* a connection defines an export context in which preallocation can - be managed. 
*/ -int class_connect(struct lustre_handle *exporth, struct obd_device *obd, - struct obd_uuid *cluuid) -{ - struct obd_export *export; - LASSERT(exporth != NULL); - LASSERT(obd != NULL); - LASSERT(cluuid != NULL); - - export = class_new_export(obd); - if (export == NULL) - return -ENOMEM; - - exporth->cookie = export->exp_handle.h_cookie; - memcpy(&export->exp_client_uuid, cluuid, - sizeof(export->exp_client_uuid)); - class_export_put(export); - - CDEBUG(D_IOCTL, "connect: client %s, cookie "LPX64"\n", - cluuid->uuid, exporth->cookie); - return 0; -} - -int class_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_export *export = class_conn2export(conn); - ENTRY; - - if (export == NULL) { - fixme(); - CDEBUG(D_IOCTL, "disconnect: attempting to free " - "nonexistent client "LPX64"\n", conn->cookie); - RETURN(-EINVAL); - } - - CDEBUG(D_IOCTL, "disconnect: cookie "LPX64"\n", conn->cookie); - - class_unlink_export(export); - class_export_put(export); - RETURN(0); -} - -void class_disconnect_exports(struct obd_device *obd, int failover) -{ - int rc; - struct list_head *tmp, *n, work_list; - struct lustre_handle fake_conn; - ENTRY; - - /* Move all of the exports from obd_exports to a work list, en masse. 
*/ - spin_lock(&obd->obd_dev_lock); - list_add(&work_list, &obd->obd_exports); - list_del_init(&obd->obd_exports); - spin_unlock(&obd->obd_dev_lock); - - CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, " - "disconnecting them\n", obd->obd_minor, obd); - list_for_each_safe(tmp, n, &work_list) { - struct obd_export *exp = list_entry(tmp, struct obd_export, - exp_obd_chain); - - class_export_get(exp); - fake_conn.cookie = exp->exp_handle.h_cookie; - rc = obd_disconnect(&fake_conn, failover); - /* exports created from last_rcvd data, and "fake" - exports created by lctl don't have an import */ - if (exp->exp_ldlm_data.led_import != NULL) - class_destroy_import(exp->exp_ldlm_data.led_import); - class_export_put(exp); - - if (rc) { - CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n", - exp, rc); - } else { - CDEBUG(D_IOCTL, "export %p disconnected\n", exp); - } - } - EXIT; -} diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c deleted file mode 100644 index 4862cf3..0000000 --- a/lustre/obdclass/lprocfs_status.c +++ /dev/null @@ -1,629 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Hariharan Thantry <thantry@users.sourceforge.net> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/version.h> -#include <linux/slab.h> -#include <linux/types.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#include <linux/seq_file.h> - -#else -#include <liblustre.h> -#endif - -#include <linux/obd_class.h> -#include <linux/lprocfs_status.h> - -#ifdef LPROCFS - -struct proc_dir_entry *lprocfs_srch(struct proc_dir_entry *head, - const char *name) -{ - struct proc_dir_entry* temp; - - if (!head) - return NULL; - - temp = head->subdir; - while (temp != NULL) { - if (!strcmp(temp->name, name)) - return temp; - - temp = temp->next; - } - return NULL; -} - -/* lprocfs API calls */ - -int lprocfs_add_vars(struct proc_dir_entry *root, struct lprocfs_vars *list, - void *data) -{ - if ((root == NULL) || (list == NULL)) - return -EINVAL; - - while (list->name) { - struct proc_dir_entry *cur_root, *proc; - char *pathcopy, *cur, *next; - int pathsize = strlen(list->name)+1; - - proc = NULL; - cur_root = root; - - /* need copy of path for strsep */ - OBD_ALLOC(pathcopy, pathsize); - if (!pathcopy) - return -ENOMEM; - - next = pathcopy; - strcpy(pathcopy, list->name); - - while (cur_root && (cur = strsep(&next, "/"))) { - if (*cur =='\0') /* skip double/trailing "/" */ - continue; - - proc = lprocfs_srch(cur_root, cur); - CDEBUG(D_OTHER, "cur_root=%s, cur=%s, next=%s, (%s)\n", - cur_root->name, cur, next, - (proc ? "exists" : "new")); - if (next) - cur_root = (proc ? 
proc : - proc_mkdir(cur, cur_root)); - else if (!proc) { - mode_t mode = 0444; - if (list->write_fptr) - mode = 0644; - proc = create_proc_entry(cur, mode, cur_root); - } - } - - OBD_FREE(pathcopy, pathsize); - - if ((cur_root == NULL) || (proc == NULL)) { - CERROR("LprocFS: No memory to create /proc entry %s", - list->name); - return -ENOMEM; - } - - proc->read_proc = list->read_fptr; - proc->write_proc = list->write_fptr; - proc->data = (list->data ? list->data : data); - list++; - } - return 0; -} - -void lprocfs_remove(struct proc_dir_entry* root) -{ - struct proc_dir_entry *temp = root; - struct proc_dir_entry *rm_entry; - struct proc_dir_entry *parent; - - LASSERT(root != NULL); - parent = root->parent; - LASSERT(parent != NULL); - - while (1) { - while (temp->subdir) - temp = temp->subdir; - - rm_entry = temp; - temp = temp->parent; - remove_proc_entry(rm_entry->name, rm_entry->parent); - if (temp == parent) - break; - } -} - -struct proc_dir_entry *lprocfs_register(const char *name, - struct proc_dir_entry *parent, - struct lprocfs_vars *list, void *data) -{ - struct proc_dir_entry *newchild; - - newchild = lprocfs_srch(parent, name); - if (newchild) { - CERROR(" Lproc: Attempting to register %s more than once \n", - name); - return ERR_PTR(-EALREADY); - } - - newchild = proc_mkdir(name, parent); - if (newchild && list) { - int rc = lprocfs_add_vars(newchild, list, data); - if (rc) { - lprocfs_remove(newchild); - return ERR_PTR(rc); - } - } - return newchild; -} - -/* Generic callbacks */ - -int lprocfs_rd_u64(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - LASSERT(data != NULL); - *eof = 1; - return snprintf(page, count, LPU64"\n", *(__u64 *)data); -} - -int lprocfs_rd_uuid(char* page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device* dev = (struct obd_device*)data; - - LASSERT(dev != NULL); - *eof = 1; - return snprintf(page, count, "%s\n", dev->obd_uuid.uuid); -} - -int lprocfs_rd_name(char 
*page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device* dev = (struct obd_device *)data; - - LASSERT(dev != NULL); - LASSERT(dev->obd_name != NULL); - *eof = 1; - return snprintf(page, count, "%s\n", dev->obd_name); -} - -int lprocfs_rd_blksize(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) -{ - LASSERT(sfs != NULL); - *eof = 1; - return snprintf(page, count, "%lu\n", sfs->f_bsize); -} - -int lprocfs_rd_kbytestotal(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) -{ - __u32 blk_size; - __u64 result; - - LASSERT(sfs != NULL); - blk_size = sfs->f_bsize >> 10; - result = sfs->f_blocks; - - while (blk_size >>= 1) - result <<= 1; - - *eof = 1; - return snprintf(page, count, LPU64"\n", result); -} - -int lprocfs_rd_kbytesfree(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) -{ - __u32 blk_size; - __u64 result; - - LASSERT(sfs != NULL); - blk_size = sfs->f_bsize >> 10; - result = sfs->f_bfree; - - while (blk_size >>= 1) - result <<= 1; - - *eof = 1; - return snprintf(page, count, LPU64"\n", result); -} - -int lprocfs_rd_filestotal(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) -{ - LASSERT(sfs != NULL); - *eof = 1; - return snprintf(page, count, "%ld\n", sfs->f_files); -} - -int lprocfs_rd_filesfree(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) -{ - LASSERT(sfs != NULL); - *eof = 1; - return snprintf(page, count, "%ld\n", sfs->f_ffree); -} - -int lprocfs_rd_filegroups(char* page, char **start, off_t off, int count, - int *eof, struct statfs *sfs) -{ - *eof = 1; - return snprintf(page, count, "unimplemented\n"); -} - -int lprocfs_rd_server_uuid(char* page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device *)data; - struct client_obd *cli; - - LASSERT(obd != NULL); - cli = &obd->u.cli; - *eof = 1; - return snprintf(page, 
count, "%s\n", - cli->cl_import->imp_target_uuid.uuid); -} - -int lprocfs_rd_conn_uuid(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device *obd = (struct obd_device*)data; - struct ptlrpc_connection *conn; - - LASSERT(obd != NULL); - conn = obd->u.cli.cl_import->imp_connection; - LASSERT(conn != NULL); - *eof = 1; - return snprintf(page, count, "%s\n", conn->c_remote_uuid.uuid); -} - -int lprocfs_rd_numrefs(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_type* class = (struct obd_type*) data; - - LASSERT(class != NULL); - *eof = 1; - return snprintf(page, count, "%d\n", class->typ_refcnt); -} - -int lprocfs_obd_attach(struct obd_device *dev, struct lprocfs_vars *list) -{ - int rc = 0; - - LASSERT(dev != NULL); - LASSERT(dev->obd_type != NULL); - LASSERT(dev->obd_type->typ_procroot != NULL); - - dev->obd_proc_entry = lprocfs_register(dev->obd_name, - dev->obd_type->typ_procroot, - list, dev); - if (IS_ERR(dev->obd_proc_entry)) { - rc = PTR_ERR(dev->obd_proc_entry); - dev->obd_proc_entry = NULL; - } - return rc; -} - -int lprocfs_obd_detach(struct obd_device *dev) -{ - if (dev && dev->obd_proc_entry) { - lprocfs_remove(dev->obd_proc_entry); - dev->obd_proc_entry = NULL; - } - return 0; -} - -struct lprocfs_stats *lprocfs_alloc_stats(unsigned int num) -{ - struct lprocfs_stats *stats; - struct lprocfs_percpu *percpu; - unsigned int percpusize; - unsigned int i; - - if (num == 0) - return NULL; - - OBD_ALLOC(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus])); - if (stats == NULL) - return NULL; - - percpusize = L1_CACHE_ALIGN(offsetof(typeof(*percpu), lp_cntr[num])); - stats->ls_percpu_size = smp_num_cpus * percpusize; - OBD_ALLOC(stats->ls_percpu[0], stats->ls_percpu_size); - if (stats->ls_percpu[0] == NULL) { - OBD_FREE(stats, offsetof(typeof(*stats), - ls_percpu[smp_num_cpus])); - return NULL; - } - - stats->ls_num = num; - for (i = 1; i < smp_num_cpus; i++) - 
stats->ls_percpu[i] = (void *)(stats->ls_percpu[i - 1]) + - percpusize; - - return stats; -} - -void lprocfs_free_stats(struct lprocfs_stats *stats) -{ - if (stats->ls_num == 0) - return; - - OBD_FREE(stats->ls_percpu[0], stats->ls_percpu_size); - OBD_FREE(stats, offsetof(typeof(*stats), ls_percpu[smp_num_cpus])); -} - -/* Reset counter under lock */ -int lprocfs_counter_write(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - /* not supported */ - return 0; -} - -static void *lprocfs_stats_seq_start(struct seq_file *p, loff_t *pos) -{ - struct lprocfs_stats *stats = p->private; - /* return 1st cpu location */ - return (*pos >= stats->ls_num) ? NULL : - &(stats->ls_percpu[0]->lp_cntr[*pos]); -} - -static void lprocfs_stats_seq_stop(struct seq_file *p, void *v) -{ -} - -static void *lprocfs_stats_seq_next(struct seq_file *p, void *v, loff_t *pos) -{ - struct lprocfs_stats *stats = p->private; - ++*pos; - return (*pos >= stats->ls_num) ? NULL : - &(stats->ls_percpu[0]->lp_cntr[*pos]); -} - -/* seq file export of one lprocfs counter */ -static int lprocfs_stats_seq_show(struct seq_file *p, void *v) -{ - struct lprocfs_stats *stats = p->private; - struct lprocfs_counter *cntr = v; - struct lprocfs_counter t, ret = { .lc_min = ~(__u64)0 }; - int i, idx, rc; - - if (cntr == &(stats->ls_percpu[0])->lp_cntr[0]) { - struct timeval now; - do_gettimeofday(&now); - rc = seq_printf(p, "%-25s %lu.%lu secs.usecs\n", - "snapshot_time", now.tv_sec, now.tv_usec); - if (rc < 0) - return rc; - } - idx = cntr - &(stats->ls_percpu[0])->lp_cntr[0]; - - for (i = 0; i < smp_num_cpus; i++) { - struct lprocfs_counter *percpu_cntr = - &(stats->ls_percpu[i])->lp_cntr[idx]; - int centry; - do { - centry = atomic_read(&percpu_cntr->lc_cntl.la_entry); - t.lc_count = percpu_cntr->lc_count; - t.lc_sum = percpu_cntr->lc_sum; - t.lc_min = percpu_cntr->lc_min; - t.lc_max = percpu_cntr->lc_max; - t.lc_sumsquare = percpu_cntr->lc_sumsquare; - } while (centry != 
atomic_read(&percpu_cntr->lc_cntl.la_entry) && - centry != atomic_read(&percpu_cntr->lc_cntl.la_exit)); - ret.lc_count += t.lc_count; - ret.lc_sum += t.lc_sum; - if (t.lc_min < ret.lc_min) - ret.lc_min = t.lc_min; - if (t.lc_max > ret.lc_max) - ret.lc_max = t.lc_max; - ret.lc_sumsquare += t.lc_sumsquare; - } - - rc = seq_printf(p, "%-25s "LPU64" samples [%s]", cntr->lc_name, - ret.lc_count, cntr->lc_units); - if (rc < 0) - goto out; - - if ((cntr->lc_config & LPROCFS_CNTR_AVGMINMAX) && (ret.lc_count > 0)) { - rc = seq_printf(p, " "LPU64" "LPU64" "LPU64, - ret.lc_min, ret.lc_max, ret.lc_sum); - if (rc < 0) - goto out; - if (cntr->lc_config & LPROCFS_CNTR_STDDEV) - rc = seq_printf(p, " "LPU64, ret.lc_sumsquare); - if (rc < 0) - goto out; - } - rc = seq_printf(p, "\n"); - out: - return (rc < 0) ? rc : 0; -} - -struct seq_operations lprocfs_stats_seq_sops = { - .start = lprocfs_stats_seq_start, - .stop = lprocfs_stats_seq_stop, - .next = lprocfs_stats_seq_next, - .show = lprocfs_stats_seq_show, -}; - -static int lprocfs_stats_seq_open(struct inode *inode, struct file *file) -{ - struct proc_dir_entry *dp = inode->u.generic_ip; - struct seq_file *seq; - int rc; - - rc = seq_open(file, &lprocfs_stats_seq_sops); - if (rc) - return rc; - seq = file->private_data; - seq->private = dp->data; - return 0; -} - -struct file_operations lprocfs_stats_seq_fops = { - .open = lprocfs_stats_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -int lprocfs_register_stats(struct proc_dir_entry *root, const char* name, - struct lprocfs_stats *stats) -{ - struct proc_dir_entry *entry; - LASSERT(root != NULL); - - entry = create_proc_entry(name, 0444, root); - if (entry == NULL) - return -ENOMEM; - entry->proc_fops = &lprocfs_stats_seq_fops; - entry->data = (void *)stats; - entry->write_proc = lprocfs_counter_write; - return 0; -} - -void lprocfs_counter_init(struct lprocfs_stats *stats, int index, - unsigned conf, const char *name, const char *units) -{ - 
struct lprocfs_counter *c; - int i; - - LASSERT(stats != NULL); - for (i = 0; i < smp_num_cpus; i++) { - c = &(stats->ls_percpu[i]->lp_cntr[index]); - c->lc_config = conf; - c->lc_min = ~(__u64)0; - c->lc_name = name; - c->lc_units = units; - } -} -EXPORT_SYMBOL(lprocfs_counter_init); - -#define LPROCFS_OBD_OP_INIT(base, stats, op) \ -do { \ - unsigned int coffset = base + OBD_COUNTER_OFFSET(op); \ - LASSERT(coffset < stats->ls_num); \ - lprocfs_counter_init(stats, coffset, 0, #op, "reqs"); \ -} while (0) - -int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats) -{ - struct lprocfs_stats *stats; - unsigned int num_stats; - int rc, i; - - LASSERT(obd->obd_stats == NULL); - LASSERT(obd->obd_proc_entry != NULL); - LASSERT(obd->obd_cntr_base == 0); - - num_stats = 1 + OBD_COUNTER_OFFSET(destroy_export) + - num_private_stats; - stats = lprocfs_alloc_stats(num_stats); - if (!stats) - return -ENOMEM; - - LPROCFS_OBD_OP_INIT(num_private_stats, stats, iocontrol); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, get_info); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, set_info); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, attach); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, detach); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, setup); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, cleanup); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, connect); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, disconnect); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, statfs); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, syncfs); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, packmd); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, unpackmd); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, preallocate); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, create); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, setattr); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr); - 
LPROCFS_OBD_OP_INIT(num_private_stats, stats, getattr_async); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, open); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, close); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, brw_async); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, punch); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, sync); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, migrate); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, copy); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, iterate); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, preprw); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, match); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, san_preprw); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, destroy_export); - - for (i = num_private_stats; i < num_stats; i++) { - /* If this assertion failed, it is likely that an obd - * operation was added to struct obd_ops in - * <linux/obd.h>, and that the corresponding line item - * LPROCFS_OBD_OP_INIT(.., .., opname) - * is missing from the list above. 
*/ - LASSERT(&(stats->ls_percpu[0])->lp_cntr[i].lc_name != NULL); - } - rc = lprocfs_register_stats(obd->obd_proc_entry, "stats", stats); - if (rc < 0) { - lprocfs_free_stats(stats); - } else { - obd->obd_stats = stats; - obd->obd_cntr_base = num_private_stats; - } - return rc; -} - -void lprocfs_free_obd_stats(struct obd_device *obd) -{ - struct lprocfs_stats *stats = obd->obd_stats; - - if (stats != NULL) { - obd->obd_stats = NULL; - lprocfs_free_stats(stats); - } -} - -#endif /* LPROCFS*/ - -EXPORT_SYMBOL(lprocfs_register); -EXPORT_SYMBOL(lprocfs_srch); -EXPORT_SYMBOL(lprocfs_remove); -EXPORT_SYMBOL(lprocfs_add_vars); -EXPORT_SYMBOL(lprocfs_obd_attach); -EXPORT_SYMBOL(lprocfs_obd_detach); -EXPORT_SYMBOL(lprocfs_alloc_stats); -EXPORT_SYMBOL(lprocfs_free_stats); -EXPORT_SYMBOL(lprocfs_register_stats); -EXPORT_SYMBOL(lprocfs_alloc_obd_stats); -EXPORT_SYMBOL(lprocfs_free_obd_stats); - -EXPORT_SYMBOL(lprocfs_rd_u64); -EXPORT_SYMBOL(lprocfs_rd_uuid); -EXPORT_SYMBOL(lprocfs_rd_name); -EXPORT_SYMBOL(lprocfs_rd_server_uuid); -EXPORT_SYMBOL(lprocfs_rd_conn_uuid); -EXPORT_SYMBOL(lprocfs_rd_numrefs); - -EXPORT_SYMBOL(lprocfs_rd_blksize); -EXPORT_SYMBOL(lprocfs_rd_kbytestotal); -EXPORT_SYMBOL(lprocfs_rd_kbytesfree); -EXPORT_SYMBOL(lprocfs_rd_filestotal); -EXPORT_SYMBOL(lprocfs_rd_filesfree); -EXPORT_SYMBOL(lprocfs_rd_filegroups); diff --git a/lustre/obdclass/lustre_handles.c b/lustre/obdclass/lustre_handles.c deleted file mode 100644 index 06f86ad..0000000 --- a/lustre/obdclass/lustre_handles.c +++ /dev/null @@ -1,166 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_CLASS -#ifdef __KERNEL__ -#include <linux/types.h> -#include <linux/random.h> -#else -#include <liblustre.h> -#endif - - -#include <linux/kp30.h> -#include <linux/lustre_handles.h> - -static spinlock_t handle_lock = SPIN_LOCK_UNLOCKED; -static spinlock_t random_lock = SPIN_LOCK_UNLOCKED; -static struct list_head *handle_hash = NULL; -static int handle_count = 0; - -#define HANDLE_HASH_SIZE (1 << 14) -#define HANDLE_HASH_MASK (HANDLE_HASH_SIZE - 1) - -void class_handle_hash(struct portals_handle *h, portals_handle_addref_cb cb) -{ - struct list_head *bucket; - ENTRY; - - LASSERT(h != NULL); - LASSERT(list_empty(&h->h_link)); - - /* My hypothesis is that get_random_bytes, if called from two threads at - * the same time, will return the same bytes. 
-phil */ - spin_lock(&random_lock); - get_random_bytes(&h->h_cookie, sizeof(h->h_cookie)); - spin_unlock(&random_lock); - - h->h_addref = cb; - - bucket = handle_hash + (h->h_cookie & HANDLE_HASH_MASK); - - CDEBUG(D_INFO, "adding object %p with handle "LPX64" to hash\n", - h, h->h_cookie); - - spin_lock(&handle_lock); - list_add(&h->h_link, bucket); - handle_count++; - spin_unlock(&handle_lock); - EXIT; -} - -static void class_handle_unhash_nolock(struct portals_handle *h) -{ - LASSERT(!list_empty(&h->h_link)); - - CDEBUG(D_INFO, "removing object %p with handle "LPX64" from hash\n", - h, h->h_cookie); - - handle_count--; - list_del_init(&h->h_link); -} - -void class_handle_unhash(struct portals_handle *h) -{ - spin_lock(&handle_lock); - class_handle_unhash_nolock(h); - spin_unlock(&handle_lock); -} - -void *class_handle2object(__u64 cookie) -{ - struct list_head *bucket, *tmp; - void *retval = NULL; - ENTRY; - - LASSERT(handle_hash != NULL); - - spin_lock(&handle_lock); - bucket = handle_hash + (cookie & HANDLE_HASH_MASK); - - list_for_each(tmp, bucket) { - struct portals_handle *h; - h = list_entry(tmp, struct portals_handle, h_link); - - if (h->h_cookie == cookie) { - h->h_addref(h); - retval = h; - break; - } - } - spin_unlock(&handle_lock); - - RETURN(retval); -} - -int class_handle_init(void) -{ - struct list_head *bucket; - - LASSERT(handle_hash == NULL); - - PORTAL_ALLOC(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); - if (handle_hash == NULL) - return -ENOMEM; - - for (bucket = handle_hash + HANDLE_HASH_SIZE - 1; bucket >= handle_hash; - bucket--) - INIT_LIST_HEAD(bucket); - - return 0; -} - -static void cleanup_all_handles(void) -{ - int i; - - spin_lock(&handle_lock); - for (i = 0; i < HANDLE_HASH_SIZE; i++) { - struct list_head *tmp, *pos; - list_for_each_safe(tmp, pos, &(handle_hash[i])) { - struct portals_handle *h; - h = list_entry(tmp, struct portals_handle, h_link); - - CERROR("forcing cleanup for handle "LPX64"\n", - h->h_cookie); - - 
class_handle_unhash_nolock(h); - } - } - spin_unlock(&handle_lock); -} - -void class_handle_cleanup(void) -{ - LASSERT(handle_hash != NULL); - - if (handle_count != 0) { - CERROR("handle_count at cleanup: %d\n", handle_count); - cleanup_all_handles(); - } - - PORTAL_FREE(handle_hash, sizeof(*handle_hash) * HANDLE_HASH_SIZE); - handle_hash = NULL; - - if (handle_count) - CERROR("leaked %d handles\n", handle_count); -} diff --git a/lustre/obdclass/lustre_peer.c b/lustre/obdclass/lustre_peer.c deleted file mode 100644 index 016354c..0000000 --- a/lustre/obdclass/lustre_peer.c +++ /dev/null @@ -1,179 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#ifdef __KERNEL__ -# include <linux/module.h> -# include <linux/init.h> -# include <linux/list.h> -#else -# include <liblustre.h> -#endif -#include <linux/obd.h> -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_ha.h> -#include <linux/lustre_net.h> -#include <linux/lprocfs_status.h> - -struct uuid_nid_data { - struct list_head head; - ptl_nid_t nid; - char *uuid; - __u32 nal; - ptl_handle_ni_t ni; -}; - -/* FIXME: This should probably become more elegant than a global linked list */ -static struct list_head g_uuid_list; -static spinlock_t g_uuid_lock; - -void class_init_uuidlist(void) -{ - INIT_LIST_HEAD(&g_uuid_list); - spin_lock_init(&g_uuid_lock); -} - -void class_exit_uuidlist(void) -{ - struct list_head *tmp, *n; - - /* Module going => sole user => don't need to lock g_uuid_list */ - list_for_each_safe(tmp, n, &g_uuid_list) { - struct uuid_nid_data *data = - list_entry(tmp, struct uuid_nid_data, head); - - PORTAL_FREE(data->uuid, strlen(data->uuid) + 1); - PORTAL_FREE(data, sizeof(*data)); - } -} - -int lustre_uuid_to_peer(char *uuid, struct lustre_peer *peer) -{ - struct list_head *tmp; - - spin_lock (&g_uuid_lock); - - list_for_each(tmp, &g_uuid_list) { - struct uuid_nid_data *data = - list_entry(tmp, struct uuid_nid_data, head); - - if (strcmp(data->uuid, uuid) == 0) { - peer->peer_nid = data->nid; - peer->peer_ni = data->ni; - - spin_unlock (&g_uuid_lock); - return 0; - } - } - - spin_unlock (&g_uuid_lock); - return -1; -} - -int class_add_uuid(char *uuid, __u64 nid, __u32 nal) -{ - const ptl_handle_ni_t *nip; - struct uuid_nid_data *data; - int rc; - int nob = strnlen (uuid, PAGE_SIZE) + 1; - - if (nob > PAGE_SIZE) - return -EINVAL; - - nip = kportal_get_ni (nal); - if (nip == NULL) { - CERROR("get_ni failed: is the NAL module loaded?\n"); - return -EIO; - } - - rc = -ENOMEM; - PORTAL_ALLOC(data, sizeof(*data)); - if (data == NULL) - goto 
fail_0; - - PORTAL_ALLOC(data->uuid, nob); - if (data == NULL) - goto fail_1; - - memcpy(data->uuid, uuid, nob); - data->nid = nid; - data->nal = nal; - data->ni = *nip; - - spin_lock (&g_uuid_lock); - - list_add(&data->head, &g_uuid_list); - - spin_unlock (&g_uuid_lock); - - return 0; - - fail_1: - PORTAL_FREE (data, sizeof (*data)); - fail_0: - kportal_put_ni (nal); - return (rc); -} - -/* delete only one entry if uuid is specified, otherwise delete all */ -int class_del_uuid (char *uuid) -{ - struct list_head deathrow; - struct list_head *tmp; - struct list_head *n; - struct uuid_nid_data *data; - - INIT_LIST_HEAD (&deathrow); - - spin_lock (&g_uuid_lock); - - list_for_each_safe(tmp, n, &g_uuid_list) { - data = list_entry(tmp, struct uuid_nid_data, head); - - if (uuid == NULL || strcmp(data->uuid, uuid) == 0) { - list_del (&data->head); - list_add (&data->head, &deathrow); - if (uuid) - break; - } - } - - spin_unlock (&g_uuid_lock); - - if (list_empty (&deathrow)) - return -EINVAL; - - do { - data = list_entry(deathrow.next, struct uuid_nid_data, head); - - list_del (&data->head); - - kportal_put_ni (data->nal); - PORTAL_FREE(data->uuid, strlen(data->uuid) + 1); - PORTAL_FREE(data, sizeof(*data)); - } while (!list_empty (&deathrow)); - - return 0; -} diff --git a/lustre/obdclass/otree.c b/lustre/obdclass/otree.c deleted file mode 100644 index 16ef088..0000000 --- a/lustre/obdclass/otree.c +++ /dev/null @@ -1,266 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc - * - * our offset trees (otrees) track single-bit state of offsets in an - * extent tree. - */ - -#define EXPORT_SYMTAB -#include <linux/version.h> -#include <linux/config.h> -#include <linux/module.h> - -#define DEBUG_SUBSYSTEM S_OSC -#include <linux/kp30.h> -#include <linux/obd.h> -#include <linux/lustre_debug.h> -#include <linux/lustre_otree.h> - -struct offset_extent { - rb_node_t oe_node; - unsigned long oe_start, oe_end; -}; - -static struct offset_extent * ot_find_oe(rb_root_t *root, - struct offset_extent *needle) -{ - struct rb_node_s *node = root->rb_node; - struct offset_extent *oe; - ENTRY; - - CDEBUG(D_INODE, "searching [%lu -> %lu]\n", needle->oe_start, - needle->oe_end); - - while (node) { - oe = rb_entry(node, struct offset_extent, oe_node); - if (needle->oe_end < oe->oe_start) - node = node->rb_left; - else if (needle->oe_start > oe->oe_end) - node = node->rb_right; - else { - CDEBUG(D_INODE, "returning [%lu -> %lu]\n", - oe->oe_start, oe->oe_end); - RETURN(oe); - } - } - RETURN(NULL); -} - -/* do the rbtree mechanics to insert a node, callers are responsible - * for making sure that this new node doesn't overlap with existing - * nodes */ -static void ot_indert_oe(rb_root_t *root, struct offset_extent *new_oe) -{ - rb_node_t ** p = &root->rb_node; - rb_node_t * parent = NULL; - struct offset_extent *oe; - ENTRY; - - LASSERT(new_oe->oe_start <= new_oe->oe_end); - - while (*p) { - parent = *p; - oe = rb_entry(parent, struct offset_extent, oe_node); - if ( 
/*
 * Mark @offset in the offset tree @ot.
 *
 * A spare extent is allocated before the lock is taken, so nothing has to
 * be allocated while ot_lock is held; it is freed again at the end if it
 * was not needed.  ot_num_marked is bumped only when @offset was not
 * already covered by an extent.
 *
 * Returns 0, or -ENOMEM if the spare extent cannot be allocated.
 */
int ot_mark_offset(struct otree *ot, unsigned long offset)
{
        struct offset_extent needle, *oe, *new_oe;
        int rc = 0;
        ENTRY;

        OBD_ALLOC(new_oe, sizeof(*new_oe));
        if (new_oe == NULL)
                RETURN(-ENOMEM);

        spin_lock(&ot->ot_lock);

        /* find neighbours that we might glom on to: search one offset to
         * either side, clamped so the needle cannot wrap around */
        needle.oe_start = (offset > 0) ? offset - 1 : offset;
        needle.oe_end = (offset < ~0) ? offset + 1 : offset;
        oe = ot_find_oe(&ot->ot_root, &needle);
        if ( oe == NULL ) {
                /* nothing nearby: the spare extent becomes a new
                 * single-offset node owned by the tree */
                new_oe->oe_start = offset;
                new_oe->oe_end = offset;
                ot_indert_oe(&ot->ot_root, new_oe);
                ot->ot_num_marked++;
                new_oe = NULL;
                GOTO(out, rc);
        }

        /* already recorded */
        if ( offset >= oe->oe_start && offset <= oe->oe_end )
                GOTO(out, rc);

        /* ok, need to check for adjacent neighbours: the widened search may
         * have returned the extent on one side while a different extent
         * already contains offset itself */
        needle.oe_start = offset;
        needle.oe_end = offset;
        if (ot_find_oe(&ot->ot_root, &needle))
                GOTO(out, rc);

        /* ok, its safe to extend the oe we found */
        if ( offset == oe->oe_start - 1 )
                oe->oe_start--;
        else if ( offset == oe->oe_end + 1 )
                oe->oe_end++;
        else
                LBUG();
        ot->ot_num_marked++;

out:
        CDEBUG(D_INODE, "%lu now dirty\n", ot->ot_num_marked);
        spin_unlock(&ot->ot_lock);
        /* new_oe is still set only if it was never linked into the tree */
        if (new_oe)
                OBD_FREE(new_oe, sizeof(*new_oe));
        RETURN(rc);
}
(oe->oe_start < start && oe->oe_end > end) { - new_oe->oe_start = end + 1; - new_oe->oe_end = oe->oe_end; - oe->oe_end = start - 1; - ot_indert_oe(&ot->ot_root, new_oe); - new_oe = NULL; - ot->ot_num_marked -= end - start + 1; - break; - } - - /* overlapping edges */ - if (oe->oe_start < start && oe->oe_end <= end) { - ot->ot_num_marked -= oe->oe_end - start + 1; - oe->oe_end = start - 1; - oe = NULL; - continue; - } - if (oe->oe_end > end && oe->oe_start >= start) { - ot->ot_num_marked -= end - oe->oe_start + 1; - oe->oe_start = end + 1; - oe = NULL; - continue; - } - - /* an extent entirely within the one we're clearing */ - rb_erase(&oe->oe_node, &ot->ot_root); - ot->ot_num_marked -= oe->oe_end - oe->oe_start + 1; - spin_unlock(&ot->ot_lock); - OBD_FREE(oe, sizeof(*oe)); - spin_lock(&ot->ot_lock); - } - CDEBUG(D_INODE, "%lu now dirty\n", ot->ot_num_marked); - spin_unlock(&ot->ot_lock); - if (new_oe) - OBD_FREE(new_oe, sizeof(*new_oe)); - RETURN(rc); -} - -int ot_find_marked_extent(struct otree *ot, unsigned long *start, - unsigned long *end) -{ - struct offset_extent needle, *oe; - int rc = -ENOENT; - ENTRY; - - needle.oe_start = *start; - needle.oe_end = *end; - - spin_lock(&ot->ot_lock); - oe = ot_find_oe(&ot->ot_root, &needle); - if (oe) { - *start = oe->oe_start; - *end = oe->oe_end; - rc = 0; - } - spin_unlock(&ot->ot_lock); - - RETURN(rc); -} - -int ot_last_marked(struct otree *ot, unsigned long *last) -{ - struct rb_node_s *found, *node; - struct offset_extent *oe; - int rc = -ENOENT; - ENTRY; - - spin_lock(&ot->ot_lock); - for (node = ot->ot_root.rb_node, found = NULL; - node; - found = node, node = node->rb_right) - ; - - if (found) { - oe = rb_entry(found, struct offset_extent, oe_node); - *last = oe->oe_end; - rc = 0; - } - spin_unlock(&ot->ot_lock); - RETURN(rc); -} - -unsigned long ot_num_marked(struct otree *ot) -{ - return ot->ot_num_marked; -} - -void ot_init(struct otree *ot) -{ - CDEBUG(D_INODE, "initializing %p\n", ot); - 
spin_lock_init(&ot->ot_lock); - ot->ot_num_marked = 0; - ot->ot_root.rb_node = NULL; -} - -EXPORT_SYMBOL(ot_mark_offset); -EXPORT_SYMBOL(ot_clear_extent); -EXPORT_SYMBOL(ot_find_marked_extent); -EXPORT_SYMBOL(ot_last_marked); -EXPORT_SYMBOL(ot_num_marked); -EXPORT_SYMBOL(ot_init); diff --git a/lustre/obdclass/simple.c b/lustre/obdclass/simple.c deleted file mode 100644 index 0ce54a3..0000000 --- a/lustre/obdclass/simple.c +++ /dev/null @@ -1,278 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */ - -#define EXPORT_SYMTAB - -#include <linux/version.h> -#include <linux/fs.h> -#include <asm/unistd.h> - -#define DEBUG_SUBSYSTEM S_FILTER - -#include <linux/obd.h> -#include <linux/lustre_lib.h> - -/* Debugging check only needed during development */ -#ifdef OBD_CTXT_DEBUG -# define ASSERT_CTXT_MAGIC(magic) LASSERT((magic) == OBD_RUN_CTXT_MAGIC) -# define ASSERT_NOT_KERNEL_CTXT(msg) LASSERT(!segment_eq(get_fs(), get_ds())) -# define ASSERT_KERNEL_CTXT(msg) LASSERT(segment_eq(get_fs(), get_ds())) -#else -# define ASSERT_CTXT_MAGIC(magic) do {} while(0) -# define ASSERT_NOT_KERNEL_CTXT(msg) do {} while(0) -# define ASSERT_KERNEL_CTXT(msg) do {} while(0) -#endif - -/* push / pop to root of obd store */ -void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) -{ - //ASSERT_NOT_KERNEL_CTXT("already in kernel context!\n"); - ASSERT_CTXT_MAGIC(new_ctx->magic); - OBD_SET_CTXT_MAGIC(save); - - /* - CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - save, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ - - save->fs = get_fs(); - LASSERT(atomic_read(¤t->fs->pwd->d_count)); - LASSERT(atomic_read(&new_ctx->pwd->d_count)); - save->pwd = dget(current->fs->pwd); - save->pwdmnt = mntget(current->fs->pwdmnt); - save->ngroups = current->ngroups; - - LASSERT(save->pwd); - LASSERT(save->pwdmnt); - LASSERT(new_ctx->pwd); - LASSERT(new_ctx->pwdmnt); - - if (uc) { - save->ouc.ouc_fsuid = current->fsuid; - save->ouc.ouc_fsgid = current->fsgid; - save->ouc.ouc_cap = current->cap_effective; - save->ouc.ouc_suppgid1 = current->groups[0]; - save->ouc.ouc_suppgid2 = current->groups[1]; - - current->fsuid = uc->ouc_fsuid; - current->fsgid = uc->ouc_fsgid; - current->cap_effective = uc->ouc_cap; - 
current->ngroups = 0; - - if (uc->ouc_suppgid1 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid1; - if (uc->ouc_suppgid2 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid2; - } - set_fs(new_ctx->fs); - set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); - - /* - CDEBUG(D_INFO, - "= push %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - new_ctx, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ -} -EXPORT_SYMBOL(push_ctxt); - -void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, - struct obd_ucred *uc) -{ - //printk("pc0"); - ASSERT_CTXT_MAGIC(saved->magic); - //printk("pc1"); - ASSERT_KERNEL_CTXT("popping non-kernel context!\n"); - - /* - CDEBUG(D_INFO, - " = pop %p==%p = cur %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - new_ctx, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ - - LASSERT(current->fs->pwd == new_ctx->pwd); - LASSERT(current->fs->pwdmnt == new_ctx->pwdmnt); - - set_fs(saved->fs); - set_fs_pwd(current->fs, saved->pwdmnt, saved->pwd); - - dput(saved->pwd); - mntput(saved->pwdmnt); - if (uc) { - current->fsuid = saved->ouc.ouc_fsuid; - current->fsgid = saved->ouc.ouc_fsgid; - current->cap_effective = saved->ouc.ouc_cap; - current->ngroups = saved->ngroups; - - current->groups[0] = saved->ouc.ouc_suppgid1; - current->groups[1] = saved->ouc.ouc_suppgid2; - } - - /* - CDEBUG(D_INFO, - "= pop %p->%p = cur fs %p pwd %p:d%d:i%d (%*s), pwdmnt %p:%d\n", - saved, current, current->fs, current->fs->pwd, - atomic_read(¤t->fs->pwd->d_count), - atomic_read(¤t->fs->pwd->d_inode->i_count), - 
current->fs->pwd->d_name.len, current->fs->pwd->d_name.name, - current->fs->pwdmnt, - atomic_read(¤t->fs->pwdmnt->mnt_count)); - */ -} -EXPORT_SYMBOL(pop_ctxt); - -/* utility to make a file */ -struct dentry *simple_mknod(struct dentry *dir, char *name, int mode) -{ - struct dentry *dchild; - int err = 0; - ENTRY; - - ASSERT_KERNEL_CTXT("kernel doing mknod outside kernel context\n"); - CDEBUG(D_INODE, "creating file %*s\n", (int)strlen(name), name); - - dchild = lookup_one_len(name, dir, strlen(name)); - if (IS_ERR(dchild)) - GOTO(out_up, dchild); - - if (dchild->d_inode) { - if (!S_ISREG(dchild->d_inode->i_mode)) - GOTO(out_err, err = -EEXIST); - - GOTO(out_up, dchild); - } - - err = vfs_create(dir->d_inode, dchild, (mode & ~S_IFMT) | S_IFREG); - if (err) - GOTO(out_err, err); - - RETURN(dchild); - -out_err: - dput(dchild); - dchild = ERR_PTR(err); -out_up: - return dchild; -} -EXPORT_SYMBOL(simple_mknod); - -/* utility to make a directory */ -struct dentry *simple_mkdir(struct dentry *dir, char *name, int mode) -{ - struct dentry *dchild; - int err = 0; - ENTRY; - - ASSERT_KERNEL_CTXT("kernel doing mkdir outside kernel context\n"); - CDEBUG(D_INODE, "creating directory %*s\n", (int)strlen(name), name); - dchild = lookup_one_len(name, dir, strlen(name)); - if (IS_ERR(dchild)) - GOTO(out_up, dchild); - - if (dchild->d_inode) { - if (!S_ISDIR(dchild->d_inode->i_mode)) - GOTO(out_err, err = -ENOTDIR); - - GOTO(out_up, dchild); - } - - err = vfs_mkdir(dir->d_inode, dchild, mode); - if (err) - GOTO(out_err, err); - - RETURN(dchild); - -out_err: - dput(dchild); - dchild = ERR_PTR(err); -out_up: - return dchild; -} -EXPORT_SYMBOL(simple_mkdir); - -/* - * Read a file from within kernel context. Prior to calling this - * function we should already have done a push_ctxt(). 
- */ -int lustre_fread(struct file *file, void *buf, int len, loff_t *off) -{ - ASSERT_KERNEL_CTXT("kernel doing read outside kernel context\n"); - if (!file || !file->f_op || !file->f_op->read || !off) - RETURN(-ENOSYS); - - return file->f_op->read(file, buf, len, off); -} -EXPORT_SYMBOL(lustre_fread); - -/* - * Write a file from within kernel context. Prior to calling this - * function we should already have done a push_ctxt(). - */ -int lustre_fwrite(struct file *file, const void *buf, int len, loff_t *off) -{ - ENTRY; - ASSERT_KERNEL_CTXT("kernel doing write outside kernel context\n"); - if (!file) - RETURN(-ENOENT); - if (!file->f_op) - RETURN(-ENOSYS); - if (!off) - RETURN(-EINVAL); - - if (!file->f_op->write) - RETURN(-EROFS); - - RETURN(file->f_op->write(file, buf, len, off)); -} -EXPORT_SYMBOL(lustre_fwrite); - -/* - * Sync a file from within kernel context. Prior to calling this - * function we should already have done a push_ctxt(). - */ -int lustre_fsync(struct file *file) -{ - ENTRY; - ASSERT_KERNEL_CTXT("kernel doing sync outside kernel context\n"); - if (!file || !file->f_op || !file->f_op->fsync) - RETURN(-ENOSYS); - - RETURN(file->f_op->fsync(file, file->f_dentry, 0)); -} -EXPORT_SYMBOL(lustre_fsync); diff --git a/lustre/obdclass/statfs_pack.c b/lustre/obdclass/statfs_pack.c deleted file mode 100644 index 786a768..0000000 --- a/lustre/obdclass/statfs_pack.c +++ /dev/null @@ -1,102 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * (Un)packing of OST/MDS requests - * - */ - -#define DEBUG_SUBSYSTEM S_CLASS - -#define EXPORT_SYMTAB -#ifndef __KERNEL__ -#include <liblustre.h> -#else -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#endif - -#include <linux/lustre_export.h> -#include <linux/lustre_net.h> -#include <linux/obd_support.h> -#include <linux/obd_class.h> - -void statfs_pack(struct obd_statfs *osfs, struct statfs *sfs) -{ - osfs->os_type = sfs->f_type; - osfs->os_blocks = sfs->f_blocks; - osfs->os_bfree = sfs->f_bfree; - osfs->os_bavail = sfs->f_bavail; - osfs->os_files = sfs->f_files; - osfs->os_ffree = sfs->f_ffree; - osfs->os_bsize = sfs->f_bsize; - osfs->os_namelen = sfs->f_namelen; -} - -void statfs_unpack(struct statfs *sfs, struct obd_statfs *osfs) -{ - sfs->f_type = osfs->os_type; - sfs->f_blocks = osfs->os_blocks; - sfs->f_bfree = osfs->os_bfree; - sfs->f_bavail = osfs->os_bavail; - sfs->f_files = osfs->os_files; - sfs->f_ffree = osfs->os_ffree; - sfs->f_bsize = osfs->os_bsize; - sfs->f_namelen = osfs->os_namelen; -} - -int obd_self_statfs(struct obd_device *obd, struct statfs *sfs) -{ - struct obd_export *export, *my_export = NULL; - struct obd_statfs osfs = { 0 }; - int rc; - ENTRY; - - LASSERT( obd != NULL ); - - spin_lock(&obd->obd_dev_lock); - if (list_empty(&obd->obd_exports)) { - spin_unlock(&obd->obd_dev_lock); - export = my_export = class_new_export(obd); - if (export == NULL) - RETURN(-ENOMEM); - } else { - export = list_entry(obd->obd_exports.next, 
typeof(*export), - exp_obd_chain); - export = class_export_get(export); - spin_unlock(&obd->obd_dev_lock); - } - - rc = obd_statfs(export, &osfs); - if (!rc) - statfs_unpack(sfs, &osfs); - - if (my_export) - class_unlink_export(my_export); - - class_export_put(export); - RETURN(rc); -} - -EXPORT_SYMBOL(statfs_pack); -EXPORT_SYMBOL(statfs_unpack); -EXPORT_SYMBOL(obd_self_statfs); diff --git a/lustre/obdclass/sysctl.c b/lustre/obdclass/sysctl.c deleted file mode 100644 index 3d68f2e..0000000 --- a/lustre/obdclass/sysctl.c +++ /dev/null @@ -1,140 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include <linux/module.h> -#include <linux/autoconf.h> -#include <linux/sysctl.h> -#include <linux/sched.h> -#include <linux/mm.h> -#include <linux/sysctl.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/swapctl.h> -#endif -#include <linux/proc_fs.h> -#include <linux/slab.h> -#include <linux/stat.h> -#include <linux/ctype.h> -#include <asm/bitops.h> -#include <asm/segment.h> -#include <asm/uaccess.h> -#include <linux/utsname.h> - -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/obd_support.h> - -struct ctl_table_header *obd_table_header = NULL; - -static int vars[2]; -static int index = 0; - -static int obd_sctl_vars( ctl_table * table, int write, struct file * - filp, void * buffer, size_t * lenp ); -static int obd_sctl_reset( ctl_table * table, int write, struct file - * filp, void * buffer, size_t * lenp ); - -#define OBD_SYSCTL 300 - -#define OBD_FAIL_LOC 1 /* control test failures instrumentation */ -#define OBD_ENTRY 2 /* control enter/leave pattern */ -#define OBD_VARS 3 -#define OBD_INDEX 4 -#define OBD_RESET 5 -#define OBD_TIMEOUT 6 /* RPC timeout before recovery/intr */ -/* XXX move to /proc/sys/lustre/recovery? */ -#define OBD_UPCALL 7 /* path to recovery upcall */ -/* XXX temporary, as we play with sync osts.. */ -#define OBD_SYNCFILTER 8 - -#define OBD_VARS_SLOT 2 - -static ctl_table obd_table[] = { - {OBD_FAIL_LOC, "fail_loc", &obd_fail_loc, sizeof(int), 0644, NULL, &proc_dointvec}, - {OBD_VARS, "vars", &vars[0], sizeof(int), 0644, NULL, &proc_dointvec}, - {OBD_INDEX, "index", &index, sizeof(int), 0644, NULL, &obd_sctl_vars}, - {OBD_RESET, "reset", NULL, 0, 0644, NULL, &obd_sctl_reset}, - {OBD_TIMEOUT, "timeout", &obd_timeout, sizeof(int), 0644, NULL, &proc_dointvec}, - /* XXX need to lock so we avoid update races with the recovery upcall! 
*/ - {OBD_UPCALL, "upcall", obd_lustre_upcall, 128, 0644, NULL, - &proc_dostring, &sysctl_string }, - {OBD_SYNCFILTER, "filter_sync_on_commit", &obd_sync_filter, sizeof(int), - 0644, NULL, &proc_dointvec}, - { 0 } -}; - -static ctl_table parent_table[] = { - {OBD_SYSCTL, "lustre", NULL, 0, 0555, obd_table}, - {0} -}; - -void obd_sysctl_init (void) -{ -#ifdef CONFIG_SYSCTL - if ( !obd_table_header ) - obd_table_header = register_sysctl_table(parent_table, 0); -#endif -} - -void obd_sysctl_clean (void) -{ -#ifdef CONFIG_SYSCTL - if ( obd_table_header ) - unregister_sysctl_table(obd_table_header); - obd_table_header = NULL; -#endif -} - -int obd_sctl_reset (ctl_table * table, int write, - struct file * filp, void * buffer, - size_t * lenp) -{ - if ( write ) { - /* do something here */ - vars[0]=0; - vars[1]=0; - } - - *lenp = 0; - return 0; -} - -int obd_sctl_vars (ctl_table * table, int write, - struct file * filp, void * buffer, - size_t * lenp) -{ - int rc; - - rc = proc_dointvec(table, write, filp, buffer, lenp); - - if ( rc ) - return rc; - - if ( index < 0 || index > 1 ) { - CERROR("Illegal index %d!\n", index); - index = 0; - } else { - obd_table[OBD_VARS_SLOT].data = &vars[index]; - } - - return rc; -} diff --git a/lustre/obdclass/uuid.c b/lustre/obdclass/uuid.c deleted file mode 100644 index 9f103df..0000000 --- a/lustre/obdclass/uuid.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Public include file for the UUID library - * - * Copyright (C) 1996, 1997, 1998 Theodore Ts'o. - * Copyright (C) 2002 Cluster File System - * - changed for use in lustre - * - * %Begin-Header% - * This file may be redistributed under the terms of the GNU - * Library General Public License. 
- * %End-Header% - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#ifdef __KERNEL__ -# include <linux/ctype.h> -# include <linux/kernel.h> -# include <linux/sched.h> -# include <linux/smp_lock.h> -#else -# include <liblustre.h> -#endif - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/obd_ost.h> - -struct uuid { - __u32 time_low; - __u16 time_mid; - __u16 time_hi_and_version; - __u16 clock_seq; - __u8 node[6]; -}; - -static void uuid_unpack(class_uuid_t in, struct uuid *uu) -{ - __u8 *ptr = in; - __u32 tmp; - - tmp = *ptr++; - tmp = (tmp << 8) | *ptr++; - tmp = (tmp << 8) | *ptr++; - tmp = (tmp << 8) | *ptr++; - uu->time_low = tmp; - - tmp = *ptr++; - tmp = (tmp << 8) | *ptr++; - uu->time_mid = tmp; - - tmp = *ptr++; - tmp = (tmp << 8) | *ptr++; - uu->time_hi_and_version = tmp; - - tmp = *ptr++; - tmp = (tmp << 8) | *ptr++; - uu->clock_seq = tmp; - - memcpy(uu->node, ptr, 6); -} - -#if 0 -static void uuid_pack(struct uuid *uu, class_uuid_t ptr) -{ - __u32 tmp; - unsigned char *out = ptr; - - tmp = uu->time_low; - out[3] = (unsigned char) tmp; - tmp >>= 8; - out[2] = (unsigned char) tmp; - tmp >>= 8; - out[1] = (unsigned char) tmp; - tmp >>= 8; - out[0] = (unsigned char) tmp; - - tmp = uu->time_mid; - out[5] = (unsigned char) tmp; - tmp >>= 8; - out[4] = (unsigned char) tmp; - - tmp = uu->time_hi_and_version; - out[7] = (unsigned char) tmp; - tmp >>= 8; - out[6] = (unsigned char) tmp; - - tmp = uu->clock_seq; - out[9] = (unsigned char) tmp; - tmp >>= 8; - out[8] = (unsigned char) tmp; - - memcpy(out+10, uu->node, 6); -} - -int class_uuid_parse(struct obd_uuid in, class_uuid_t uu) -{ - struct uuid uuid; - int i; - char *cp, buf[3]; - - if (strlen(in) != 36) - return -1; - for (i=0, cp = in; i <= 36; i++,cp++) { - if ((i == 8) || (i == 13) || (i == 18) || - (i == 23)) - if (*cp == '-') - continue; - if (i== 36) - if (*cp == 0) - continue; - if (!isxdigit(*cp)) - return -1; - } - uuid.time_low = simple_strtoul(in, NULL, 16); - uuid.time_mid = 
simple_strtoul(in+9, NULL, 16); - uuid.time_hi_and_version = simple_strtoul(in+14, NULL, 16); - uuid.clock_seq = simple_strtoul(in+19, NULL, 16); - cp = in+24; - buf[2] = 0; - for (i=0; i < 6; i++) { - buf[0] = *cp++; - buf[1] = *cp++; - uuid.node[i] = simple_strtoul(buf, NULL, 16); - } - - uuid_pack(&uuid, uu); - return 0; -} -#endif - -void class_uuid_unparse(class_uuid_t uu, struct obd_uuid *out) -{ - struct uuid uuid; - - uuid_unpack(uu, &uuid); - sprintf(out->uuid, - "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", - uuid.time_low, uuid.time_mid, uuid.time_hi_and_version, - uuid.clock_seq >> 8, uuid.clock_seq & 0xFF, - uuid.node[0], uuid.node[1], uuid.node[2], - uuid.node[3], uuid.node[4], uuid.node[5]); -} - -struct obd_device *client_tgtuuid2obd(struct obd_uuid *tgtuuid) -{ - int i; - - for (i = 0; i < MAX_OBD_DEVICES; i++) { - struct obd_device *obd = &obd_dev[i]; - if (obd->obd_type == NULL) - continue; - if ((strncmp(obd->obd_type->typ_name, LUSTRE_OSC_NAME, - sizeof LUSTRE_OSC_NAME) == 0) || - (strncmp(obd->obd_type->typ_name, LUSTRE_MDC_NAME, - sizeof LUSTRE_MDC_NAME) == 0)) { - struct client_obd *cli = &obd->u.cli; - struct obd_import *imp = cli->cl_import; - if (strncmp(tgtuuid->uuid, imp->imp_target_uuid.uuid, - sizeof(imp->imp_target_uuid)) == 0) - return obd; - } - } - - return NULL; -} diff --git a/lustre/obdecho/.cvsignore b/lustre/obdecho/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/obdecho/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/obdecho/Makefile.am b/lustre/obdecho/Makefile.am deleted file mode 100644 index 08136d7..0000000 --- a/lustre/obdecho/Makefile.am +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -DEFS= - -if LIBLUSTRE -lib_LIBRARIES = libobdecho.a -libobdecho_a_SOURCES = echo_client.c -else -MODULE = obdecho -modulefs_DATA = obdecho.o -EXTRA_PROGRAMS = obdecho -LINX= -obdecho_SOURCES = echo.c echo_client.c lproc_echo.c $(LINX) -endif - -include $(top_srcdir)/Rules diff --git a/lustre/obdecho/echo.c b/lustre/obdecho/echo.c deleted file mode 100644 index f89df07..0000000 --- a/lustre/obdecho/echo.c +++ /dev/null @@ -1,576 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB - -#include <linux/version.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/highmem.h> -#include <linux/fs.h> -#include <linux/stat.h> -#include <linux/sched.h> -#include <linux/smp_lock.h> -#include <linux/ext2_fs.h> -#include <linux/quotaops.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <asm/unistd.h> - -#define DEBUG_SUBSYSTEM S_ECHO - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/obd_echo.h> -#include <linux/lustre_debug.h> -#include <linux/lustre_dlm.h> -#include <linux/lprocfs_status.h> - -#define ECHO_INIT_OBJID 0x1000000000000000ULL -#define ECHO_HANDLE_MAGIC 0xabcd0123fedc9876ULL - -#define ECHO_OBJECT0_NPAGES 16 -static struct page *echo_object0_pages[ECHO_OBJECT0_NPAGES]; - -enum { - LPROC_ECHO_READ_BYTES = 1, - LPROC_ECHO_WRITE_BYTES = 2, - LPROC_ECHO_LAST = LPROC_ECHO_WRITE_BYTES +1 -}; - -static int echo_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid) -{ - return class_connect(conn, obd, cluuid); -} - -static int echo_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_export *exp = class_conn2export(conn); - - LASSERT (exp != NULL); - - ldlm_cancel_locks_for_export(exp); - class_export_put(exp); - return (class_disconnect(conn, failover)); -} - -static __u64 echo_next_id(struct obd_device *obddev) -{ - obd_id id; - - spin_lock(&obddev->u.echo.eo_lock); - id = ++obddev->u.echo.eo_lastino; - spin_unlock(&obddev->u.echo.eo_lock); - - return id; -} - -int echo_create(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) -{ - struct obd_device *obd = class_conn2obd(conn); - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return -EINVAL; - } - - if (!(oa->o_mode && S_IFMT)) { - CERROR("echo obd: no type!\n"); - return -ENOENT; - } - - if (!(oa->o_valid & OBD_MD_FLTYPE)) { - CERROR("invalid o_valid %08x\n", oa->o_valid); - 
return -EINVAL; - } - - oa->o_id = echo_next_id(obd); - oa->o_valid = OBD_MD_FLID; - atomic_inc(&obd->u.echo.eo_create); - - return 0; -} - -int echo_destroy(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti) -{ - struct obd_device *obd = class_conn2obd(conn); - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - RETURN(-EINVAL); - } - - if (!(oa->o_valid & OBD_MD_FLID)) { - CERROR("obdo missing FLID valid flag: %08x\n", oa->o_valid); - RETURN(-EINVAL); - } - - if (oa->o_id > obd->u.echo.eo_lastino || oa->o_id < ECHO_INIT_OBJID) { - CERROR("bad destroy objid: "LPX64"\n", oa->o_id); - RETURN(-EINVAL); - } - - atomic_inc(&obd->u.echo.eo_destroy); - - return 0; -} - -static int echo_open(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti, - struct obd_client_handle *och) -{ - struct lustre_handle *fh = obdo_handle (oa); - struct obd_device *obd = class_conn2obd (conn); - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return (-EINVAL); - } - - if (!(oa->o_valid & OBD_MD_FLID)) { - CERROR ("obdo missing FLID valid flag: %08x\n", oa->o_valid); - return (-EINVAL); - } - - fh->cookie = ECHO_HANDLE_MAGIC; - - oa->o_valid |= OBD_MD_FLHANDLE; - return 0; -} - -static int echo_close(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti) -{ - struct lustre_handle *fh = obdo_handle (oa); - struct obd_device *obd = class_conn2obd(conn); - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - return (-EINVAL); - } - - if (!(oa->o_valid & OBD_MD_FLHANDLE)) { - CERROR("obdo missing FLHANDLE valid flag: %08x\n", oa->o_valid); - return (-EINVAL); - } - - if (fh->cookie != ECHO_HANDLE_MAGIC) { - CERROR ("invalid file handle on close: "LPX64"\n", fh->cookie); - return (-EINVAL); - } - - return 0; -} - -static int echo_getattr(struct lustre_handle *conn, struct obdo *oa, - 
struct lov_stripe_md *md) -{ - struct obd_device *obd = class_conn2obd(conn); - obd_id id = oa->o_id; - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - RETURN(-EINVAL); - } - - if (!(oa->o_valid & OBD_MD_FLID)) { - CERROR("obdo missing FLID valid flag: %08x\n", oa->o_valid); - RETURN(-EINVAL); - } - - obdo_cpy_md(oa, &obd->u.echo.oa, oa->o_valid); - oa->o_id = id; - - return 0; -} - -static int echo_setattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti) -{ - struct obd_device *obd = class_conn2obd(conn); - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - RETURN(-EINVAL); - } - - if (!(oa->o_valid & OBD_MD_FLID)) { - CERROR("obdo missing FLID valid flag: %08x\n", oa->o_valid); - RETURN(-EINVAL); - } - - memcpy(&obd->u.echo.oa, oa, sizeof(*oa)); - - atomic_inc(&obd->u.echo.eo_setattr); - - return 0; -} - -/* This allows us to verify that desc_private is passed unmolested */ -#define DESC_PRIV 0x10293847 - -int echo_preprw(int cmd, struct obd_export *export, struct obdo *oa, - int objcount, struct obd_ioobj *obj, int niocount, - struct niobuf_remote *nb, struct niobuf_local *res, - void **desc_private, struct obd_trans_info *oti) -{ - struct obd_device *obd; - struct niobuf_local *r = res; - int tot_bytes = 0; - int rc = 0; - int i; - ENTRY; - - obd = export->exp_obd; - if (obd == NULL) - RETURN(-EINVAL); - - memset(res, 0, sizeof(*res) * niocount); - - CDEBUG(D_PAGE, "%s %d obdos with %d IOs\n", - cmd == OBD_BRW_READ ? "reading" : "writing", objcount, niocount); - - *desc_private = (void *)DESC_PRIV; - - for (i = 0; i < objcount; i++, obj++) { - int gfp_mask = (obj->ioo_id & 1) ? 
GFP_HIGHUSER : GFP_KERNEL; - int isobj0 = obj->ioo_id == 0; - int verify = !isobj0; - int j; - - for (j = 0 ; j < obj->ioo_bufcnt ; j++, nb++, r++) { - - if (isobj0 && - (nb->offset >> PAGE_SHIFT) < ECHO_OBJECT0_NPAGES) { - r->page = echo_object0_pages[nb->offset >> - PAGE_SHIFT]; - /* Take extra ref so __free_pages() can be called OK */ - get_page (r->page); - } else { - r->page = alloc_pages(gfp_mask, 0); - if (r->page == NULL) { - CERROR("can't get page %u/%u for id " - LPU64"\n", - j, obj->ioo_bufcnt, obj->ioo_id); - GOTO(preprw_cleanup, rc = -ENOMEM); - } - } - - tot_bytes += r->len; - - atomic_inc(&obd->u.echo.eo_prep); - - r->offset = nb->offset; - r->len = nb->len; - LASSERT ((r->offset & (PAGE_SIZE - 1)) + r->len <= PAGE_SIZE); - - CDEBUG(D_PAGE, "$$$$ get page %p @ "LPU64" for %d\n", - r->page, r->offset, r->len); - - if (cmd & OBD_BRW_READ) { - r->rc = r->len; - if (verify) { - page_debug_setup(kmap (r->page), r->len, - r->offset,obj->ioo_id); - kunmap (r->page); - } - r->rc = r->len; - } else { - if (verify) { - page_debug_setup(kmap (r->page), r->len, - 0xecc0ecc0ecc0ecc0, - 0xecc0ecc0ecc0ecc0); - kunmap (r->page); - } - } - } - } - if (cmd & OBD_BRW_READ) - lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_READ_BYTES, - tot_bytes); - else - lprocfs_counter_add(obd->obd_stats, LPROC_ECHO_WRITE_BYTES, - tot_bytes); - - CDEBUG(D_PAGE, "%d pages allocated after prep\n", - atomic_read(&obd->u.echo.eo_prep)); - - RETURN(0); - -preprw_cleanup: - /* It is possible that we would rather handle errors by allow - * any already-set-up pages to complete, rather than tearing them - * all down again. I believe that this is what the in-kernel - * prep/commit operations do. 
- */ - CERROR("cleaning up %ld pages (%d obdos)\n", (long)(r - res), objcount); - while (r-- > res) { - kunmap(r->page); - /* NB if this is an 'object0' page, __free_pages will just - * lose the extra ref gained above */ - __free_pages(r->page, 0); - atomic_dec(&obd->u.echo.eo_prep); - } - memset(res, 0, sizeof(*res) * niocount); - - return rc; -} - -int echo_commitrw(int cmd, struct obd_export *export, int objcount, - struct obd_ioobj *obj, int niocount, struct niobuf_local *res, - void *desc_private, struct obd_trans_info *oti) -{ - struct obd_device *obd; - struct niobuf_local *r = res; - int i, vrc = 0, rc = 0; - ENTRY; - - obd = export->exp_obd; - if (obd == NULL) - RETURN(-EINVAL); - - if ((cmd & OBD_BRW_RWMASK) == OBD_BRW_READ) { - CDEBUG(D_PAGE, "reading %d obdos with %d IOs\n", - objcount, niocount); - } else { - CDEBUG(D_PAGE, "writing %d obdos with %d IOs\n", - objcount, niocount); - } - - if (niocount && !r) { - CERROR("NULL res niobuf with niocount %d\n", niocount); - RETURN(-EINVAL); - } - - LASSERT(desc_private == (void *)DESC_PRIV); - - for (i = 0; i < objcount; i++, obj++) { - int verify = obj->ioo_id != 0; - int j; - - for (j = 0 ; j < obj->ioo_bufcnt ; j++, r++) { - struct page *page = r->page; - void *addr; - - if (!page || !(addr = kmap(page)) || - !kern_addr_valid(addr)) { - - CERROR("bad page objid "LPU64":%p, buf %d/%d\n", - obj->ioo_id, page, j, obj->ioo_bufcnt); - kunmap(page); - GOTO(commitrw_cleanup, rc = -EFAULT); - } - - CDEBUG(D_PAGE, "$$$$ use page %p, addr %p@"LPU64"\n", - r->page, addr, r->offset); - - if (verify) { - vrc = page_debug_check("echo", addr, r->len, - r->offset, obj->ioo_id); - /* check all the pages always */ - if (vrc != 0 && rc == 0) - rc = vrc; - } - - kunmap(page); - /* NB see comment above regarding object0 pages */ - __free_pages(page, 0); - atomic_dec(&obd->u.echo.eo_prep); - } - } - CDEBUG(D_PAGE, "%d pages remain after commit\n", - atomic_read(&obd->u.echo.eo_prep)); - RETURN(rc); - -commitrw_cleanup: - 
CERROR("cleaning up %ld pages (%d obdos)\n", - niocount - (long)(r - res) - 1, objcount); - while (++r < res + niocount) { - struct page *page = r->page; - - /* NB see comment above regarding object0 pages */ - __free_pages(page, 0); - atomic_dec(&obd->u.echo.eo_prep); - } - return rc; -} - -static int echo_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - ENTRY; - - spin_lock_init(&obddev->u.echo.eo_lock); - obddev->u.echo.eo_lastino = ECHO_INIT_OBJID; - - obddev->obd_namespace = - ldlm_namespace_new("echo-tgt", LDLM_NAMESPACE_SERVER); - if (obddev->obd_namespace == NULL) { - LBUG(); - RETURN(-ENOMEM); - } - - ptlrpc_init_client (LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - "echo_ldlm_cb_client", &obddev->obd_ldlm_client); - RETURN(0); -} - -static int echo_cleanup(struct obd_device *obddev, int force, int failover) -{ - ENTRY; - - ldlm_namespace_free(obddev->obd_namespace); - CERROR("%d prep/commitrw pages leaked\n", - atomic_read(&obddev->u.echo.eo_prep)); - - RETURN(0); -} - -int echo_attach(struct obd_device *obd, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - int rc; - - lprocfs_init_vars(&lvars); - rc = lprocfs_obd_attach(obd, lvars.obd_vars); - if (rc != 0) - return rc; - rc = lprocfs_alloc_obd_stats(obd, LPROC_ECHO_LAST); - if (rc != 0) - return rc; - - lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_READ_BYTES, - LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes"); - lprocfs_counter_init(obd->obd_stats, LPROC_ECHO_WRITE_BYTES, - LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes"); - return rc; -} - -int echo_detach(struct obd_device *dev) -{ - lprocfs_free_obd_stats(dev); - return lprocfs_obd_detach(dev); -} - -static struct obd_ops echo_obd_ops = { - o_owner: THIS_MODULE, - o_attach: echo_attach, - o_detach: echo_detach, - o_connect: echo_connect, - o_disconnect: echo_disconnect, - o_create: echo_create, - o_destroy: echo_destroy, - o_open: echo_open, - o_close: echo_close, - o_getattr: echo_getattr, - o_setattr: 
echo_setattr, - o_preprw: echo_preprw, - o_commitrw: echo_commitrw, - o_setup: echo_setup, - o_cleanup: echo_cleanup -}; - -extern int echo_client_init(void); -extern void echo_client_cleanup(void); - -static void -echo_object0_pages_fini (void) -{ - int i; - - for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) - if (echo_object0_pages[i] != NULL) { - __free_pages (echo_object0_pages[i], 0); - echo_object0_pages[i] = NULL; - } -} - -static int -echo_object0_pages_init (void) -{ - struct page *pg; - int i; - - for (i = 0; i < ECHO_OBJECT0_NPAGES; i++) { - int gfp_mask = (i < ECHO_OBJECT0_NPAGES/2) ? - GFP_KERNEL : GFP_HIGHUSER; - - pg = alloc_pages (gfp_mask, 0); - if (pg == NULL) { - echo_object0_pages_fini (); - return (-ENOMEM); - } - - memset (kmap (pg), 0, PAGE_SIZE); - kunmap (pg); - - echo_object0_pages[i] = pg; - } - - return (0); -} - -static int __init obdecho_init(void) -{ - struct lprocfs_static_vars lvars; - int rc; - - printk(KERN_INFO "Lustre Echo OBD driver; info@clusterfs.com\n"); - - lprocfs_init_vars(&lvars); - - rc = echo_object0_pages_init (); - if (rc != 0) - goto failed_0; - - rc = class_register_type(&echo_obd_ops, lvars.module_vars, - OBD_ECHO_DEVICENAME); - if (rc != 0) - goto failed_1; - - rc = echo_client_init(); - if (rc == 0) - RETURN (0); - - class_unregister_type(OBD_ECHO_DEVICENAME); - failed_1: - echo_object0_pages_fini (); - failed_0: - RETURN(rc); -} - -static void __exit obdecho_exit(void) -{ - echo_client_cleanup(); - class_unregister_type(OBD_ECHO_DEVICENAME); - echo_object0_pages_fini (); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Testing Echo OBD driver"); -MODULE_LICENSE("GPL"); - -module_init(obdecho_init); -module_exit(obdecho_exit); diff --git a/lustre/obdecho/echo_client.c b/lustre/obdecho/echo_client.c deleted file mode 100644 index 79da7ea..0000000 --- a/lustre/obdecho/echo_client.c +++ /dev/null @@ -1,1139 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_ECHO -#ifdef __KERNEL__ -#include <linux/version.h> -#include <linux/module.h> -#include <linux/fs.h> -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/iobuf.h> -#endif -#include <asm/div64.h> -#else -#include <liblustre.h> -#endif - -#include <linux/obd.h> -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/obd_echo.h> -#include <linux/lustre_debug.h> -#include <linux/lprocfs_status.h> -#include <linux/lustre_lite.h> /* for LL_IOC_LOV_SETSTRIPE */ - -#if 0 -static void -echo_printk_object (char *msg, struct ec_object *eco) -{ - struct lov_stripe_md *lsm = eco->eco_lsm; - int i; - - printk (KERN_INFO "%s: object %p: "LPX64", refs %d%s: "LPX64 - "=%u!%u@%d\n", msg, eco, eco->eco_id, eco->eco_refcount, - eco->eco_deleted ? "(deleted) " : "", - lsm->lsm_object_id, lsm->lsm_stripe_size, - lsm->lsm_stripe_count, lsm->lsm_stripe_offset); - - for (i = 0; i < lsm->lsm_stripe_count; i++) - printk (KERN_INFO " [%2u]"LPX64"\n", - lsm->lsm_oinfo[i].loi_ost_idx, - lsm->lsm_oinfo[i].loi_id); -} -#endif - -static struct ec_object * -echo_find_object_locked (struct obd_device *obd, obd_id id) -{ - struct echo_client_obd *ec = &obd->u.echo_client; - struct ec_object *eco = NULL; - struct list_head *el; - - list_for_each (el, &ec->ec_objects) { - eco = list_entry (el, struct ec_object, eco_obj_chain); - - if (eco->eco_id == id) - return (eco); - } - return (NULL); -} - -static int -echo_copyout_lsm (struct lov_stripe_md *lsm, void *ulsm, int ulsm_nob) -{ - int nob; - - nob = offsetof (struct lov_stripe_md, lsm_oinfo[lsm->lsm_stripe_count]); - if (nob > ulsm_nob) - return (-EINVAL); - - if (copy_to_user (ulsm, lsm, nob)) - return (-EFAULT); - - return (0); -} - -static int -echo_copyin_lsm (struct obd_device *obd, struct lov_stripe_md *lsm, - void *ulsm, int ulsm_nob) -{ - struct echo_client_obd *ec = &obd->u.echo_client; - int nob; - - if (ulsm_nob < sizeof (*lsm)) - return (-EINVAL); - - if 
(copy_from_user (lsm, ulsm, sizeof (*lsm))) - return (-EFAULT); - - nob = lsm->lsm_stripe_count * sizeof (lsm->lsm_oinfo[0]); - - if (ulsm_nob < nob || - lsm->lsm_stripe_count > ec->ec_nstripes || - lsm->lsm_magic != LOV_MAGIC || - (lsm->lsm_stripe_offset != 0 && - lsm->lsm_stripe_offset != 0xffffffff && - lsm->lsm_stripe_offset >= ec->ec_nstripes) || - (lsm->lsm_stripe_size & (PAGE_SIZE - 1)) != 0 || - ((__u64)lsm->lsm_stripe_size * lsm->lsm_stripe_count > ~0UL)) - return (-EINVAL); - - LASSERT (ec->ec_lsmsize >= sizeof (*lsm) + nob); - - if (copy_from_user(lsm->lsm_oinfo, - ((struct lov_stripe_md *)ulsm)->lsm_oinfo, nob)) - return (-EFAULT); - - return (0); -} - -static struct ec_object * -echo_allocate_object (struct obd_device *obd) -{ - struct echo_client_obd *ec = &obd->u.echo_client; - struct ec_object *eco; - - OBD_ALLOC (eco, sizeof (*eco)); - if (eco == NULL) - return (NULL); - - OBD_ALLOC (eco->eco_lsm, ec->ec_lsmsize); - if (eco->eco_lsm == NULL) { - OBD_FREE (eco, sizeof (*eco)); - return (NULL); - } - - eco->eco_device = obd; - eco->eco_deleted = 0; - eco->eco_refcount = 0; - eco->eco_lsm->lsm_magic = LOV_MAGIC; - /* leave stripe count 0 by default */ - - return (eco); -} - -static void -echo_free_object (struct ec_object *eco) -{ - struct obd_device *obd = eco->eco_device; - struct echo_client_obd *ec = &obd->u.echo_client; - - LASSERT (eco->eco_refcount == 0); - OBD_FREE (eco->eco_lsm, ec->ec_lsmsize); - OBD_FREE (eco, sizeof (*eco)); -} - -static int -echo_create_object (struct obd_device *obd, int on_target, struct obdo *oa, - void *ulsm, int ulsm_nob) -{ - struct echo_client_obd *ec = &obd->u.echo_client; - struct ec_object *eco2; - struct ec_object *eco; - struct lov_stripe_md *lsm; - int rc; - int i; - - if ((oa->o_valid & OBD_MD_FLID) == 0 && /* no obj id */ - (on_target || /* set_stripe */ - ec->ec_nstripes != 0)) { /* LOV */ - CERROR ("No valid oid\n"); - return (-EINVAL); - } - - eco = echo_allocate_object (obd); - if (eco == NULL) - return 
(-ENOMEM); - - lsm = eco->eco_lsm; - - if (ulsm != NULL) { - rc = echo_copyin_lsm (obd, lsm, ulsm, ulsm_nob); - if (rc != 0) - goto failed; - } - - /* setup object ID here for !on_target and LOV hint */ - if ((oa->o_valid & OBD_MD_FLID) != 0) - eco->eco_id = lsm->lsm_object_id = oa->o_id; - - /* defaults -> actual values */ - if (lsm->lsm_stripe_offset == 0xffffffff) - lsm->lsm_stripe_offset = 0; - - if (lsm->lsm_stripe_count == 0) - lsm->lsm_stripe_count = ec->ec_nstripes; - - if (lsm->lsm_stripe_size == 0) - lsm->lsm_stripe_size = PAGE_SIZE; - - /* setup stripes: indices + default ids if required */ - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (lsm->lsm_oinfo[i].loi_id == 0) - lsm->lsm_oinfo[i].loi_id = lsm->lsm_object_id; - - lsm->lsm_oinfo[i].loi_ost_idx = - (lsm->lsm_stripe_offset + i) % ec->ec_nstripes; - } - - if (on_target) { - rc = obd_create (&ec->ec_conn, oa, &lsm, NULL); - if (rc != 0) - goto failed; - - /* See what object ID we were given */ - LASSERT ((oa->o_valid & OBD_MD_FLID) != 0); - eco->eco_id = lsm->lsm_object_id = oa->o_id; - } - - spin_lock (&ec->ec_lock); - - eco2 = echo_find_object_locked (obd, oa->o_id); - if (eco2 != NULL) { /* conflict */ - spin_unlock (&ec->ec_lock); - - CERROR ("Can't create object id "LPX64": id already exists%s\n", - oa->o_id, on_target ? 
" (undoing create)" : ""); - - if (on_target) - obd_destroy (&ec->ec_conn, oa, lsm, NULL); - - rc = -EEXIST; - goto failed; - } - - list_add (&eco->eco_obj_chain, &ec->ec_objects); - spin_unlock (&ec->ec_lock); - CDEBUG (D_INFO, - "created %p: "LPX64"=%u#%u&%d refs %d del %d\n", - eco, eco->eco_id, - eco->eco_lsm->lsm_stripe_size, - eco->eco_lsm->lsm_stripe_count, - eco->eco_lsm->lsm_stripe_offset, - eco->eco_refcount, eco->eco_deleted); - return (0); - - failed: - echo_free_object (eco); - return (rc); -} - -static int -echo_get_object (struct ec_object **ecop, struct obd_device *obd, - struct obdo *oa) -{ - struct echo_client_obd *ec = &obd->u.echo_client; - struct ec_object *eco; - struct ec_object *eco2; - int rc; - - if ((oa->o_valid & OBD_MD_FLID) == 0) - { - CERROR ("No valid oid\n"); - return (-EINVAL); - } - - spin_lock (&ec->ec_lock); - eco = echo_find_object_locked (obd, oa->o_id); - if (eco != NULL) { - if (eco->eco_deleted) /* being deleted */ - return (-EAGAIN); /* (see comment in cleanup) */ - - eco->eco_refcount++; - spin_unlock (&ec->ec_lock); - *ecop = eco; - CDEBUG (D_INFO, - "found %p: "LPX64"=%u#%u&%d refs %d del %d\n", - eco, eco->eco_id, - eco->eco_lsm->lsm_stripe_size, - eco->eco_lsm->lsm_stripe_count, - eco->eco_lsm->lsm_stripe_offset, - eco->eco_refcount, eco->eco_deleted); - return (0); - } - spin_unlock (&ec->ec_lock); - - if (ec->ec_nstripes != 0) /* striping required */ - return (-ENOENT); - - eco = echo_allocate_object (obd); - if (eco == NULL) - return (-ENOMEM); - - eco->eco_id = eco->eco_lsm->lsm_object_id = oa->o_id; - - spin_lock (&ec->ec_lock); - - eco2 = echo_find_object_locked (obd, oa->o_id); - if (eco2 == NULL) { /* didn't race */ - list_add (&eco->eco_obj_chain, &ec->ec_objects); - spin_unlock (&ec->ec_lock); - eco->eco_refcount = 1; - *ecop = eco; - CDEBUG (D_INFO, - "created %p: "LPX64"=%u#%u&%d refs %d del %d\n", - eco, eco->eco_id, - eco->eco_lsm->lsm_stripe_size, - eco->eco_lsm->lsm_stripe_count, - 
eco->eco_lsm->lsm_stripe_offset, - eco->eco_refcount, eco->eco_deleted); - return (0); - } - - if (eco2->eco_deleted) - rc = -EAGAIN; /* lose race */ - else { - eco2->eco_refcount++; /* take existing */ - *ecop = eco2; - rc = 0; - LASSERT (eco2->eco_id == eco2->eco_lsm->lsm_object_id); - CDEBUG (D_INFO, - "found(2) %p: "LPX64"=%u#%u&%d refs %d del %d\n", - eco2, eco2->eco_id, - eco2->eco_lsm->lsm_stripe_size, - eco2->eco_lsm->lsm_stripe_count, - eco2->eco_lsm->lsm_stripe_offset, - eco2->eco_refcount, eco2->eco_deleted); - } - - spin_unlock (&ec->ec_lock); - - echo_free_object (eco); - return (rc); -} - -static void -echo_put_object (struct ec_object *eco) -{ - struct obd_device *obd = eco->eco_device; - struct echo_client_obd *ec = &obd->u.echo_client; - - /* Release caller's ref on the object. - * delete => mark for deletion when last ref goes - */ - - spin_lock (&ec->ec_lock); - - eco->eco_refcount--; - LASSERT (eco->eco_refcount >= 0); - - CDEBUG(D_INFO, "put %p: "LPX64"=%u#%u&%d refs %d del %d\n", - eco, eco->eco_id, - eco->eco_lsm->lsm_stripe_size, - eco->eco_lsm->lsm_stripe_count, - eco->eco_lsm->lsm_stripe_offset, - eco->eco_refcount, eco->eco_deleted); - - if (eco->eco_refcount != 0 || !eco->eco_deleted) { - spin_unlock (&ec->ec_lock); - return; - } - - spin_unlock (&ec->ec_lock); - - /* NB leave obj in the object list. We must prevent anyone from - * attempting to enqueue on this object number until we can be - * sure there will be no more lock callbacks. 
- */ - obd_cancel_unused(&ec->ec_conn, eco->eco_lsm, 0, NULL); - - /* now we can let it go */ - spin_lock (&ec->ec_lock); - list_del (&eco->eco_obj_chain); - spin_unlock (&ec->ec_lock); - - LASSERT (eco->eco_refcount == 0); - - echo_free_object (eco); -} - -static void -echo_get_stripe_off_id (struct lov_stripe_md *lsm, obd_off *offp, obd_id *idp) -{ - unsigned long stripe_count; - unsigned long stripe_size; - unsigned long width; - unsigned long woffset; - int stripe_index; - obd_off offset; - - if (lsm->lsm_stripe_count <= 1) - return; - - offset = *offp; - stripe_size = lsm->lsm_stripe_size; - stripe_count = lsm->lsm_stripe_count; - - /* width = # bytes in all stripes */ - width = stripe_size * stripe_count; - - /* woffset = offset within a width; offset = whole number of widths */ - woffset = do_div (offset, width); - - stripe_index = woffset / stripe_size; - - *idp = lsm->lsm_oinfo[stripe_index].loi_id; - *offp = offset * stripe_size + woffset % stripe_size; -} - -static int -echo_client_kbrw (struct obd_device *obd, int rw, - struct obdo *oa, struct lov_stripe_md *lsm, - obd_off offset, obd_size count) -{ - struct echo_client_obd *ec = &obd->u.echo_client; - obd_count npages; - struct brw_page *pga; - struct brw_page *pgp; - obd_off off; - int i; - int rc; - int verify; - int gfp_mask; - - /* oa_id == 0 => speed test (no verification) else... - * oa & 1 => use HIGHMEM - */ - verify = (oa->o_id != 0); - gfp_mask = ((oa->o_id & 1) == 0) ? 
GFP_KERNEL : GFP_HIGHUSER; - - LASSERT(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ); - - if (count <= 0 || - (count & (PAGE_SIZE - 1)) != 0 || - (lsm != NULL && - lsm->lsm_object_id != oa->o_id)) - return (-EINVAL); - - /* XXX think again with misaligned I/O */ - npages = count >> PAGE_SHIFT; - - OBD_ALLOC(pga, npages * sizeof(*pga)); - if (pga == NULL) - return (-ENOMEM); - - for (i = 0, pgp = pga, off = offset; - i < npages; - i++, pgp++, off += PAGE_SIZE) { - - LASSERT (pgp->pg == NULL); /* for cleanup */ - - rc = -ENOMEM; - pgp->pg = alloc_pages (gfp_mask, 0); - if (pgp->pg == NULL) - goto out; - - pgp->count = PAGE_SIZE; - pgp->off = off; - pgp->flag = 0; - - if (verify) { - void *addr = kmap(pgp->pg); - obd_off stripe_off = off; - obd_id stripe_id = oa->o_id; - - if (rw == OBD_BRW_WRITE) { - echo_get_stripe_off_id(lsm, &stripe_off, - &stripe_id); - page_debug_setup(addr, pgp->count, - stripe_off, stripe_id); - } else { - page_debug_setup(addr, pgp->count, - 0xdeadbeef00c0ffee, - 0xdeadbeef00c0ffee); - } - kunmap(pgp->pg); - } - } - - rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL); - - out: - if (rc != 0) - verify = 0; - - for (i = 0, pgp = pga; i < npages; i++, pgp++) { - if (pgp->pg == NULL) - continue; - - if (verify) { - void *addr = kmap(pgp->pg); - obd_off stripe_off = pgp->off; - obd_id stripe_id = oa->o_id; - int vrc; - - echo_get_stripe_off_id (lsm, &stripe_off, &stripe_id); - vrc = page_debug_check("test_brw", addr, pgp->count, - stripe_off, stripe_id); - if (vrc != 0 && rc == 0) - rc = vrc; - - kunmap(pgp->pg); - } - __free_pages(pgp->pg, 0); - } - OBD_FREE(pga, npages * sizeof(*pga)); - return (rc); -} - -#ifdef __KERNEL__ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int echo_client_ubrw(struct obd_device *obd, int rw, - struct obdo *oa, struct lov_stripe_md *lsm, - obd_off offset, obd_size count, char *buffer) -{ - struct echo_client_obd *ec = &obd->u.echo_client; - obd_count npages; - struct brw_page *pga; - struct brw_page 
*pgp; - obd_off off; - struct kiobuf *kiobuf; - int i; - int rc; - - LASSERT (rw == OBD_BRW_WRITE || - rw == OBD_BRW_READ); - - /* NB: for now, only whole pages, page aligned */ - - if (count <= 0 || - ((long)buffer & (PAGE_SIZE - 1)) != 0 || - (count & (PAGE_SIZE - 1)) != 0 || - (lsm != NULL && lsm->lsm_object_id != oa->o_id)) - return (-EINVAL); - - /* XXX think again with misaligned I/O */ - npages = count >> PAGE_SHIFT; - - OBD_ALLOC(pga, npages * sizeof(*pga)); - if (pga == NULL) - return (-ENOMEM); - - rc = alloc_kiovec (1, &kiobuf); - if (rc != 0) - goto out_1; - - rc = map_user_kiobuf ((rw == OBD_BRW_READ) ? READ : WRITE, - kiobuf, (unsigned long)buffer, count); - if (rc != 0) - goto out_2; - - LASSERT (kiobuf->offset == 0); - LASSERT (kiobuf->nr_pages == npages); - - for (i = 0, off = offset, pgp = pga; - i < npages; - i++, off += PAGE_SIZE, pgp++) { - pgp->off = off; - pgp->pg = kiobuf->maplist[i]; - pgp->count = PAGE_SIZE; - pgp->flag = 0; - } - - rc = obd_brw(rw, &ec->ec_conn, lsm, npages, pga, NULL); - - // if (rw == OBD_BRW_READ) - // mark_dirty_kiobuf (kiobuf, count); - - unmap_kiobuf (kiobuf); - out_2: - free_kiovec (1, &kiobuf); - out_1: - OBD_FREE(pga, npages * sizeof(*pga)); - return (rc); -} -#else -static int echo_client_ubrw(struct obd_device *obd, int rw, - struct obdo *oa, struct lov_stripe_md *lsm, - obd_off offset, obd_size count, char *buffer) -{ - LBUG(); - return 0; -} -#endif -#endif - -static int -echo_open (struct obd_export *exp, struct obdo *oa) -{ - struct obd_device *obd = exp->exp_obd; - struct echo_client_obd *ec = &obd->u.echo_client; - struct lustre_handle *ufh = obdo_handle (oa); - struct ec_open_object *ecoo; - struct ec_object *eco; - int rc; - - rc = echo_get_object (&eco, obd, oa); - if (rc != 0) - return rc; - - rc = -ENOMEM; - OBD_ALLOC (ecoo, sizeof (*ecoo)); - if (ecoo == NULL) - goto failed_0; - - rc = obd_open(&ec->ec_conn, oa, eco->eco_lsm, NULL, &ecoo->ecoo_och); - if (rc != 0) - goto failed_1; - - memcpy 
(&ecoo->ecoo_oa, oa, sizeof (*oa)); - ecoo->ecoo_object = eco; - /* ecoo takes ref from echo_get_object() above */ - - spin_lock (&ec->ec_lock); - - list_add (&ecoo->ecoo_exp_chain, &exp->exp_ec_data.eced_open_head); - ufh->cookie = ecoo->ecoo_cookie = ec->ec_unique++; - spin_unlock (&ec->ec_lock); - return 0; - - failed_1: - OBD_FREE (ecoo, sizeof (*ecoo)); - failed_0: - echo_put_object (eco); - return (rc); -} - -static int -echo_close (struct obd_export *exp, struct obdo *oa) -{ - struct obd_device *obd = exp->exp_obd; - struct echo_client_obd *ec = &obd->u.echo_client; - struct lustre_handle *ufh = obdo_handle (oa); - struct ec_open_object *ecoo = NULL; - int found = 0; - struct list_head *el; - int rc; - - if ((oa->o_valid & OBD_MD_FLHANDLE) == 0) - return -EINVAL; - - spin_lock (&ec->ec_lock); - - list_for_each (el, &exp->exp_ec_data.eced_open_head) { - ecoo = list_entry (el, struct ec_open_object, ecoo_exp_chain); - found = (ecoo->ecoo_cookie == ufh->cookie); - if (found) { - list_del (&ecoo->ecoo_exp_chain); - break; - } - } - - spin_unlock (&ec->ec_lock); - - memcpy(&ecoo->ecoo_oa.o_inline, &ecoo->ecoo_och, FD_OSTDATA_SIZE); - ecoo->ecoo_oa.o_valid |= OBD_MD_FLHANDLE; - - rc = obd_close (&ec->ec_conn, &ecoo->ecoo_oa, - ecoo->ecoo_object->eco_lsm, NULL); - - echo_put_object (ecoo->ecoo_object); - OBD_FREE (ecoo, sizeof (*ecoo)); - - return (rc); -} - -static int -echo_ldlm_callback (struct ldlm_lock *lock, struct ldlm_lock_desc *new, - void *data, int flag) -{ - struct ec_object *eco = (struct ec_object *)data; - struct echo_client_obd *ec = &(eco->eco_device->u.echo_client); - struct lustre_handle lockh; - struct list_head *el; - int found = 0; - int rc; - - ldlm_lock2handle (lock, &lockh); - - /* #ifdef this out if we're not feeling paranoid */ - spin_lock (&ec->ec_lock); - list_for_each (el, &ec->ec_objects) { - found = (eco == list_entry(el, struct ec_object, - eco_obj_chain)); - if (found) - break; - } - spin_unlock (&ec->ec_lock); - LASSERT (found); - 
- switch (flag) { - case LDLM_CB_BLOCKING: - CDEBUG(D_INFO, "blocking callback on "LPX64", handle "LPX64"\n", - eco->eco_id, lockh.cookie); - rc = ldlm_cli_cancel (&lockh); - if (rc != ELDLM_OK) - CERROR ("ldlm_cli_cancel failed: %d\n", rc); - break; - - case LDLM_CB_CANCELING: - CDEBUG(D_INFO, "cancel callback on "LPX64", handle "LPX64"\n", - eco->eco_id, lockh.cookie); - break; - - default: - LBUG (); - } - - return (0); -} - -static int -echo_enqueue (struct obd_export *exp, struct obdo *oa, - int mode, obd_off offset, obd_size nob) -{ - struct obd_device *obd = exp->exp_obd; - struct echo_client_obd *ec = &obd->u.echo_client; - struct lustre_handle *ulh = obdo_handle (oa); - struct ec_object *eco; - struct ec_lock *ecl; - int flags; - int rc; - - if (!(mode == LCK_PR || mode == LCK_PW)) - return -EINVAL; - - if ((offset & (PAGE_SIZE - 1)) != 0 || - (nob & (PAGE_SIZE - 1)) != 0) - return -EINVAL; - - rc = echo_get_object (&eco, obd, oa); - if (rc != 0) - return rc; - - rc = -ENOMEM; - OBD_ALLOC (ecl, sizeof (*ecl)); - if (ecl == NULL) - goto failed_0; - - ecl->ecl_mode = mode; - ecl->ecl_object = eco; - ecl->ecl_extent.start = offset; - ecl->ecl_extent.end = (nob == 0) ? 
((obd_off) -1) : (offset + nob - 1); - - flags = 0; - rc = obd_enqueue(&ec->ec_conn, eco->eco_lsm, NULL, LDLM_EXTENT, - &ecl->ecl_extent,sizeof(ecl->ecl_extent), mode, - &flags, echo_ldlm_callback, eco, - &ecl->ecl_lock_handle); - if (rc != 0) - goto failed_1; - - CDEBUG(D_INFO, "enqueue handle "LPX64"\n", ecl->ecl_lock_handle.cookie); - - /* NB ecl takes object ref from echo_get_object() above */ - spin_lock(&ec->ec_lock); - - list_add(&ecl->ecl_exp_chain, &exp->exp_ec_data.eced_locks); - ulh->cookie = ecl->ecl_cookie = ec->ec_unique++; - - spin_unlock(&ec->ec_lock); - - oa->o_valid |= OBD_MD_FLHANDLE; - return 0; - - failed_1: - OBD_FREE (ecl, sizeof (*ecl)); - failed_0: - echo_put_object (eco); - return (rc); -} - -static int -echo_cancel (struct obd_export *exp, struct obdo *oa) -{ - struct obd_device *obd = exp->exp_obd; - struct echo_client_obd *ec = &obd->u.echo_client; - struct lustre_handle *ulh = obdo_handle (oa); - struct ec_lock *ecl = NULL; - int found = 0; - struct list_head *el; - int rc; - - if ((oa->o_valid & OBD_MD_FLHANDLE) == 0) - return -EINVAL; - - spin_lock (&ec->ec_lock); - - list_for_each (el, &exp->exp_ec_data.eced_locks) { - ecl = list_entry (el, struct ec_lock, ecl_exp_chain); - found = (ecl->ecl_cookie == ulh->cookie); - if (found) { - list_del (&ecl->ecl_exp_chain); - break; - } - } - - spin_unlock (&ec->ec_lock); - - if (!found) - return (-ENOENT); - - rc = obd_cancel(&ec->ec_conn, ecl->ecl_object->eco_lsm, ecl->ecl_mode, - &ecl->ecl_lock_handle); - - echo_put_object (ecl->ecl_object); - OBD_FREE (ecl, sizeof (*ecl)); - - return rc; -} - -static int echo_iocontrol(unsigned int cmd, struct lustre_handle *obdconn, - int len, void *karg, void *uarg) -{ - struct obd_export *exp = class_conn2export (obdconn); - struct obd_device *obd; - struct echo_client_obd *ec; - struct ec_object *eco; - struct obd_ioctl_data *data = karg; - int rw = OBD_BRW_READ; - int rc = 0; - ENTRY; - - if (exp == NULL) { - CERROR("ioctl: No device\n"); - GOTO(out, 
rc = -EINVAL); - } - - obd = exp->exp_obd; - ec = &obd->u.echo_client; - - switch (cmd) { - case OBD_IOC_CREATE: /* may create echo object */ - if (!capable (CAP_SYS_ADMIN)) - GOTO (out, rc = -EPERM); - - rc = echo_create_object (obd, 1, &data->ioc_obdo1, - data->ioc_pbuf1, data->ioc_plen1); - GOTO(out, rc); - - case OBD_IOC_DESTROY: - if (!capable (CAP_SYS_ADMIN)) - GOTO (out, rc = -EPERM); - - rc = echo_get_object (&eco, obd, &data->ioc_obdo1); - if (rc == 0) { - rc = obd_destroy(&ec->ec_conn, &data->ioc_obdo1, - eco->eco_lsm, NULL); - if (rc == 0) - eco->eco_deleted = 1; - echo_put_object(eco); - } - GOTO(out, rc); - - case OBD_IOC_GETATTR: - rc = echo_get_object (&eco, obd, &data->ioc_obdo1); - if (rc == 0) { - rc = obd_getattr(&ec->ec_conn, &data->ioc_obdo1, - eco->eco_lsm); - echo_put_object(eco); - } - GOTO(out, rc); - - case OBD_IOC_SETATTR: - if (!capable (CAP_SYS_ADMIN)) - GOTO (out, rc = -EPERM); - - rc = echo_get_object (&eco, obd, &data->ioc_obdo1); - if (rc == 0) { - rc = obd_setattr(&ec->ec_conn, &data->ioc_obdo1, - eco->eco_lsm, NULL); - echo_put_object(eco); - } - GOTO(out, rc); - - case OBD_IOC_OPEN: - rc = echo_open (exp, &data->ioc_obdo1); - GOTO(out, rc); - - case OBD_IOC_CLOSE: - rc = echo_close (exp, &data->ioc_obdo1); - GOTO(out, rc); - - case OBD_IOC_BRW_WRITE: - if (!capable (CAP_SYS_ADMIN)) - GOTO (out, rc = -EPERM); - - rw = OBD_BRW_WRITE; - /* fall through */ - case OBD_IOC_BRW_READ: - rc = echo_get_object (&eco, obd, &data->ioc_obdo1); - if (rc == 0) { - if (data->ioc_pbuf2 == NULL) // NULL user data pointer - rc = echo_client_kbrw(obd, rw, &data->ioc_obdo1, - eco->eco_lsm, - data->ioc_offset, - data->ioc_count); - else -#ifdef __KERNEL__ - rc = echo_client_ubrw(obd, rw, &data->ioc_obdo1, - eco->eco_lsm, - data->ioc_offset, - data->ioc_count, - data->ioc_pbuf2); -#endif - echo_put_object(eco); - } - GOTO(out, rc); - - case ECHO_IOC_GET_STRIPE: - rc = echo_get_object(&eco, obd, &data->ioc_obdo1); - if (rc == 0) { - rc = 
echo_copyout_lsm(eco->eco_lsm, data->ioc_pbuf1, - data->ioc_plen1); - echo_put_object(eco); - } - GOTO(out, rc); - - case ECHO_IOC_SET_STRIPE: - if (!capable (CAP_SYS_ADMIN)) - GOTO (out, rc = -EPERM); - - if (data->ioc_pbuf1 == NULL) { /* unset */ - rc = echo_get_object(&eco, obd, &data->ioc_obdo1); - if (rc == 0) { - eco->eco_deleted = 1; - echo_put_object(eco); - } - } else { - rc = echo_create_object(obd, 0, &data->ioc_obdo1, - data->ioc_pbuf1, - data->ioc_plen1); - } - GOTO (out, rc); - - case ECHO_IOC_ENQUEUE: - if (!capable (CAP_SYS_ADMIN)) - GOTO (out, rc = -EPERM); - - rc = echo_enqueue (exp, &data->ioc_obdo1, - data->ioc_conn1, /* lock mode */ - data->ioc_offset, data->ioc_count);/*extent*/ - GOTO (out, rc); - - case ECHO_IOC_CANCEL: - rc = echo_cancel (exp, &data->ioc_obdo1); - GOTO (out, rc); - - default: - CERROR ("echo_ioctl(): unrecognised ioctl %#x\n", cmd); - GOTO (out, rc = -ENOTTY); - } - - EXIT; - out: - class_export_put(exp); - return rc; -} - -static int echo_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct obd_ioctl_data* data = buf; - struct echo_client_obd *ec = &obddev->u.echo_client; - struct obd_device *tgt; - struct lov_stripe_md *lsm = NULL; - struct obd_uuid echo_uuid = { "ECHO_UUID" }; - int rc; - ENTRY; - - if (data->ioc_inllen1 < 1) { - CERROR("requires a TARGET OBD name\n"); - RETURN(-EINVAL); - } - - tgt = class_name2obd(data->ioc_inlbuf1); - if (!tgt || !tgt->obd_attached || !tgt->obd_set_up) { - CERROR("device not attached or not set up (%d/%s)\n", - data->ioc_dev, data->ioc_inlbuf1); - RETURN(-EINVAL); - } - - spin_lock_init (&ec->ec_lock); - INIT_LIST_HEAD (&ec->ec_objects); - ec->ec_unique = 0; - - rc = obd_connect(&ec->ec_conn, tgt, &echo_uuid); - if (rc) { - CERROR("fail to connect to device %d\n", data->ioc_dev); - return (rc); - } - - ec->ec_lsmsize = obd_alloc_memmd (&ec->ec_conn, &lsm); - if (ec->ec_lsmsize < 0) { - CERROR ("Can't get # stripes: %d\n", rc); - obd_disconnect (&ec->ec_conn, 0); - rc 
= ec->ec_lsmsize; - } else { - ec->ec_nstripes = lsm->lsm_stripe_count; - obd_free_memmd (&ec->ec_conn, &lsm); - } - - RETURN(rc); -} - -static int echo_cleanup(struct obd_device * obddev, int force, int failover) -{ - struct list_head *el; - struct ec_object *eco; - struct echo_client_obd *ec = &obddev->u.echo_client; - int rc; - ENTRY; - - if (!list_empty(&obddev->obd_exports)) { - CERROR("still has clients!\n"); - RETURN(-EBUSY); - } - - /* XXX assuming sole access */ - while (!list_empty (&ec->ec_objects)) { - el = ec->ec_objects.next; - eco = list_entry (el, struct ec_object, eco_obj_chain); - - LASSERT (eco->eco_refcount == 0); - eco->eco_refcount = 1; - eco->eco_deleted = 1; - echo_put_object (eco); - } - - rc = obd_disconnect (&ec->ec_conn, 0); - if (rc != 0) - CERROR("fail to disconnect device: %d\n", rc); - - RETURN (rc); -} - -static int echo_connect(struct lustre_handle *conn, struct obd_device *src, - struct obd_uuid *cluuid) -{ - struct obd_export *exp; - int rc; - - rc = class_connect(conn, src, cluuid); - if (rc == 0) { - exp = class_conn2export (conn); - INIT_LIST_HEAD(&exp->exp_ec_data.eced_open_head); - INIT_LIST_HEAD(&exp->exp_ec_data.eced_locks); - class_export_put(exp); - } - - RETURN (rc); -} - -static int echo_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_export *exp = class_conn2export (conn); - struct obd_device *obd; - struct echo_client_obd *ec; - struct ec_open_object *ecoo; - struct ec_lock *ecl; - int rc; - - if (exp == NULL) - GOTO(out, rc = -EINVAL); - - obd = exp->exp_obd; - ec = &obd->u.echo_client; - - /* no more contention on export's lock list */ - while (!list_empty (&exp->exp_ec_data.eced_locks)) { - ecl = list_entry (exp->exp_ec_data.eced_locks.next, - struct ec_lock, ecl_exp_chain); - list_del (&ecl->ecl_exp_chain); - - rc = obd_cancel (&ec->ec_conn, ecl->ecl_object->eco_lsm, - ecl->ecl_mode, &ecl->ecl_lock_handle); - - CDEBUG (D_INFO, "Cancel lock on object "LPX64" on disconnect (%d)\n", - 
ecl->ecl_object->eco_id, rc); - - echo_put_object (ecl->ecl_object); - OBD_FREE (ecl, sizeof (*ecl)); - } - - /* no more contention on export's open handle list */ - while (!list_empty (&exp->exp_ec_data.eced_open_head)) { - ecoo = list_entry (exp->exp_ec_data.eced_open_head.next, - struct ec_open_object, ecoo_exp_chain); - list_del (&ecoo->ecoo_exp_chain); - - memcpy (&ecoo->ecoo_oa.o_inline, &ecoo->ecoo_och, - FD_OSTDATA_SIZE); - ecoo->ecoo_oa.o_valid |= OBD_MD_FLHANDLE; - - rc = obd_close (&ec->ec_conn, &ecoo->ecoo_oa, - ecoo->ecoo_object->eco_lsm, NULL); - - CDEBUG (D_INFO, "Closed object "LPX64" on disconnect (%d)\n", - ecoo->ecoo_oa.o_id, rc); - - echo_put_object (ecoo->ecoo_object); - OBD_FREE (ecoo, sizeof (*ecoo)); - } - - rc = class_disconnect (conn, 0); - GOTO(out, rc); - out: - class_export_put(exp); - return rc; -} - -static struct obd_ops echo_obd_ops = { - o_owner: THIS_MODULE, - o_setup: echo_setup, - o_cleanup: echo_cleanup, - o_iocontrol: echo_iocontrol, - o_connect: echo_connect, - o_disconnect: echo_disconnect -}; - -int echo_client_init(void) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_vars(&lvars); - return class_register_type(&echo_obd_ops, lvars.module_vars, - OBD_ECHO_CLIENT_DEVICENAME); -} - -void echo_client_cleanup(void) -{ - class_unregister_type(OBD_ECHO_CLIENT_DEVICENAME); -} diff --git a/lustre/obdecho/lproc_echo.c b/lustre/obdecho/lproc_echo.c deleted file mode 100644 index 6a16001..0000000 --- a/lustre/obdecho/lproc_echo.c +++ /dev/null @@ -1,54 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#define DEBUG_SUBSYSTEM S_ECHO - -#include <linux/lprocfs_status.h> -#include <linux/obd_class.h> - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -#else - -int rd_fstype(char* page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device* dev = (struct obd_device*)data; - - LASSERT(dev != NULL); - *eof = 1; - return snprintf(page, count, "%s\n", dev->u.echo.eo_fstype); -} - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "fstype", rd_fstype, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -#endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/obdfilter/.cvsignore b/lustre/obdfilter/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/obdfilter/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/obdfilter/Makefile.am b/lustre/obdfilter/Makefile.am deleted file mode 100644 index b9addf1..0000000 --- a/lustre/obdfilter/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -MODULE = obdfilter -modulefs_DATA = obdfilter.o -EXTRA_PROGRAMS = obdfilter -obdfilter_SOURCES = filter.c lproc_obdfilter.c - -include $(top_srcdir)/Rules diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c deleted file mode 100644 index 6f2d96c..0000000 --- a/lustre/obdfilter/filter.c +++ /dev/null @@ -1,2888 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * linux/fs/obdfilter/filter.c - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Author: Peter Braam <braam@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* - * Invariant: Get O/R i_sem for lookup, if needed, before any journal ops - * (which need to get journal_lock, may block if journal full). - * - * Invariant: Call filter_start_transno() before any journal ops to avoid the - * same deadlock problem. We can (and want) to get rid of the - * transno sem in favour of the dir/inode i_sem to avoid single - * threaded operation on the OST. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_FILTER - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/pagemap.h> // XXX kill me soon -#include <linux/fs.h> -#include <linux/dcache.h> -#include <linux/obd_class.h> -#include <linux/lustre_dlm.h> -#include <linux/obd_filter.h> -#include <linux/init.h> -#include <linux/random.h> -#include <linux/lustre_fsfilt.h> -#include <linux/lprocfs_status.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include <linux/mount.h> -#endif - -enum { - LPROC_FILTER_READ_BYTES = 0, - LPROC_FILTER_WRITE_BYTES = 1, - LPROC_FILTER_LAST, -}; - -#define S_SHIFT 12 -static char *obd_type_by_mode[S_IFMT >> S_SHIFT] = { - [0] NULL, - [S_IFREG >> S_SHIFT] "R", - [S_IFDIR >> S_SHIFT] "D", - [S_IFCHR >> S_SHIFT] "C", - [S_IFBLK >> S_SHIFT] "B", - [S_IFIFO >> S_SHIFT] "F", - [S_IFSOCK >> S_SHIFT] "S", - [S_IFLNK >> S_SHIFT] "L" -}; - -static inline const char *obd_mode_to_type(int mode) -{ - return obd_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; -} - -static void filter_ffd_addref(void *ffdp) -{ - struct filter_file_data *ffd = ffdp; - - atomic_inc(&ffd->ffd_refcount); - CDEBUG(D_INFO, "GETting ffd %p : new refcount %d\n", ffd, - atomic_read(&ffd->ffd_refcount)); -} - -static struct filter_file_data *filter_ffd_new(void) -{ - struct filter_file_data *ffd; - - OBD_ALLOC(ffd, sizeof *ffd); - if (ffd == NULL) { - CERROR("out of memory\n"); - return NULL; - } - - atomic_set(&ffd->ffd_refcount, 2); - - INIT_LIST_HEAD(&ffd->ffd_handle.h_link); - class_handle_hash(&ffd->ffd_handle, filter_ffd_addref); - - return ffd; -} - -static struct filter_file_data *filter_handle2ffd(struct lustre_handle *handle) -{ - struct filter_file_data *ffd = NULL; - ENTRY; - LASSERT(handle != NULL); - ffd = class_handle2object(handle->cookie); - if (ffd != NULL) - LASSERT(ffd->ffd_file->private_data == ffd); - RETURN(ffd); -} - -static void filter_ffd_put(struct filter_file_data *ffd) -{ - CDEBUG(D_INFO, 
"PUTting ffd %p : new refcount %d\n", ffd, - atomic_read(&ffd->ffd_refcount) - 1); - LASSERT(atomic_read(&ffd->ffd_refcount) > 0 && - atomic_read(&ffd->ffd_refcount) < 0x5a5a); - if (atomic_dec_and_test(&ffd->ffd_refcount)) { - LASSERT(list_empty(&ffd->ffd_handle.h_link)); - OBD_FREE(ffd, sizeof *ffd); - } -} - -static void filter_ffd_destroy(struct filter_file_data *ffd) -{ - class_handle_unhash(&ffd->ffd_handle); - filter_ffd_put(ffd); -} - -static void filter_commit_cb(struct obd_device *obd, __u64 transno, int error) -{ - obd_transno_commit_cb(obd, transno, error); -} -/* Assumes caller has already pushed us into the kernel context. */ -int filter_finish_transno(struct obd_export *export, void *handle, - struct obd_trans_info *oti, int rc) -{ - __u64 last_rcvd; - struct obd_device *obd = export->exp_obd; - struct filter_obd *filter = &obd->u.filter; - struct filter_export_data *fed = &export->exp_filter_data; - struct filter_client_data *fcd = fed->fed_fcd; - loff_t off; - ssize_t written; - - /* Propagate error code. 
*/ - if (rc) - RETURN(rc); - - if (!obd->obd_replayable) - RETURN(rc); - - /* we don't allocate new transnos for replayed requests */ - if (oti && oti->oti_transno == 0) { - spin_lock(&filter->fo_translock); - last_rcvd = le64_to_cpu(filter->fo_fsd->fsd_last_rcvd) + 1; - filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd); - spin_unlock(&filter->fo_translock); - oti->oti_transno = last_rcvd; - fcd->fcd_last_rcvd = cpu_to_le64(last_rcvd); - fcd->fcd_mount_count = filter->fo_fsd->fsd_mount_count; - - /* could get xid from oti, if it's ever needed */ - fcd->fcd_last_xid = 0; - - off = fed->fed_lr_off; - fsfilt_set_last_rcvd(obd, last_rcvd, handle, filter_commit_cb); - written = lustre_fwrite(filter->fo_rcvd_filp, (char *)fcd, - sizeof(*fcd), &off); - CDEBUG(D_HA, "wrote trans #"LPD64" for client %s at #%d: " - "written = "LPSZ"\n", last_rcvd, fcd->fcd_uuid, - fed->fed_lr_idx, written); - - if (written == sizeof(*fcd)) - RETURN(0); - CERROR("error writing to last_rcvd file: rc = %d\n", - (int)written); - if (written >= 0) - RETURN(-EIO); - - RETURN(written); - } - - RETURN(0); -} - -static inline void f_dput(struct dentry *dentry) -{ - /* Can't go inside filter_ddelete because it can block */ - CDEBUG(D_INODE, "putting %s: %p, count = %d\n", - dentry->d_name.name, dentry, atomic_read(&dentry->d_count) - 1); - LASSERT(atomic_read(&dentry->d_count) > 0); - - dput(dentry); -} - -/* Not racy w.r.t. others, because we are the only user of this dentry */ -static void filter_drelease(struct dentry *dentry) -{ - if (dentry->d_fsdata) - OBD_FREE(dentry->d_fsdata, sizeof(struct filter_dentry_data)); -} - -struct dentry_operations filter_dops = { - .d_release = filter_drelease, -}; - -#define LAST_RCVD "last_rcvd" -#define INIT_OBJID 2 - -/* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ -#define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8) -#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long)) - -/* Add client data to the FILTER. 
We use a bitmap to locate a free space - * in the last_rcvd file if cl_idx is -1 (i.e. a new client). - * Otherwise, we have just read the data from the last_rcvd file and - * we know its offset. - */ -int filter_client_add(struct obd_device *obd, struct filter_obd *filter, - struct filter_export_data *fed, int cl_idx) -{ - unsigned long *bitmap = filter->fo_last_rcvd_slots; - int new_client = (cl_idx == -1); - - LASSERT(bitmap != NULL); - - /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ - if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID")) - RETURN(0); - - /* the bitmap operations can handle cl_idx > sizeof(long) * 8, so - * there's no need for extra complication here - */ - if (new_client) { - cl_idx = find_first_zero_bit(bitmap, FILTER_LR_MAX_CLIENTS); - repeat: - if (cl_idx >= FILTER_LR_MAX_CLIENTS) { - CERROR("no client slots - fix FILTER_LR_MAX_CLIENTS\n"); - return -ENOMEM; - } - if (test_and_set_bit(cl_idx, bitmap)) { - CERROR("FILTER client %d: found bit is set in bitmap\n", - cl_idx); - cl_idx = find_next_zero_bit(bitmap, - FILTER_LR_MAX_CLIENTS, - cl_idx); - goto repeat; - } - } else { - if (test_and_set_bit(cl_idx, bitmap)) { - CERROR("FILTER client %d: bit already set in bitmap!\n", - cl_idx); - LBUG(); - } - } - - fed->fed_lr_idx = cl_idx; - fed->fed_lr_off = le32_to_cpu(filter->fo_fsd->fsd_client_start) + - cl_idx * le16_to_cpu(filter->fo_fsd->fsd_client_size); - - CDEBUG(D_INFO, "client at index %d (%llu) with UUID '%s' added\n", - fed->fed_lr_idx, fed->fed_lr_off, fed->fed_fcd->fcd_uuid); - - if (new_client) { - struct obd_run_ctxt saved; - loff_t off = fed->fed_lr_off; - ssize_t written; - void *handle; - - CDEBUG(D_INFO, "writing client fcd at idx %u (%llu) (len %u)\n", - fed->fed_lr_idx,off,(unsigned int)sizeof(*fed->fed_fcd)); - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - /* Transaction eeded to fix for bug 1403 */ - handle = fsfilt_start(obd, - filter->fo_rcvd_filp->f_dentry->d_inode, - FSFILT_OP_SETATTR); - if 
(IS_ERR(handle)) { - written = PTR_ERR(handle); - CERROR("unable to start transaction: rc %d\n", - (int)written); - } else { - written = lustre_fwrite(filter->fo_rcvd_filp, - (char *)fed->fed_fcd, - sizeof(*fed->fed_fcd), &off); - fsfilt_commit(obd, - filter->fo_rcvd_filp->f_dentry->d_inode, - handle, 0); - } - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - - if (written != sizeof(*fed->fed_fcd)) { - if (written < 0) - RETURN(written); - RETURN(-EIO); - } - } - return 0; -} - -int filter_client_free(struct obd_export *exp, int failover) -{ - struct filter_export_data *fed = &exp->exp_filter_data; - struct filter_obd *filter = &exp->exp_obd->u.filter; - struct filter_client_data zero_fcd; - struct obd_run_ctxt saved; - int written; - loff_t off; - ENTRY; - - if (!fed->fed_fcd) - RETURN(0); - - if (failover != 0) - GOTO(free, 0); - - /* XXX if fcd_uuid were a real obd_uuid, I could use obd_uuid_equals */ - if (!strcmp(fed->fed_fcd->fcd_uuid, "OBD_CLASS_UUID")) - GOTO(free, 0); - - LASSERT(filter->fo_last_rcvd_slots != NULL); - - off = fed->fed_lr_off; - - CDEBUG(D_INFO, "freeing client at idx %u (%lld) with UUID '%s'\n", - fed->fed_lr_idx, fed->fed_lr_off, fed->fed_fcd->fcd_uuid); - - if (!test_and_clear_bit(fed->fed_lr_idx, filter->fo_last_rcvd_slots)) { - CERROR("FILTER client %u: bit already clear in bitmap!!\n", - fed->fed_lr_idx); - LBUG(); - } - - memset(&zero_fcd, 0, sizeof zero_fcd); - push_ctxt(&saved, &filter->fo_ctxt, NULL); - written = lustre_fwrite(filter->fo_rcvd_filp, (const char *)&zero_fcd, - sizeof(zero_fcd), &off); - - /* XXX: this write gets lost sometimes, unless this sync is here. 
*/ - if (written > 0) - file_fsync(filter->fo_rcvd_filp, - filter->fo_rcvd_filp->f_dentry, 1); - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - - if (written != sizeof(zero_fcd)) { - CERROR("error zeroing out client %s idx %u (%llu) in %s: %d\n", - fed->fed_fcd->fcd_uuid, fed->fed_lr_idx, fed->fed_lr_off, - LAST_RCVD, written); - } else { - CDEBUG(D_INFO, - "zeroed disconnecting client %s at idx %u (%llu)\n", - fed->fed_fcd->fcd_uuid, fed->fed_lr_idx,fed->fed_lr_off); - } - -free: - OBD_FREE(fed->fed_fcd, sizeof(*fed->fed_fcd)); - - RETURN(0); -} - -static int filter_free_server_data(struct filter_obd *filter) -{ - OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd)); - filter->fo_fsd = NULL; - OBD_FREE(filter->fo_last_rcvd_slots, - FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long)); - filter->fo_last_rcvd_slots = NULL; - return 0; -} - - -/* assumes caller is already in kernel ctxt */ -static int filter_update_server_data(struct file *filp, - struct filter_server_data *fsd) -{ - loff_t off = 0; - int rc; - - CDEBUG(D_INODE, "server uuid : %s\n", fsd->fsd_uuid); - CDEBUG(D_INODE, "server last_objid: "LPU64"\n", - le64_to_cpu(fsd->fsd_last_objid)); - CDEBUG(D_INODE, "server last_rcvd : "LPU64"\n", - le64_to_cpu(fsd->fsd_last_rcvd)); - CDEBUG(D_INODE, "server last_mount: "LPU64"\n", - le64_to_cpu(fsd->fsd_mount_count)); - - rc = lustre_fwrite(filp, (char *)fsd, sizeof(*fsd), &off); - if (rc != sizeof(*fsd)) { - CDEBUG(D_INODE, "error writing filter_server_data: rc = %d\n", - rc); - RETURN(-EIO); - } - RETURN(0); -} - -/* assumes caller has already in kernel ctxt */ -static int filter_init_server_data(struct obd_device *obd, struct file * filp, - __u64 init_lastobjid) -{ - struct filter_obd *filter = &obd->u.filter; - struct filter_server_data *fsd; - struct filter_client_data *fcd = NULL; - struct inode *inode = filp->f_dentry->d_inode; - unsigned long last_rcvd_size = inode->i_size; - __u64 mount_count = 0; - int cl_idx; - loff_t off = 0; - int rc; - - /* ensure padding 
in the struct is the correct size */ - LASSERT (offsetof(struct filter_server_data, fsd_padding) + - sizeof(fsd->fsd_padding) == FILTER_LR_SERVER_SIZE); - LASSERT (offsetof(struct filter_client_data, fcd_padding) + - sizeof(fcd->fcd_padding) == FILTER_LR_CLIENT_SIZE); - - OBD_ALLOC(fsd, sizeof(*fsd)); - if (!fsd) - RETURN(-ENOMEM); - filter->fo_fsd = fsd; - - OBD_ALLOC(filter->fo_last_rcvd_slots, - FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long)); - if (filter->fo_last_rcvd_slots == NULL) { - OBD_FREE(fsd, sizeof(*fsd)); - RETURN(-ENOMEM); - } - - if (last_rcvd_size == 0) { - CERROR("%s: initializing new last_rcvd\n", obd->obd_name); - - memcpy(fsd->fsd_uuid, obd->obd_uuid.uuid,sizeof(fsd->fsd_uuid)); - fsd->fsd_last_objid = cpu_to_le64(init_lastobjid); - fsd->fsd_last_rcvd = 0; - mount_count = fsd->fsd_mount_count = 0; - fsd->fsd_server_size = cpu_to_le32(FILTER_LR_SERVER_SIZE); - fsd->fsd_client_start = cpu_to_le32(FILTER_LR_CLIENT_START); - fsd->fsd_client_size = cpu_to_le16(FILTER_LR_CLIENT_SIZE); - fsd->fsd_subdir_count = cpu_to_le16(FILTER_SUBDIR_COUNT); - filter->fo_subdir_count = FILTER_SUBDIR_COUNT; - } else { - ssize_t retval = lustre_fread(filp, (char *)fsd, sizeof(*fsd), - &off); - if (retval != sizeof(*fsd)) { - CDEBUG(D_INODE,"OBD filter: error reading %s\n", - LAST_RCVD); - GOTO(err_fsd, rc = -EIO); - } - mount_count = le64_to_cpu(fsd->fsd_mount_count); - filter->fo_subdir_count = le16_to_cpu(fsd->fsd_subdir_count); - } - - if (fsd->fsd_feature_incompat) { - CERROR("unsupported feature %x\n", - le32_to_cpu(fsd->fsd_feature_incompat)); - GOTO(err_fsd, rc = -EINVAL); - } - if (fsd->fsd_feature_rocompat) { - CERROR("read-only feature %x\n", - le32_to_cpu(fsd->fsd_feature_rocompat)); - /* Do something like remount filesystem read-only */ - GOTO(err_fsd, rc = -EINVAL); - } - - CDEBUG(D_INODE, "%s: server last_objid: "LPU64"\n", - obd->obd_name, le64_to_cpu(fsd->fsd_last_objid)); - CDEBUG(D_INODE, "%s: server last_rcvd : "LPU64"\n", - obd->obd_name, 
le64_to_cpu(fsd->fsd_last_rcvd)); - CDEBUG(D_INODE, "%s: server last_mount: "LPU64"\n", - obd->obd_name, mount_count); - CDEBUG(D_INODE, "%s: server data size: %u\n", - obd->obd_name, le32_to_cpu(fsd->fsd_server_size)); - CDEBUG(D_INODE, "%s: per-client data start: %u\n", - obd->obd_name, le32_to_cpu(fsd->fsd_client_start)); - CDEBUG(D_INODE, "%s: per-client data size: %u\n", - obd->obd_name, le32_to_cpu(fsd->fsd_client_size)); - CDEBUG(D_INODE, "%s: server subdir_count: %u\n", - obd->obd_name, le16_to_cpu(fsd->fsd_subdir_count)); - - /* - * When we do a clean FILTER shutdown, we save the last_rcvd into - * the header. If we find clients with higher last_rcvd values - * then those clients may need recovery done. - */ - if (!obd->obd_replayable) { - CERROR("%s: recovery support OFF\n", obd->obd_name); - GOTO(out, rc = 0); - } - - for (cl_idx = 0; off < last_rcvd_size; cl_idx++) { - __u64 last_rcvd; - int mount_age; - - if (!fcd) { - OBD_ALLOC(fcd, sizeof(*fcd)); - if (!fcd) - GOTO(err_fsd, rc = -ENOMEM); - } - - /* Don't assume off is incremented properly, in case - * sizeof(fsd) isn't the same as fsd->fsd_client_size. - */ - off = le32_to_cpu(fsd->fsd_client_start) + - cl_idx * le16_to_cpu(fsd->fsd_client_size); - rc = lustre_fread(filp, (char *)fcd, sizeof(*fcd), &off); - if (rc != sizeof(*fcd)) { - CERROR("error reading FILTER %s offset %d: rc = %d\n", - LAST_RCVD, cl_idx, rc); - if (rc > 0) /* XXX fatal error or just abort reading? */ - rc = -EIO; - break; - } - - if (fcd->fcd_uuid[0] == '\0') { - CDEBUG(D_INFO, "skipping zeroed client at offset %d\n", - cl_idx); - continue; - } - - last_rcvd = le64_to_cpu(fcd->fcd_last_rcvd); - - /* These exports are cleaned up by filter_disconnect(), so they - * need to be set up like real exports as filter_connect() does. 
- */ - mount_age = mount_count - le64_to_cpu(fcd->fcd_mount_count); - if (mount_age < FILTER_MOUNT_RECOV) { - struct obd_export *exp = class_new_export(obd); - struct filter_export_data *fed; - CERROR("RCVRNG CLIENT uuid: %s idx: %d lr: "LPU64 - " srv lr: "LPU64" mnt: "LPU64" last mount: " - LPU64"\n", fcd->fcd_uuid, cl_idx, - last_rcvd, le64_to_cpu(fsd->fsd_last_rcvd), - le64_to_cpu(fcd->fcd_mount_count), mount_count); - if (exp == NULL) { - /* XXX this rc is ignored */ - rc = -ENOMEM; - break; - } - memcpy(&exp->exp_client_uuid.uuid, fcd->fcd_uuid, - sizeof exp->exp_client_uuid.uuid); - fed = &exp->exp_filter_data; - fed->fed_fcd = fcd; - filter_client_add(obd, filter, fed, cl_idx); - /* create helper if export init gets more complex */ - INIT_LIST_HEAD(&fed->fed_open_head); - spin_lock_init(&fed->fed_lock); - - fcd = NULL; - obd->obd_recoverable_clients++; - class_export_put(exp); - } else { - CDEBUG(D_INFO, - "discarded client %d UUID '%s' count "LPU64"\n", - cl_idx, fcd->fcd_uuid, - le64_to_cpu(fcd->fcd_mount_count)); - } - - CDEBUG(D_OTHER, "client at idx %d has last_rcvd = "LPU64"\n", - cl_idx, last_rcvd); - - if (last_rcvd > le64_to_cpu(filter->fo_fsd->fsd_last_rcvd)) - filter->fo_fsd->fsd_last_rcvd = cpu_to_le64(last_rcvd); - - obd->obd_last_committed = - le64_to_cpu(filter->fo_fsd->fsd_last_rcvd); - if (obd->obd_recoverable_clients) { - CERROR("RECOVERY: %d recoverable clients, last_rcvd " - LPU64"\n", obd->obd_recoverable_clients, - le64_to_cpu(filter->fo_fsd->fsd_last_rcvd)); - obd->obd_next_recovery_transno = - obd->obd_last_committed + 1; - obd->obd_recovering = 1; - } - - } - - if (fcd) - OBD_FREE(fcd, sizeof(*fcd)); - -out: - fsd->fsd_mount_count = cpu_to_le64(mount_count + 1); - - /* save it,so mount count and last_recvd is current */ - rc = filter_update_server_data(filp, filter->fo_fsd); - - RETURN(rc); - -err_fsd: - filter_free_server_data(filter); - RETURN(rc); -} - -/* setup the object store with correct subdirectories */ -static int 
filter_prep(struct obd_device *obd) -{ - struct obd_run_ctxt saved; - struct filter_obd *filter = &obd->u.filter; - struct dentry *dentry, *O_dentry; - struct file *file; - struct inode *inode; - int i; - int rc = 0; - int mode = 0; - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - dentry = simple_mkdir(current->fs->pwd, "O", 0700); - CDEBUG(D_INODE, "got/created O: %p\n", dentry); - if (IS_ERR(dentry)) { - rc = PTR_ERR(dentry); - CERROR("cannot open/create O: rc = %d\n", rc); - GOTO(out, rc); - } - filter->fo_dentry_O = dentry; - - /* - * Create directories and/or get dentries for each object type. - * This saves us from having to do multiple lookups for each one. - */ - O_dentry = filter->fo_dentry_O; - for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) { - char *name = obd_type_by_mode[mode]; - - if (!name) { - filter->fo_dentry_O_mode[mode] = NULL; - continue; - } - dentry = simple_mkdir(O_dentry, name, 0700); - CDEBUG(D_INODE, "got/created O/%s: %p\n", name, dentry); - if (IS_ERR(dentry)) { - rc = PTR_ERR(dentry); - CERROR("cannot create O/%s: rc = %d\n", name, rc); - GOTO(err_O_mode, rc); - } - filter->fo_dentry_O_mode[mode] = dentry; - } - - file = filp_open(LAST_RCVD, O_RDWR | O_CREAT, 0700); - if (!file || IS_ERR(file)) { - rc = PTR_ERR(file); - CERROR("OBD filter: cannot open/create %s: rc = %d\n", - LAST_RCVD, rc); - GOTO(err_O_mode, rc); - } - - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { - CERROR("%s is not a regular file!: mode = %o\n", LAST_RCVD, - file->f_dentry->d_inode->i_mode); - GOTO(err_filp, rc = -ENOENT); - } - - rc = fsfilt_journal_data(obd, file); - if (rc) { - CERROR("cannot journal data on %s: rc = %d\n", LAST_RCVD, rc); - GOTO(err_filp, rc); - } - /* steal operations */ - inode = file->f_dentry->d_inode; - filter->fo_fop = file->f_op; - filter->fo_iop = inode->i_op; - filter->fo_aops = inode->i_mapping->a_ops; - - rc = filter_init_server_data(obd, file, INIT_OBJID); - if (rc) { - CERROR("cannot read %s: rc = %d\n", LAST_RCVD, rc); - 
GOTO(err_client, rc); - } - filter->fo_rcvd_filp = file; - - if (filter->fo_subdir_count) { - O_dentry = filter->fo_dentry_O_mode[S_IFREG >> S_SHIFT]; - OBD_ALLOC(filter->fo_dentry_O_sub, - filter->fo_subdir_count * sizeof(dentry)); - if (!filter->fo_dentry_O_sub) - GOTO(err_client, rc = -ENOMEM); - - for (i = 0; i < filter->fo_subdir_count; i++) { - char dir[20]; - snprintf(dir, sizeof(dir), "d%u", i); - - dentry = simple_mkdir(O_dentry, dir, 0700); - CDEBUG(D_INODE, "got/created O/R/%s: %p\n", dir,dentry); - if (IS_ERR(dentry)) { - rc = PTR_ERR(dentry); - CERROR("can't create O/R/%s: rc = %d\n",dir,rc); - GOTO(err_O_sub, rc); - } - filter->fo_dentry_O_sub[i] = dentry; - } - } - rc = 0; - out: - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - - return(rc); - -err_O_sub: - while (i-- > 0) { - struct dentry *dentry = filter->fo_dentry_O_sub[i]; - if (dentry) { - f_dput(dentry); - filter->fo_dentry_O_sub[i] = NULL; - } - } - OBD_FREE(filter->fo_dentry_O_sub, - filter->fo_subdir_count * sizeof(dentry)); -err_client: - class_disconnect_exports(obd, 0); -err_filp: - if (filp_close(file, 0)) - CERROR("can't close %s after error\n", LAST_RCVD); - filter->fo_rcvd_filp = NULL; -err_O_mode: - while (mode-- > 0) { - struct dentry *dentry = filter->fo_dentry_O_mode[mode]; - if (dentry) { - f_dput(dentry); - filter->fo_dentry_O_mode[mode] = NULL; - } - } - f_dput(filter->fo_dentry_O); - filter->fo_dentry_O = NULL; - goto out; -} - -/* cleanup the filter: write last used object id to status file */ -static void filter_post(struct obd_device *obd) -{ - struct obd_run_ctxt saved; - struct filter_obd *filter = &obd->u.filter; - long rc; - int mode; - - /* XXX: filter_update_lastobjid used to call fsync_dev. 
It might be - * best to start a transaction with h_sync, because we removed this - * from lastobjid */ - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd); - if (rc) - CERROR("OBD filter: error writing lastobjid: rc = %ld\n", rc); - - - if (filter->fo_rcvd_filp) { - rc = file_fsync(filter->fo_rcvd_filp, - filter->fo_rcvd_filp->f_dentry, 1); - filp_close(filter->fo_rcvd_filp, 0); - filter->fo_rcvd_filp = NULL; - if (rc) - CERROR("last_rcvd file won't closed rc = %ld\n", rc); - } - - if (filter->fo_subdir_count) { - int i; - for (i = 0; i < filter->fo_subdir_count; i++) { - struct dentry *dentry = filter->fo_dentry_O_sub[i]; - f_dput(dentry); - filter->fo_dentry_O_sub[i] = NULL; - } - OBD_FREE(filter->fo_dentry_O_sub, - filter->fo_subdir_count * - sizeof(*filter->fo_dentry_O_sub)); - } - for (mode = 0; mode < (S_IFMT >> S_SHIFT); mode++) { - struct dentry *dentry = filter->fo_dentry_O_mode[mode]; - if (dentry) { - f_dput(dentry); - filter->fo_dentry_O_mode[mode] = NULL; - } - } - f_dput(filter->fo_dentry_O); - filter_free_server_data(filter); - pop_ctxt(&saved, &filter->fo_ctxt, NULL); -} - - -static __u64 filter_next_id(struct filter_obd *filter) -{ - obd_id id; - LASSERT(filter->fo_fsd != NULL); - - spin_lock(&filter->fo_objidlock); - id = le64_to_cpu(filter->fo_fsd->fsd_last_objid); - filter->fo_fsd->fsd_last_objid = cpu_to_le64(id + 1); - spin_unlock(&filter->fo_objidlock); - - return id; -} - -/* direct cut-n-paste of mds_blocking_ast() */ -int filter_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, - void *data, int flag) -{ - int do_ast; - ENTRY; - - if (flag == LDLM_CB_CANCELING) { - /* Don't need to do anything here. */ - RETURN(0); - } - - /* XXX layering violation! 
-phil */ - l_lock(&lock->l_resource->lr_namespace->ns_lock); - /* Get this: if filter_blocking_ast is racing with ldlm_intent_policy, - * such that mds_blocking_ast is called just before l_i_p takes the - * ns_lock, then by the time we get the lock, we might not be the - * correct blocking function anymore. So check, and return early, if - * so. */ - if (lock->l_blocking_ast != filter_blocking_ast) { - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - RETURN(0); - } - - lock->l_flags |= LDLM_FL_CBPENDING; - do_ast = (!lock->l_readers && !lock->l_writers); - l_unlock(&lock->l_resource->lr_namespace->ns_lock); - - if (do_ast) { - struct lustre_handle lockh; - int rc; - - LDLM_DEBUG(lock, "already unused, calling ldlm_cli_cancel"); - ldlm_lock2handle(lock, &lockh); - rc = ldlm_cli_cancel(&lockh); - if (rc < 0) - CERROR("ldlm_cli_cancel: %d\n", rc); - } else { - LDLM_DEBUG(lock, "Lock still has references, will be " - "cancelled later"); - } - RETURN(0); -} - -static int filter_lock_dentry(struct obd_device *obd, struct dentry *de, - ldlm_mode_t lock_mode,struct lustre_handle *lockh) -{ - struct ldlm_res_id res_id = { .name = {0} }; - int flags = 0, rc; - ENTRY; - - res_id.name[0] = de->d_inode->i_ino; - res_id.name[1] = de->d_inode->i_generation; - rc = ldlm_cli_enqueue(NULL, NULL, obd->obd_namespace, NULL, - res_id, LDLM_PLAIN, NULL, 0, lock_mode, - &flags, ldlm_completion_ast, - filter_blocking_ast, NULL, lockh); - - RETURN(rc == ELDLM_OK ? 
0 : -ENOLCK); /* XXX translate ldlm code */ -} - -static void filter_parent_unlock(struct dentry *dparent, - struct lustre_handle *lockh, - ldlm_mode_t lock_mode) -{ - ldlm_lock_decref(lockh, lock_mode); -} - -/* We never dget the object parent, so DON'T dput it either */ -static inline struct dentry *filter_parent(struct obd_device *obd, - obd_mode mode, obd_id objid) -{ - struct filter_obd *filter = &obd->u.filter; - - LASSERT(S_ISREG(mode)); /* only regular files for now */ - if (!S_ISREG(mode) || filter->fo_subdir_count == 0) - return filter->fo_dentry_O_mode[(mode & S_IFMT) >> S_SHIFT]; - - return filter->fo_dentry_O_sub[objid & (filter->fo_subdir_count - 1)]; -} - -/* We never dget the object parent, so DON'T dput it either */ -static inline struct dentry *filter_parent_lock(struct obd_device *obd, - obd_mode mode, obd_id objid, - ldlm_mode_t lock_mode, - struct lustre_handle *lockh) -{ - unsigned long now = jiffies; - struct dentry *de = filter_parent(obd, mode, objid); - int rc; - - if (IS_ERR(de)) - return de; - - rc = filter_lock_dentry(obd, de, lock_mode, lockh); - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow parent lock %lus\n", (jiffies - now) / HZ); - return rc ? ERR_PTR(rc) : de; -} - -/* How to get files, dentries, inodes from object id's. - * - * If dir_dentry is passed, the caller has already locked the parent - * appropriately for this operation (normally a write lock). If - * dir_dentry is NULL, we do a read lock while we do the lookup to - * avoid races with create/destroy and such changing the directory - * internal to the filesystem code. 
- */ -static struct dentry *filter_fid2dentry(struct obd_device *obd, - struct dentry *dir_dentry, - obd_mode mode, obd_id id) -{ - struct super_block *sb = obd->u.filter.fo_sb; - struct lustre_handle lockh; - struct dentry *dparent = dir_dentry; - struct dentry *dchild; - char name[32]; - int len; - ENTRY; - - if (!sb || !sb->s_dev) { - CERROR("device not initialized.\n"); - RETURN(ERR_PTR(-ENXIO)); - } - - if (id == 0) { - CERROR("fatal: invalid object id 0\n"); - LBUG(); - RETURN(ERR_PTR(-ESTALE)); - } - - len = sprintf(name, LPU64, id); - if (!dir_dentry) { - dparent = filter_parent_lock(obd, mode, id, LCK_PR, &lockh); - if (IS_ERR(dparent)) - RETURN(dparent); - } - CDEBUG(D_INODE, "looking up object O/%*s/%s\n", - dparent->d_name.len, dparent->d_name.name, name); - dchild = ll_lookup_one_len(name, dparent, len); - if (!dir_dentry) - filter_parent_unlock(dparent, &lockh, LCK_PR); - if (IS_ERR(dchild)) { - CERROR("child lookup error %ld\n", PTR_ERR(dchild)); - RETURN(dchild); - } - - CDEBUG(D_INODE, "got child objid %s: %p, count = %d\n", - name, dchild, atomic_read(&dchild->d_count)); - - LASSERT(atomic_read(&dchild->d_count) > 0); - - RETURN(dchild); -} - -static struct file *filter_obj_open(struct obd_export *export, - __u64 id, __u32 type, - ldlm_mode_t parent_mode, - struct lustre_handle *parent_lockh) -{ - struct obd_device *obd = export->exp_obd; - struct filter_obd *filter = &obd->u.filter; - struct super_block *sb = filter->fo_sb; - struct dentry *dchild = NULL, *dparent = NULL; - struct filter_export_data *fed = &export->exp_filter_data; - struct filter_dentry_data *fdd = NULL; - struct filter_file_data *ffd = NULL; - struct obd_run_ctxt saved; - char name[24]; - struct file *file; - int len, cleanup_phase = 0; - ENTRY; - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - - if (!sb || !sb->s_dev) { - CERROR("fatal: device not initialized.\n"); - GOTO(cleanup, file = ERR_PTR(-ENXIO)); - } - - if (!id) { - CERROR("fatal: invalid obdo "LPU64"\n", id); - 
GOTO(cleanup, file = ERR_PTR(-ESTALE)); - } - - if (!(type & S_IFMT)) { - CERROR("OBD %s, object "LPU64" has bad type: %o\n", - __FUNCTION__, id, type); - GOTO(cleanup, file = ERR_PTR(-EINVAL)); - } - - ffd = filter_ffd_new(); - if (ffd == NULL) { - CERROR("obdfilter: out of memory\n"); - GOTO(cleanup, file = ERR_PTR(-ENOMEM)); - } - - cleanup_phase = 1; - - /* We preallocate this to avoid blocking while holding fo_fddlock */ - OBD_ALLOC(fdd, sizeof *fdd); - if (fdd == NULL) { - CERROR("obdfilter: out of memory\n"); - GOTO(cleanup, file = ERR_PTR(-ENOMEM)); - } - - cleanup_phase = 2; - - dparent = filter_parent_lock(obd, type, id, parent_mode, parent_lockh); - if (IS_ERR(dparent)) - GOTO(cleanup, file = (void *)dparent); - - cleanup_phase = 3; - - len = snprintf(name, sizeof(name), LPU64, id); - dchild = ll_lookup_one_len(name, dparent, len); - if (IS_ERR(dchild)) - GOTO(cleanup, file = (void *)dchild); - - cleanup_phase = 4; - - if (dchild->d_inode == NULL) { - CERROR("opening non-existent object %s - O_CREAT?\n", name); - file = ERR_PTR(-ENOENT); - GOTO(cleanup, file); - } - - /* dentry_open does a dput(dchild) and mntput(mnt) on error */ - mntget(filter->fo_vfsmnt); - file = dentry_open(dchild, filter->fo_vfsmnt, O_RDWR | O_LARGEFILE); - if (IS_ERR(file)) { - dchild = NULL; /* prevent a double dput in step 4 */ - CERROR("error opening %s: rc %ld\n", name, PTR_ERR(file)); - GOTO(cleanup, file); - } - - spin_lock(&filter->fo_fddlock); - if (dchild->d_fsdata) { - spin_unlock(&filter->fo_fddlock); - OBD_FREE(fdd, sizeof *fdd); - fdd = dchild->d_fsdata; - LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); - /* should only happen during client recovery */ - if (fdd->fdd_flags & FILTER_FLAG_DESTROY) - CDEBUG(D_INODE,"opening destroyed object "LPU64"\n",id); - atomic_inc(&fdd->fdd_open_count); - } else { - atomic_set(&fdd->fdd_open_count, 1); - fdd->fdd_magic = FILTER_DENTRY_MAGIC; - fdd->fdd_flags = 0; - fdd->fdd_objid = id; - /* If this is racy, then we can use {cmp}xchg 
and atomic_add */ - dchild->d_fsdata = fdd; - spin_unlock(&filter->fo_fddlock); - } - - ffd->ffd_file = file; - LASSERT(file->private_data == NULL); - file->private_data = ffd; - - if (!dchild->d_op) - dchild->d_op = &filter_dops; - else - LASSERT(dchild->d_op == &filter_dops); - - spin_lock(&fed->fed_lock); - list_add(&ffd->ffd_export_list, &fed->fed_open_head); - spin_unlock(&fed->fed_lock); - - CDEBUG(D_INODE, "opened objid "LPU64": rc = %p\n", id, file); -cleanup: - switch (cleanup_phase) { - case 4: - if (IS_ERR(file)) - f_dput(dchild); - case 3: - if (IS_ERR(file)) - filter_parent_unlock(dparent, parent_lockh,parent_mode); - case 2: - if (IS_ERR(file)) - OBD_FREE(fdd, sizeof *fdd); - case 1: - if (IS_ERR(file)) - filter_ffd_destroy(ffd); - filter_ffd_put(ffd); - case 0: - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - } - RETURN(file); -} - -/* Caller must hold LCK_PW on parent and push us into kernel context. - * Caller is also required to ensure that dchild->d_inode exists. - */ -static int filter_destroy_internal(struct obd_device *obd, - struct dentry *dparent, - struct dentry *dchild) -{ - struct inode *inode = dchild->d_inode; - int rc; - ENTRY; - - if (inode->i_nlink != 1 || atomic_read(&inode->i_count) != 1) { - CERROR("destroying objid %*s nlink = %d, count = %d\n", - dchild->d_name.len, dchild->d_name.name, - inode->i_nlink, atomic_read(&inode->i_count)); - } - - rc = vfs_unlink(dparent->d_inode, dchild); - - if (rc) - CERROR("error unlinking objid %*s: rc %d\n", - dchild->d_name.len, dchild->d_name.name, rc); - - RETURN(rc); -} - -/* If closing because we are failing this device, then - don't do the unlink on close. 
-*/ -static int filter_close_internal(struct obd_export *exp, - struct filter_file_data *ffd, - struct obd_trans_info *oti, - int failover) -{ - struct obd_device *obd = exp->exp_obd; - struct filter_obd *filter = &obd->u.filter; - struct file *filp = ffd->ffd_file; - struct dentry *dchild = dget(filp->f_dentry); - struct filter_dentry_data *fdd = dchild->d_fsdata; - struct lustre_handle parent_lockh; - int rc, rc2, cleanup_phase = 0; - struct dentry *dparent = NULL; - struct obd_run_ctxt saved; - ENTRY; - - LASSERT(filp->private_data == ffd); - LASSERT(fdd); - LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); - - rc = filp_close(filp, 0); - - if (atomic_dec_and_test(&fdd->fdd_open_count) && - fdd->fdd_flags & FILTER_FLAG_DESTROY && !failover) { - void *handle; - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - cleanup_phase = 1; - - LASSERT(fdd->fdd_objid > 0); - dparent = filter_parent_lock(obd, S_IFREG, fdd->fdd_objid, - LCK_PW, &parent_lockh); - if (IS_ERR(dparent)) - GOTO(cleanup, rc = PTR_ERR(dparent)); - cleanup_phase = 2; - - handle = fsfilt_start(obd, dparent->d_inode, - FSFILT_OP_UNLINK); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - - /* XXX unlink from PENDING directory now too */ - rc2 = filter_destroy_internal(obd, dparent, dchild); - if (rc2 && !rc) - rc = rc2; - rc = filter_finish_transno(exp, handle, oti, rc); - rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); - if (rc2) { - CERROR("error on commit, err = %d\n", rc2); - if (!rc) - rc = rc2; - } - } - -cleanup: - switch(cleanup_phase) { - case 2: - if (rc || oti == NULL) { - filter_parent_unlock(dparent, &parent_lockh, LCK_PW); - } else { - memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - oti->oti_ack_locks[0].mode = LCK_PW; - } - case 1: - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - case 0: - f_dput(dchild); - filter_ffd_destroy(ffd); - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - - RETURN(rc); -} - -/* obd 
methods */ -/* mount the file system (secretly) */ -static int filter_common_setup(struct obd_device *obd, obd_count len, void *buf, - char *option) -{ - struct obd_ioctl_data* data = buf; - struct filter_obd *filter = &obd->u.filter; - - struct vfsmount *mnt; - int rc = 0; - ENTRY; - - if (!data->ioc_inlbuf1 || !data->ioc_inlbuf2) - RETURN(-EINVAL); - - obd->obd_fsops = fsfilt_get_ops(data->ioc_inlbuf2); - if (IS_ERR(obd->obd_fsops)) - RETURN(PTR_ERR(obd->obd_fsops)); - - mnt = do_kern_mount(data->ioc_inlbuf2, 0, data->ioc_inlbuf1, option); - rc = PTR_ERR(mnt); - if (IS_ERR(mnt)) - GOTO(err_ops, rc); - - if (data->ioc_inllen3 > 0 && data->ioc_inlbuf3) { - if (*data->ioc_inlbuf3 == 'f') { - obd->obd_replayable = 1; - obd_sync_filter = 1; - CERROR("%s: configured for recovery and sync write\n", - obd->obd_name); - } else { - if (*data->ioc_inlbuf3 != 'n') { - CERROR("unrecognised flag '%c'\n", - *data->ioc_inlbuf3); - } - } - } - - if (data->ioc_inllen4 > 0 && data->ioc_inlbuf4) { - if (*data->ioc_inlbuf4 == '/') { - CERROR("filter namespace mount: %s\n", - data->ioc_inlbuf4); - filter->fo_nspath = strdup(data->ioc_inlbuf4); - } else { - CERROR("namespace mount must be absolute path: '%s'\n", - data->ioc_inlbuf4); - } - } - - filter->fo_vfsmnt = mnt; - filter->fo_sb = mnt->mnt_sb; - filter->fo_fstype = mnt->mnt_sb->s_type->name; - CDEBUG(D_SUPER, "%s: mnt = %p\n", filter->fo_fstype, mnt); - - OBD_SET_CTXT_MAGIC(&filter->fo_ctxt); - filter->fo_ctxt.pwdmnt = mnt; - filter->fo_ctxt.pwd = mnt->mnt_root; - filter->fo_ctxt.fs = get_ds(); - - rc = filter_prep(obd); - if (rc) - GOTO(err_mntput, rc); - - spin_lock_init(&filter->fo_translock); - spin_lock_init(&filter->fo_fddlock); - spin_lock_init(&filter->fo_objidlock); - INIT_LIST_HEAD(&filter->fo_export_list); - - obd->obd_namespace = ldlm_namespace_new("filter-tgt", - LDLM_NAMESPACE_SERVER); - if (!obd->obd_namespace) - GOTO(err_post, rc = -ENOMEM); - - ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL, - 
"filter_ldlm_cb_client", &obd->obd_ldlm_client); - - RETURN(0); - -err_post: - filter_post(obd); -err_mntput: - unlock_kernel(); - mntput(mnt); - filter->fo_sb = 0; - lock_kernel(); -err_ops: - fsfilt_put_ops(obd->obd_fsops); - return rc; -} - -static int filter_setup(struct obd_device *obd, obd_count len, void *buf) -{ - struct obd_ioctl_data* data = buf; - char *option = NULL; - - if (!strcmp(data->ioc_inlbuf2, "ext3")) - option = "asyncdel"; - - return filter_common_setup(obd, len, buf, option); -} - -/* sanobd setup methods - use a specific mount option */ -static int filter_san_setup(struct obd_device *obd, obd_count len, void *buf) -{ - struct obd_ioctl_data* data = buf; - char *option = NULL; - - if (!data->ioc_inlbuf2) - RETURN(-EINVAL); - - /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */ - if (!strcmp(data->ioc_inlbuf2, "extN")) - option = "data=writeback"; - else if (!strcmp(data->ioc_inlbuf2, "ext3")) - option = "data=writeback,asyncdel"; - else - LBUG(); /* just a reminder */ - - return filter_common_setup(obd, len, buf, option); -} - -static int filter_cleanup(struct obd_device *obd, int force, int failover) -{ - struct super_block *sb; - ENTRY; - - if (failover) - CERROR("%s: shutting down for failover; client state will" - " be preserved.\n", obd->obd_name); - - if (!list_empty(&obd->obd_exports)) { - CERROR("%s: still has clients!\n", obd->obd_name); - class_disconnect_exports(obd, failover); - if (!list_empty(&obd->obd_exports)) { - CERROR("still has exports after forced cleanup?\n"); - RETURN(-EBUSY); - } - } - - ldlm_namespace_free(obd->obd_namespace); - - sb = obd->u.filter.fo_sb; - if (!sb) - RETURN(0); - - filter_post(obd); - - shrink_dcache_parent(sb->s_root); - unlock_kernel(); - - if (atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count) > 1){ - CERROR("%s: mount point busy, mnt_count: %d\n", obd->obd_name, - atomic_read(&obd->u.filter.fo_vfsmnt->mnt_count)); - } - - mntput(obd->u.filter.fo_vfsmnt); - obd->u.filter.fo_sb = 
0; -/* destroy_buffers(obd->u.filter.fo_sb->s_dev);*/ - - fsfilt_put_ops(obd->obd_fsops); - lock_kernel(); - - RETURN(0); -} - -int filter_attach(struct obd_device *obd, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - int rc; - - lprocfs_init_vars(&lvars); - rc = lprocfs_obd_attach(obd, lvars.obd_vars); - if (rc != 0) - return rc; - - rc = lprocfs_alloc_obd_stats(obd, LPROC_FILTER_LAST); - if (rc != 0) - return rc; - - /* Init obdfilter private stats here */ - lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_READ_BYTES, - LPROCFS_CNTR_AVGMINMAX, "read_bytes", "bytes"); - lprocfs_counter_init(obd->obd_stats, LPROC_FILTER_WRITE_BYTES, - LPROCFS_CNTR_AVGMINMAX, "write_bytes", "bytes"); - return rc; -} - -int filter_detach(struct obd_device *dev) -{ - lprocfs_free_obd_stats(dev); - return lprocfs_obd_detach(dev); -} - -/* nearly identical to mds_connect */ -static int filter_connect(struct lustre_handle *conn, struct obd_device *obd, - struct obd_uuid *cluuid) -{ - struct obd_export *exp; - struct filter_export_data *fed; - struct filter_client_data *fcd; - struct filter_obd *filter = &obd->u.filter; - int rc; - - ENTRY; - - if (!conn || !obd || !cluuid) - RETURN(-EINVAL); - - rc = class_connect(conn, obd, cluuid); - if (rc) - RETURN(rc); - exp = class_conn2export(conn); - LASSERT(exp); - - fed = &exp->exp_filter_data; - class_export_put(exp); - - INIT_LIST_HEAD(&fed->fed_open_head); - spin_lock_init(&fed->fed_lock); - - if (!obd->obd_replayable) - RETURN(0); - - OBD_ALLOC(fcd, sizeof(*fcd)); - if (!fcd) { - CERROR("filter: out of memory for client data\n"); - GOTO(out_export, rc = -ENOMEM); - } - - memcpy(fcd->fcd_uuid, cluuid, sizeof(fcd->fcd_uuid)); - fed->fed_fcd = fcd; - fcd->fcd_mount_count = cpu_to_le64(filter->fo_fsd->fsd_mount_count); - - rc = filter_client_add(obd, filter, fed, -1); - if (rc) - GOTO(out_fcd, rc); - - RETURN(rc); - -out_fcd: - OBD_FREE(fcd, sizeof(*fcd)); -out_export: - class_disconnect(conn, 0); - - RETURN(rc); -} - 
-static void filter_destroy_export(struct obd_export *exp) -{ - struct filter_export_data *fed = &exp->exp_filter_data; - - ENTRY; - spin_lock(&fed->fed_lock); - while (!list_empty(&fed->fed_open_head)) { - struct filter_file_data *ffd; - - ffd = list_entry(fed->fed_open_head.next, typeof(*ffd), - ffd_export_list); - list_del(&ffd->ffd_export_list); - spin_unlock(&fed->fed_lock); - - CERROR("force close file %*s (hdl %p:"LPX64") on disconnect\n", - ffd->ffd_file->f_dentry->d_name.len, - ffd->ffd_file->f_dentry->d_name.name, - ffd, ffd->ffd_handle.h_cookie); - - filter_close_internal(exp, ffd, NULL, exp->exp_failover); - spin_lock(&fed->fed_lock); - } - spin_unlock(&fed->fed_lock); - - if (exp->exp_obd->obd_replayable) - filter_client_free(exp, exp->exp_failover); - EXIT; -} - -/* also incredibly similar to mds_disconnect */ -static int filter_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_export *exp = class_conn2export(conn); - int rc; - unsigned long flags; - ENTRY; - - LASSERT(exp); - ldlm_cancel_locks_for_export(exp); - - spin_lock_irqsave(&exp->exp_lock, flags); - exp->exp_failover = failover; - spin_unlock_irqrestore(&exp->exp_lock, flags); - - rc = class_disconnect(conn, failover); - - fsfilt_sync(exp->exp_obd, exp->exp_obd->u.filter.fo_sb); - class_export_put(exp); - /* XXX cleanup preallocated inodes */ - RETURN(rc); -} - -static void filter_from_inode(struct obdo *oa, struct inode *inode, int valid) -{ - int type = oa->o_mode & S_IFMT; - ENTRY; - - CDEBUG(D_INFO, "src inode %lu (%p), dst obdo "LPU64" valid 0x%08x\n", - inode->i_ino, inode, oa->o_id, valid); - /* Don't copy the inode number in place of the object ID */ - obdo_from_inode(oa, inode, valid); - oa->o_mode &= ~S_IFMT; - oa->o_mode |= type; - - if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { - obd_rdev rdev = kdev_t_to_nr(inode->i_rdev); - oa->o_rdev = rdev; - oa->o_valid |= OBD_MD_FLRDEV; - } - - EXIT; -} - -static struct dentry *__filter_oa2dentry(struct 
lustre_handle *conn, - struct obdo *oa, char *what) -{ - struct dentry *dchild = NULL; - - if (oa->o_valid & OBD_MD_FLHANDLE) { - struct lustre_handle *ost_handle = obdo_handle(oa); - struct filter_file_data *ffd = filter_handle2ffd(ost_handle); - - if (ffd != NULL) { - struct filter_dentry_data *fdd; - dchild = dget(ffd->ffd_file->f_dentry); - fdd = dchild->d_fsdata; - LASSERT(fdd->fdd_magic == FILTER_DENTRY_MAGIC); - filter_ffd_put(ffd); - - CDEBUG(D_INODE, - "got child objid %*s: %p, count = %d\n", - dchild->d_name.len, dchild->d_name.name, - dchild, atomic_read(&dchild->d_count)); - } - } - - if (!dchild) { - struct obd_device *obd = class_conn2obd(conn); - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - RETURN(ERR_PTR(-EINVAL)); - } - dchild = filter_fid2dentry(obd, NULL, oa->o_mode, oa->o_id); - } - - if (IS_ERR(dchild)) { - CERROR("%s error looking up object: "LPU64"\n", what, oa->o_id); - RETURN(dchild); - } - - if (!dchild->d_inode) { - CERROR("%s on non-existent object: "LPU64"\n", what, oa->o_id); - f_dput(dchild); - RETURN(ERR_PTR(-ENOENT)); - } - - return dchild; -} - -#define filter_oa2dentry(conn, oa) __filter_oa2dentry(conn, oa, __FUNCTION__) - -static int filter_getattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md) -{ - struct dentry *dentry = NULL; - int rc = 0; - ENTRY; - - dentry = filter_oa2dentry(conn, oa); - if (IS_ERR(dentry)) - RETURN(PTR_ERR(dentry)); - - filter_from_inode(oa, dentry->d_inode, oa->o_valid); - - f_dput(dentry); - RETURN(rc); -} - -/* this is called from filter_truncate() until we have filter_punch() */ -static int filter_setattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti) -{ - struct obd_run_ctxt saved; - struct obd_export *export = class_conn2export(conn); - struct obd_device *obd = class_conn2obd(conn); - struct filter_obd *filter = &obd->u.filter; - struct dentry *dentry; - struct iattr iattr; - struct inode 
*inode; - void * handle; - int rc, rc2; - ENTRY; - - dentry = filter_oa2dentry(conn, oa); - - if (IS_ERR(dentry)) - GOTO(out_exp, rc = PTR_ERR(dentry)); - - iattr_from_obdo(&iattr, oa, oa->o_valid); - iattr.ia_mode = (iattr.ia_mode & ~S_IFMT) | S_IFREG; - inode = dentry->d_inode; - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - lock_kernel(); - if (iattr.ia_valid & ATTR_SIZE) - down(&inode->i_sem); - - handle = fsfilt_start(obd, dentry->d_inode, FSFILT_OP_SETATTR); - if (IS_ERR(handle)) - GOTO(out_unlock, rc = PTR_ERR(handle)); - - rc = fsfilt_setattr(obd, dentry, handle, &iattr, 1); - rc = filter_finish_transno(export, handle, oti, rc); - rc2 = fsfilt_commit(obd, dentry->d_inode, handle, 0); - if (rc2) { - CERROR("error on commit, err = %d\n", rc2); - if (!rc) - rc = rc2; - } - - if (iattr.ia_valid & ATTR_SIZE) { - up(&inode->i_sem); - oa->o_valid = OBD_MD_FLBLOCKS | OBD_MD_FLCTIME | OBD_MD_FLMTIME; - obdo_from_inode(oa, inode, oa->o_valid); - } - -out_unlock: - unlock_kernel(); - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - - f_dput(dentry); - out_exp: - class_export_put(export); - RETURN(rc); -} - -static int filter_open(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti, - struct obd_client_handle *och) -{ - struct obd_export *export = NULL; - struct lustre_handle *handle; - struct filter_file_data *ffd; - struct file *filp; - struct lustre_handle parent_lockh; - int rc = 0; - ENTRY; - - export = class_conn2export(conn); - if (!export) { - CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", - conn->cookie); - GOTO(out, rc = -EINVAL); - } - - filp = filter_obj_open(export, oa->o_id, oa->o_mode, - LCK_PR, &parent_lockh); - if (IS_ERR(filp)) - GOTO(out, rc = PTR_ERR(filp)); - - filter_from_inode(oa, filp->f_dentry->d_inode, oa->o_valid); - - ffd = filp->private_data; - handle = obdo_handle(oa); - handle->cookie = ffd->ffd_handle.h_cookie; - oa->o_valid |= OBD_MD_FLHANDLE; - -out: - class_export_put(export); - if (!rc) 
{ - memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - oti->oti_ack_locks[0].mode = LCK_PR; - } - RETURN(rc); -} - -static int filter_close(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti) -{ - struct obd_export *exp = class_conn2export(conn); - struct filter_file_data *ffd; - struct filter_export_data *fed; - int rc; - ENTRY; - - if (!exp) { - CDEBUG(D_IOCTL, "invalid client cookie"LPX64"\n", conn->cookie); - GOTO(out, rc = -EINVAL); - } - - if (!(oa->o_valid & OBD_MD_FLHANDLE)) { - CERROR("no handle for close of objid "LPU64"\n", oa->o_id); - GOTO(out, rc = -EINVAL); - } - - ffd = filter_handle2ffd(obdo_handle(oa)); - if (ffd == NULL) { - CERROR("bad handle ("LPX64") for close\n", - obdo_handle(oa)->cookie); - GOTO(out, rc = -ESTALE); - } - - fed = &exp->exp_filter_data; - spin_lock(&fed->fed_lock); - list_del(&ffd->ffd_export_list); - spin_unlock(&fed->fed_lock); - - rc = filter_close_internal(exp, ffd, oti, 0); - filter_ffd_put(ffd); - GOTO(out, rc); - out: - class_export_put(exp); - return rc; -} - -static int filter_create(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) -{ - struct obd_export *exp; - struct obd_device *obd = class_conn2obd(conn); - struct filter_obd *filter = &obd->u.filter; - struct obd_run_ctxt saved; - struct lustre_handle parent_lockh; - struct dentry *dparent; - struct dentry *dchild = NULL; - struct iattr; - void *handle; - int err, rc, cleanup_phase; - ENTRY; - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - RETURN(-EINVAL); - } - - exp = class_conn2export(conn); - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - retry: - oa->o_id = filter_next_id(filter); - - cleanup_phase = 0; - dparent = filter_parent_lock(obd, S_IFREG, oa->o_id, LCK_PW, - &parent_lockh); - if (IS_ERR(dparent)) - GOTO(cleanup, rc = PTR_ERR(dparent)); - cleanup_phase = 1; - - dchild = filter_fid2dentry(obd, 
dparent, S_IFREG, oa->o_id); - if (IS_ERR(dchild)) - GOTO(cleanup, rc = PTR_ERR(dchild)); - if (dchild->d_inode) { - /* This would only happen if lastobjid was bad on disk */ - CERROR("Serious error: objid %*s already exists; is this " - "filesystem corrupt? I will try to work around it.\n", - dchild->d_name.len, dchild->d_name.name); - f_dput(dchild); - filter_parent_unlock(dparent, &parent_lockh, LCK_PW); - goto retry; - } - - cleanup_phase = 2; - handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_CREATE); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - - rc = vfs_create(dparent->d_inode, dchild, oa->o_mode); - if (rc) - CERROR("create failed rc = %d\n", rc); - - rc = filter_finish_transno(exp, handle, oti, rc); - err = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd); - if (err) { - CERROR("unable to write lastobjid but file created\n"); - if (!rc) - rc = err; - } - err = fsfilt_commit(obd, dparent->d_inode, handle, 0); - if (err) { - CERROR("error on commit, err = %d\n", err); - if (!rc) - rc = err; - } - - if (rc) - GOTO(cleanup, rc); - - /* Set flags for fields we have set in the inode struct */ - oa->o_valid = OBD_MD_FLID | OBD_MD_FLBLKSZ | OBD_MD_FLBLOCKS | - OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME; - filter_from_inode(oa, dchild->d_inode, oa->o_valid); - - EXIT; -cleanup: - switch(cleanup_phase) { - case 2: - f_dput(dchild); - case 1: /* locked parent dentry */ - if (rc || oti == NULL) { - filter_parent_unlock(dparent, &parent_lockh, LCK_PW); - } else { - memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - oti->oti_ack_locks[0].mode = LCK_PW; - } - case 0: - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - class_export_put(exp); - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - - RETURN(rc); -} - -static int filter_destroy(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti) -{ - struct obd_export *exp; - 
struct obd_device *obd = class_conn2obd(conn); - struct filter_obd *filter = &obd->u.filter; - struct dentry *dparent, *dchild = NULL; - struct filter_dentry_data *fdd; - struct obd_run_ctxt saved; - void *handle = NULL; - struct lustre_handle parent_lockh; - int rc, rc2, cleanup_phase = 0; - ENTRY; - - if (!obd) { - CERROR("invalid client cookie "LPX64"\n", conn->cookie); - RETURN(-EINVAL); - } - - exp = class_conn2export(conn); - - CDEBUG(D_INODE, "destroying objid "LPU64"\n", oa->o_id); - - push_ctxt(&saved, &filter->fo_ctxt, NULL); - dparent = filter_parent_lock(obd, oa->o_mode, oa->o_id, - LCK_PW, &parent_lockh); - if (IS_ERR(dparent)) - GOTO(cleanup, rc = PTR_ERR(dparent)); - cleanup_phase = 1; - - dchild = filter_fid2dentry(obd, dparent, S_IFREG, oa->o_id); - if (IS_ERR(dchild)) - GOTO(cleanup, rc = -ENOENT); - cleanup_phase = 2; - - if (!dchild->d_inode) { - CERROR("destroying non-existent object "LPU64"\n", oa->o_id); - GOTO(cleanup, rc = -ENOENT); - } - - handle = fsfilt_start(obd, dparent->d_inode, FSFILT_OP_UNLINK); - if (IS_ERR(handle)) - GOTO(cleanup, rc = PTR_ERR(handle)); - cleanup_phase = 3; - - fdd = dchild->d_fsdata; - if (fdd && atomic_read(&fdd->fdd_open_count)) { - LASSERT(fdd->fdd_magic = FILTER_DENTRY_MAGIC); - if (!(fdd->fdd_flags & FILTER_FLAG_DESTROY)) { - fdd->fdd_flags |= FILTER_FLAG_DESTROY; - /* XXX put into PENDING directory in case of crash */ - CDEBUG(D_INODE, - "defer destroy of %dx open objid "LPU64"\n", - atomic_read(&fdd->fdd_open_count), oa->o_id); - } else - CDEBUG(D_INODE, - "repeat destroy of %dx open objid "LPU64"\n", - atomic_read(&fdd->fdd_open_count), oa->o_id); - GOTO(cleanup, rc = 0); - } - - rc = filter_destroy_internal(obd, dparent, dchild); - -cleanup: - switch(cleanup_phase) { - case 3: - rc = filter_finish_transno(exp, handle, oti, rc); - rc2 = fsfilt_commit(obd, dparent->d_inode, handle, 0); - if (rc2) { - CERROR("error on commit, err = %d\n", rc2); - if (!rc) - rc = rc2; - } - case 2: - f_dput(dchild); - case 
1: - if (rc || oti == NULL) { - filter_parent_unlock(dparent, &parent_lockh, LCK_PW); - } else { - memcpy(&oti->oti_ack_locks[0].lock, &parent_lockh, - sizeof(parent_lockh)); - oti->oti_ack_locks[0].mode = LCK_PW; - } - case 0: - pop_ctxt(&saved, &filter->fo_ctxt, NULL); - class_export_put(exp); - break; - default: - CERROR("invalid cleanup_phase %d\n", cleanup_phase); - LBUG(); - } - - RETURN(rc); -} - -/* NB start and end are used for punch, but not truncate */ -static int filter_truncate(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *lsm, - obd_off start, obd_off end, - struct obd_trans_info *oti) -{ - int error; - ENTRY; - - if (end != OBD_OBJECT_EOF) - CERROR("PUNCH not supported, only truncate: end = "LPX64"\n", - end); - - CDEBUG(D_INODE, "calling truncate for object "LPU64", valid = %x, " - "o_size = "LPD64"\n", oa->o_id, oa->o_valid, start); - oa->o_size = start; - error = filter_setattr(conn, oa, NULL, oti); - RETURN(error); -} - -static inline void lustre_put_page(struct page *page) -{ - page_cache_release(page); -} - -static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb) -{ - struct address_space *mapping = inode->i_mapping; - struct page *page; - unsigned long index = lnb->offset >> PAGE_SHIFT; - int rc; - - page = grab_cache_page(mapping, index); /* locked page */ - if (IS_ERR(page)) - return lnb->rc = PTR_ERR(page); - - lnb->page = page; - - if (inode->i_size < lnb->offset + lnb->len - 1) - lnb->rc = inode->i_size - lnb->offset; - else - lnb->rc = lnb->len; - - if (PageUptodate(page)) { - unlock_page(page); - return 0; - } - - rc = mapping->a_ops->readpage(NULL, page); - if (rc < 0) { - CERROR("page index %lu, rc = %d\n", index, rc); - lnb->page = NULL; - lustre_put_page(page); - return lnb->rc = rc; - } - - return 0; -} - -static int filter_finish_page_read(struct niobuf_local *lnb) -{ - if (lnb->page == NULL) - return 0; - - if (PageUptodate(lnb->page)) - return 0; - - wait_on_page(lnb->page); - if 
(!PageUptodate(lnb->page)) { - CERROR("page index %lu/offset "LPX64" not uptodate\n", - lnb->page->index, lnb->offset); - GOTO(err_page, lnb->rc = -EIO); - } - if (PageError(lnb->page)) { - CERROR("page index %lu/offset "LPX64" has error\n", - lnb->page->index, lnb->offset); - GOTO(err_page, lnb->rc = -EIO); - } - - return 0; - -err_page: - lustre_put_page(lnb->page); - lnb->page = NULL; - return lnb->rc; -} - -static struct page *lustre_get_page_write(struct inode *inode, - unsigned long index) -{ - struct address_space *mapping = inode->i_mapping; - struct page *page; - int rc; - - page = grab_cache_page(mapping, index); /* locked page */ - - if (!IS_ERR(page)) { - /* Note: Called with "O" and "PAGE_SIZE" this is essentially - * a no-op for most filesystems, because we write the whole - * page. For partial-page I/O this will read in the page. - */ - rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE); - if (rc) { - CERROR("page index %lu, rc = %d\n", index, rc); - if (rc != -ENOSPC) - LBUG(); - GOTO(err_unlock, rc); - } - /* XXX not sure if we need this if we are overwriting page */ - if (PageError(page)) { - CERROR("error on page index %lu, rc = %d\n", index, rc); - LBUG(); - GOTO(err_unlock, rc = -EIO); - } - } - return page; - -err_unlock: - unlock_page(page); - lustre_put_page(page); - return ERR_PTR(rc); -} - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -int waitfor_one_page(struct page *page) -{ - wait_on_page_locked(page); - return 0; -} -#endif - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -/* We should only change the file mtime (and not the ctime, like - * update_inode_times() in generic_file_write()) when we only change data. 
- */ -static inline void inode_update_time(struct inode *inode, int ctime_too) -{ - time_t now = CURRENT_TIME; - if (inode->i_mtime == now && (!ctime_too || inode->i_ctime == now)) - return; - inode->i_mtime = now; - if (ctime_too) - inode->i_ctime = now; - mark_inode_dirty_sync(inode); -} -#endif - -static int lustre_commit_write(struct niobuf_local *lnb) -{ - struct page *page = lnb->page; - unsigned from = lnb->offset & ~PAGE_MASK; - unsigned to = from + lnb->len; - struct inode *inode = page->mapping->host; - int err; - - LASSERT(to <= PAGE_SIZE); - err = page->mapping->a_ops->commit_write(NULL, page, from, to); - if (!err && IS_SYNC(inode)) - err = waitfor_one_page(page); - //SetPageUptodate(page); // the client commit_write will do this - - SetPageReferenced(page); - unlock_page(page); - lustre_put_page(page); - return err; -} - -int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb, - int *pglocked) -{ - unsigned long index = lnb->offset >> PAGE_SHIFT; - struct address_space *mapping = inode->i_mapping; - struct page *page; - int rc; - - //ASSERT_PAGE_INDEX(index, GOTO(err, rc = -EINVAL)); - if (*pglocked) - page = grab_cache_page_nowait(mapping, index); /* locked page */ - else - page = grab_cache_page(mapping, index); /* locked page */ - - - /* This page is currently locked, so get a temporary page instead. 
*/ - if (!page) { - CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index); - page = alloc_pages(GFP_KERNEL, 0); /* locked page */ - if (!page) { - CERROR("no memory for a temp page\n"); - GOTO(err, rc = -ENOMEM); - } - page->index = index; - lnb->page = page; - lnb->flags |= N_LOCAL_TEMP_PAGE; - } else if (!IS_ERR(page)) { - (*pglocked)++; - - rc = mapping->a_ops->prepare_write(NULL, page, - lnb->offset & ~PAGE_MASK, - lnb->len); - if (rc) { - if (rc != -ENOSPC) - CERROR("page index %lu, rc = %d\n", index, rc); - GOTO(err_unlock, rc); - } - /* XXX not sure if we need this if we are overwriting page */ - if (PageError(page)) { - CERROR("error on page index %lu, rc = %d\n", index, rc); - LBUG(); - GOTO(err_unlock, rc = -EIO); - } - lnb->page = page; - } - - return 0; - -err_unlock: - unlock_page(page); - lustre_put_page(page); -err: - return lnb->rc = rc; -} - -/* - * We need to balance prepare_write() calls with commit_write() calls. - * If the page has been prepared, but we have no data for it, we don't - * want to overwrite valid data on disk, but we still need to zero out - * data for space which was newly allocated. Like part of what happens - * in __block_prepare_write() for newly allocated blocks. - * - * XXX currently __block_prepare_write() creates buffers for all the - * pages, and the filesystems mark these buffers as BH_New if they - * were newly allocated from disk. We use the BH_New flag similarly. - */ -static int filter_commit_write(struct niobuf_local *lnb, int err) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - if (err) { - unsigned block_start, block_end; - struct buffer_head *bh, *head = lnb->page->buffers; - unsigned blocksize = head->b_size; - - /* debugging: just seeing if this ever happens */ - CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR, - "called for ino %lu:%lu on err %d\n", - lnb->page->mapping->host->i_ino, lnb->page->index, err); - - /* Currently one buffer per page, but in the future... 
*/ - for (bh = head, block_start = 0; bh != head || !block_start; - block_start = block_end, bh = bh->b_this_page) { - block_end = block_start + blocksize; - if (buffer_new(bh)) { - memset(kmap(lnb->page) + block_start, 0, - blocksize); - kunmap(lnb->page); - } - } - } -#endif - return lustre_commit_write(lnb); -} - -static int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb, - struct niobuf_local *res, void **desc_private, - struct obd_trans_info *oti) -{ - struct obd_run_ctxt saved; - struct obd_device *obd; - struct obd_ioobj *o; - struct niobuf_remote *rnb; - struct niobuf_local *lnb; - struct fsfilt_objinfo *fso; - struct dentry *dentry; - struct inode *inode; - int pglocked = 0, rc = 0, i, j, tot_bytes = 0; - unsigned long now = jiffies; - ENTRY; - - memset(res, 0, niocount * sizeof(*res)); - - obd = exp->exp_obd; - if (obd == NULL) - RETURN(-EINVAL); - - // theoretically we support multi-obj BRW RPCs, but until then... - LASSERT(objcount == 1); - - OBD_ALLOC(fso, objcount * sizeof(*fso)); - if (!fso) - RETURN(-ENOMEM); - - push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - - for (i = 0, o = obj; i < objcount; i++, o++) { - struct filter_dentry_data *fdd; - - LASSERT(o->ioo_bufcnt); - - dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id); - - if (IS_ERR(dentry)) - GOTO(out_objinfo, rc = PTR_ERR(dentry)); - - fso[i].fso_dentry = dentry; - fso[i].fso_bufcnt = o->ioo_bufcnt; - - if (!dentry->d_inode) { - CERROR("trying to BRW to non-existent file "LPU64"\n", - o->ioo_id); - f_dput(dentry); - GOTO(out_objinfo, rc = -ENOENT); - } - - /* If we ever start to support mutli-object BRW RPCs, we will - * need to get locks on mulitple inodes (in order) or use the - * DLM to do the locking for us (and use the same locking in - * filter_setattr() for truncate). 
That isn't all, because - * there still exists the possibility of a truncate starting - * a new transaction while holding the ext3 rwsem = write - * while some writes (which have started their transactions - * here) blocking on the ext3 rwsem = read => lock inversion. - * - * The handling gets very ugly when dealing with locked pages. - * It may be easier to just get rid of the locked page code - * (which has problems of its own) and either discover we do - * not need it anymore (i.e. it was a symptom of another bug) - * or ensure we get the page locks in an appropriate order. - */ - if (cmd & OBD_BRW_WRITE) - down(&dentry->d_inode->i_sem); - fdd = dentry->d_fsdata; - if (!fdd || !atomic_read(&fdd->fdd_open_count)) - CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n", - o->ioo_id); - } - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow prep setup %lus\n", (jiffies - now) / HZ); - - if (cmd & OBD_BRW_WRITE) { - *desc_private = fsfilt_brw_start(obd, objcount, fso, - niocount, nb); - if (IS_ERR(*desc_private)) { - rc = PTR_ERR(*desc_private); - CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, - "error starting transaction: rc = %d\n", rc); - *desc_private = NULL; - GOTO(out_objinfo, rc); - } - } - - for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) { - dentry = fso[i].fso_dentry; - inode = dentry->d_inode; - - for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) { - if (j == 0) - lnb->dentry = dentry; - else - lnb->dentry = dget(dentry); - - lnb->offset = rnb->offset; - lnb->len = rnb->len; - lnb->flags = rnb->flags; - lnb->start = jiffies; - - if (cmd & OBD_BRW_WRITE) { - rc = filter_get_page_write(inode,lnb,&pglocked); - if (rc) - up(&dentry->d_inode->i_sem); - } else if (inode->i_size <= rnb->offset) { - /* If there's no more data, abort early. - * lnb->page == NULL and lnb->rc == 0, so it's - * easy to detect later. 
*/ - f_dput(dentry); - lnb->dentry = NULL; - break; - } else { - rc = filter_start_page_read(inode, lnb); - } - - if (rc) { - CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, - "page err %u@"LPU64" %u/%u %p: rc %d\n", - lnb->len, lnb->offset, j, o->ioo_bufcnt, - dentry, rc); - f_dput(dentry); - GOTO(out_pages, rc); - } - - tot_bytes += lnb->len; - - if ((cmd & OBD_BRW_READ) && lnb->rc < lnb->len) { - /* Likewise with a partial read */ - break; - } - } - } - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow prep get page %lus\n", (jiffies - now) / HZ); - - if (cmd & OBD_BRW_READ) { - lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, - tot_bytes); - while (lnb-- > res) { - rc = filter_finish_page_read(lnb); - if (rc) { - CERROR("error page %u@"LPU64" %u %p: rc %d\n", - lnb->len, lnb->offset, lnb - res, - lnb->dentry, rc); - f_dput(lnb->dentry); - GOTO(out_pages, rc); - } - } - } else - lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES, - tot_bytes); - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ); - - EXIT; -out: - OBD_FREE(fso, objcount * sizeof(*fso)); - current->journal_info = NULL; - pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - return rc; - -out_pages: - while (lnb-- > res) { - if (cmd & OBD_BRW_WRITE) { - filter_commit_write(lnb, rc); - up(&lnb->dentry->d_inode->i_sem); - } else { - lustre_put_page(lnb->page); - } - f_dput(lnb->dentry); - } - if (cmd & OBD_BRW_WRITE) { - filter_finish_transno(exp, *desc_private, oti, rc); - fsfilt_commit(obd, - filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode, - *desc_private, 0); - } - goto out; /* dropped the dentry refs already (one per page) */ - -out_objinfo: - for (i = 0; i < objcount && fso[i].fso_dentry; i++) { - if (cmd & OBD_BRW_WRITE) - up(&fso[i].fso_dentry->d_inode->i_sem); - f_dput(fso[i].fso_dentry); - } - goto out; -} - -static int filter_write_locked_page(struct niobuf_local *lnb) -{ - struct page *lpage; - void *lpage_addr; - void 
*lnb_addr; - int rc; - ENTRY; - - lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index); - if (IS_ERR(lpage)) { - /* It is highly unlikely that we would ever get an error here. - * The page we want to get was previously locked, so it had to - * have already allocated the space, and we were just writing - * over the same data, so there would be no hole in the file. - * - * XXX: possibility of a race with truncate could exist, need - * to check that. There are no guarantees w.r.t. - * write order even on a local filesystem, although the - * normal response would be to return the number of bytes - * successfully written and leave the rest to the app. - */ - rc = PTR_ERR(lpage); - CERROR("error getting locked page index %ld: rc = %d\n", - lnb->page->index, rc); - LBUG(); - lustre_commit_write(lnb); - RETURN(rc); - } - - /* 2 kmaps == vanishingly small deadlock opportunity */ - lpage_addr = kmap(lpage); - lnb_addr = kmap(lnb->page); - - memcpy(lpage_addr, lnb_addr, PAGE_SIZE); - - kunmap(lnb->page); - kunmap(lpage); - - lustre_put_page(lnb->page); - - lnb->page = lpage; - rc = lustre_commit_write(lnb); - if (rc) - CERROR("error committing locked page %ld: rc = %d\n", - lnb->page->index, rc); - - RETURN(rc); -} - -static int filter_syncfs(struct obd_export *exp) -{ - struct obd_device *obd = exp->exp_obd; - ENTRY; - - RETURN(fsfilt_sync(obd, obd->u.filter.fo_sb)); -} - -static int filter_commitrw(int cmd, struct obd_export *exp, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_local *res, - void *desc_private, struct obd_trans_info *oti) -{ - struct obd_run_ctxt saved; - struct obd_ioobj *o; - struct niobuf_local *lnb; - struct obd_device *obd = exp->exp_obd; - int found_locked = 0, rc = 0, i; - unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */ - ENTRY; - - push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - - LASSERT(!current->journal_info); - current->journal_info = desc_private; - - for (i = 0, o = obj, lnb = res; i < 
objcount; i++, o++) { - int j; - - if (cmd & OBD_BRW_WRITE) { - inode_update_time(lnb->dentry->d_inode, 1); - up(&lnb->dentry->d_inode->i_sem); - } - for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) { - if (lnb->page == NULL) { - continue; - } - - if (lnb->flags & N_LOCAL_TEMP_PAGE) { - found_locked++; - continue; - } - - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commitrw %lus\n", - (jiffies - lnb->start) / HZ); - - if (cmd & OBD_BRW_WRITE) { - int err = filter_commit_write(lnb, 0); - - if (!rc) - rc = err; - } else { - lustre_put_page(lnb->page); - } - - f_dput(lnb->dentry); - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commit_write %lus\n", - (jiffies - lnb->start) / HZ); - } - } - - for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount; - i++, o++) { - int j; - for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) { - int err; - if (!(lnb->flags & N_LOCAL_TEMP_PAGE)) - continue; - - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commitrw locked %lus\n", - (jiffies - lnb->start) / HZ); - - err = filter_write_locked_page(lnb); - if (!rc) - rc = err; - f_dput(lnb->dentry); - found_locked--; - - if (time_after(jiffies, lnb->start + 15*HZ)) - CERROR("slow commit_write locked %lus\n", - (jiffies - lnb->start) / HZ); - } - } - - if (cmd & OBD_BRW_WRITE) { - /* We just want any dentry for the commit, for now */ - struct dentry *dparent = filter_parent(obd, S_IFREG, 0); - int err; - - rc = filter_finish_transno(exp, desc_private, oti, rc); - err = fsfilt_commit(obd, dparent->d_inode, desc_private, - obd_sync_filter); - if (err) - rc = err; - if (obd_sync_filter) - LASSERT(oti->oti_transno <= obd->obd_last_committed); - - if (time_after(jiffies, now + 15*HZ)) - CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ); - } - - LASSERT(!current->journal_info); - - pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL); - RETURN(rc); -} - -static int filter_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count 
oa_bufs, - struct brw_page *pga, struct obd_trans_info *oti) -{ - struct obd_export *export = class_conn2export(conn); - struct obd_ioobj ioo; - struct niobuf_local *lnb; - struct niobuf_remote *rnb; - obd_count i; - void *desc_private; - int ret = 0; - ENTRY; - - if (export == NULL) - RETURN(-EINVAL); - - OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local)); - OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote)); - - if (lnb == NULL || rnb == NULL) - GOTO(out, ret = -ENOMEM); - - for (i = 0; i < oa_bufs; i++) { - rnb[i].offset = pga[i].off; - rnb[i].len = pga[i].count; - } - - ioo.ioo_id = lsm->lsm_object_id; - ioo.ioo_gr = 0; - ioo.ioo_type = S_IFREG; - ioo.ioo_bufcnt = oa_bufs; - - ret = filter_preprw(cmd, export, NULL, 1, &ioo, oa_bufs, rnb, lnb, - &desc_private, oti); - if (ret != 0) - GOTO(out, ret); - - for (i = 0; i < oa_bufs; i++) { - void *virt = kmap(pga[i].pg); - obd_off off = pga[i].off & ~PAGE_MASK; - void *addr = kmap(lnb[i].page); - - /* 2 kmaps == vanishingly small deadlock opportunity */ - - if (cmd & OBD_BRW_WRITE) - memcpy(addr + off, virt + off, pga[i].count); - else - memcpy(virt + off, addr + off, pga[i].count); - - kunmap(addr); - kunmap(virt); - } - - ret = filter_commitrw(cmd, export, 1, &ioo, oa_bufs, lnb, desc_private, - oti); - -out: - if (lnb) - OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local)); - if (rnb) - OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote)); - class_export_put(export); - RETURN(ret); -} - -static int filter_san_preprw(int cmd, struct lustre_handle *conn, - int objcount, struct obd_ioobj *obj, - int niocount, struct niobuf_remote *nb) -{ - struct obd_device *obd; - struct obd_ioobj *o = obj; - struct niobuf_remote *rnb = nb; - int rc = 0; - int i; - ENTRY; - - obd = class_conn2obd(conn); - if (!obd) { - CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", - conn->cookie); - RETURN(-EINVAL); - } - - for (i = 0; i < objcount; i++, o++) { - struct dentry *dentry; - struct inode *inode; - int (*fs_bmap)(struct 
address_space *, long); - int j; - - dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id); - if (IS_ERR(dentry)) - GOTO(out, rc = PTR_ERR(dentry)); - inode = dentry->d_inode; - if (!inode) { - CERROR("trying to BRW to non-existent file "LPU64"\n", - o->ioo_id); - f_dput(dentry); - GOTO(out, rc = -ENOENT); - } - fs_bmap = inode->i_mapping->a_ops->bmap; - - for (j = 0; j < o->ioo_bufcnt; j++, rnb++) { - long block; - - block = rnb->offset >> inode->i_blkbits; - - if (cmd == OBD_BRW_READ) { - block = fs_bmap(inode->i_mapping, block); - } else { - loff_t newsize = rnb->offset + rnb->len; - /* fs_prep_san_write will also update inode - * size for us: - * (1) new alloced block - * (2) existed block but size extented - */ - /* FIXME We could call fs_prep_san_write() - * only once for all the blocks allocation. - * Now call it once for each block, for - * simplicity. And if error happens, we - * probably need to release previous alloced - * block */ - rc = fs_prep_san_write(obd, inode, &block, - 1, newsize); - if (rc) - break; - } - - rnb->offset = block; - } - f_dput(dentry); - } -out: - RETURN(rc); -} - -static int filter_statfs(struct obd_export *exp, struct obd_statfs *osfs) -{ - struct obd_device *obd = exp->exp_obd; - ENTRY; - - RETURN(fsfilt_statfs(obd, obd->u.filter.fo_sb, osfs)); -} - -static int filter_get_info(struct lustre_handle *conn, __u32 keylen, - void *key, __u32 *vallen, void *val) -{ - struct obd_device *obd; - ENTRY; - - obd = class_conn2obd(conn); - if (!obd) { - CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n", - conn->cookie); - RETURN(-EINVAL); - } - - if (keylen == strlen("blocksize") && - memcmp(key, "blocksize", keylen) == 0) { - __u32 *blocksize = val; - *vallen = sizeof(*blocksize); - *blocksize = obd->u.filter.fo_sb->s_blocksize; - RETURN(0); - } - - if (keylen == strlen("blocksize_bits") && - memcmp(key, "blocksize_bits", keylen) == 0) { - __u32 *blocksize_bits = val; - *vallen = sizeof(*blocksize_bits); - *blocksize_bits = 
obd->u.filter.fo_sb->s_blocksize_bits; - RETURN(0); - } - - CDEBUG(D_IOCTL, "invalid key\n"); - RETURN(-EINVAL); -} - -int filter_copy_data(struct lustre_handle *dst_conn, struct obdo *dst, - struct lustre_handle *src_conn, struct obdo *src, - obd_size count, obd_off offset, struct obd_trans_info *oti) -{ - struct page *page; - struct lov_stripe_md srcmd, dstmd; - unsigned long index = 0; - int err = 0; - - LBUG(); /* THIS CODE IS NOT CORRECT -phil */ - - memset(&srcmd, 0, sizeof(srcmd)); - memset(&dstmd, 0, sizeof(dstmd)); - srcmd.lsm_object_id = src->o_id; - dstmd.lsm_object_id = dst->o_id; - - ENTRY; - CDEBUG(D_INFO, "src: ino "LPU64" blocks "LPU64", size "LPU64 - ", dst: ino "LPU64"\n", - src->o_id, src->o_blocks, src->o_size, dst->o_id); - page = alloc_page(GFP_USER); - if (page == NULL) - RETURN(-ENOMEM); - - wait_on_page(page); - - /* XXX with brw vector I/O, we could batch up reads and writes here, - * all we need to do is allocate multiple pages to handle the I/Os - * and arrays to handle the request parameters. 
- */ - while (index < ((src->o_size + PAGE_SIZE - 1) >> PAGE_SHIFT)) { - struct brw_page pg; - - pg.pg = page; - pg.count = PAGE_SIZE; - pg.off = (page->index) << PAGE_SHIFT; - pg.flag = 0; - - page->index = index; - err = obd_brw(OBD_BRW_READ, src_conn, &srcmd, 1, &pg, NULL); - if (err) { - EXIT; - break; - } - - pg.flag = OBD_BRW_CREATE; - CDEBUG(D_INFO, "Read page %ld ...\n", page->index); - - err = obd_brw(OBD_BRW_WRITE, dst_conn, &dstmd, 1, &pg, oti); - - /* XXX should handle dst->o_size, dst->o_blocks here */ - if (err) { - EXIT; - break; - } - - CDEBUG(D_INFO, "Wrote page %ld ...\n", page->index); - - index++; - } - dst->o_size = src->o_size; - dst->o_blocks = src->o_blocks; - dst->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - unlock_page(page); - __free_page(page); - - RETURN(err); -} - -int filter_iocontrol(unsigned int cmd, struct lustre_handle *conn, - int len, void *karg, void *uarg) -{ - struct obd_device *obd = class_conn2obd(conn); - - switch (cmd) { - case OBD_IOC_ABORT_RECOVERY: - CERROR("aborting recovery for device %s\n", obd->obd_name); - target_abort_recovery(obd); - RETURN(0); - - default: - RETURN(-EINVAL); - } - RETURN(0); -} - - -static struct obd_ops filter_obd_ops = { - o_owner: THIS_MODULE, - o_attach: filter_attach, - o_detach: filter_detach, - o_get_info: filter_get_info, - o_setup: filter_setup, - o_cleanup: filter_cleanup, - o_connect: filter_connect, - o_disconnect: filter_disconnect, - o_statfs: filter_statfs, - o_syncfs: filter_syncfs, - o_getattr: filter_getattr, - o_create: filter_create, - o_setattr: filter_setattr, - o_destroy: filter_destroy, - o_open: filter_open, - o_close: filter_close, - o_brw: filter_brw, - o_punch: filter_truncate, - o_preprw: filter_preprw, - o_commitrw: filter_commitrw, - o_destroy_export: filter_destroy_export, - o_iocontrol: filter_iocontrol, -#if 0 - o_san_preprw: filter_san_preprw, - o_preallocate: filter_preallocate_inodes, - o_migrate: filter_migrate, - o_copy: filter_copy_data, - o_iterate: 
filter_iterate -#endif -}; - -static struct obd_ops filter_sanobd_ops = { - o_owner: THIS_MODULE, - o_attach: filter_attach, - o_detach: filter_detach, - o_get_info: filter_get_info, - o_setup: filter_san_setup, - o_cleanup: filter_cleanup, - o_connect: filter_connect, - o_disconnect: filter_disconnect, - o_statfs: filter_statfs, - o_getattr: filter_getattr, - o_create: filter_create, - o_setattr: filter_setattr, - o_destroy: filter_destroy, - o_open: filter_open, - o_close: filter_close, - o_brw: filter_brw, - o_punch: filter_truncate, - o_preprw: filter_preprw, - o_commitrw: filter_commitrw, - o_san_preprw: filter_san_preprw, - o_destroy_export: filter_destroy_export, - o_iocontrol: filter_iocontrol, -#if 0 - o_preallocate: filter_preallocate_inodes, - o_migrate: filter_migrate, - o_copy: filter_copy_data, - o_iterate: filter_iterate -#endif -}; - - -static int __init obdfilter_init(void) -{ - struct lprocfs_static_vars lvars; - int rc; - - printk(KERN_INFO "Lustre Filtering OBD driver; info@clusterfs.com\n"); - - lprocfs_init_vars(&lvars); - - rc = class_register_type(&filter_obd_ops, lvars.module_vars, - OBD_FILTER_DEVICENAME); - if (rc) - return rc; - - rc = class_register_type(&filter_sanobd_ops, lvars.module_vars, - OBD_FILTER_SAN_DEVICENAME); - if (rc) - class_unregister_type(OBD_FILTER_DEVICENAME); - return rc; -} - -static void __exit obdfilter_exit(void) -{ - class_unregister_type(OBD_FILTER_SAN_DEVICENAME); - class_unregister_type(OBD_FILTER_DEVICENAME); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Filtering OBD driver"); -MODULE_LICENSE("GPL"); - -module_init(obdfilter_init); -module_exit(obdfilter_exit); diff --git a/lustre/obdfilter/lproc_obdfilter.c b/lustre/obdfilter/lproc_obdfilter.c deleted file mode 100644 index 1319dbd..0000000 --- a/lustre/obdfilter/lproc_obdfilter.c +++ /dev/null @@ -1,89 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#include <linux/lprocfs_status.h> -#include <linux/obd.h> - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -#else - -static inline int lprocfs_filter_statfs(void *data, struct statfs *sfs) -{ - struct obd_device *dev = (struct obd_device *) data; - LASSERT(dev != NULL); - return vfs_statfs(dev->u.filter.fo_sb, sfs); -} - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, lprocfs_filter_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, lprocfs_filter_statfs); - -int rd_fstype(char *page, char **start, off_t off, int count, int *eof, - void *data) -{ - struct obd_device *dev = (struct obd_device *)data; - LASSERT(dev != NULL); - return snprintf(page, count, "%s\n", dev->u.filter.fo_fstype); -} - -int lprocfs_filter_rd_mntdev(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - struct obd_device* obd = (struct obd_device *)data; - - LASSERT(obd != NULL); - LASSERT(obd->u.filter.fo_vfsmnt->mnt_devname); - *eof = 1; - return snprintf(page, count, "%s\n", - obd->u.filter.fo_vfsmnt->mnt_devname); -} - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "fstype", rd_fstype, 0, 0 }, - { "mntdev", lprocfs_filter_rd_mntdev, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", 
lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -#endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/osc/.cvsignore b/lustre/osc/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/osc/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/osc/Makefile.am b/lustre/osc/Makefile.am deleted file mode 100644 index dc0b4d8..0000000 --- a/lustre/osc/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= - -if LIBLUSTRE -lib_LIBRARIES = libosc.a -libosc_a_SOURCES = osc_request.c -else -MODULE = osc -modulefs_DATA = osc.o -EXTRA_PROGRAMS = osc -osc_SOURCES = osc_request.c lproc_osc.c osc_lib.c -endif - -include $(top_srcdir)/Rules diff --git a/lustre/osc/lproc_osc.c b/lustre/osc/lproc_osc.c deleted file mode 100644 index d5e4ec1..0000000 --- a/lustre/osc/lproc_osc.c +++ /dev/null @@ -1,62 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <asm/statfs.h> -#endif -#include <linux/obd_class.h> -#include <linux/lprocfs_status.h> - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -#else - -DEFINE_LPROCFS_STATFS_FCT(rd_blksize, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_kbytesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filestotal, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filesfree, obd_self_statfs); -DEFINE_LPROCFS_STATFS_FCT(rd_filegroups, obd_self_statfs); - -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { "blocksize", rd_blksize, 0, 0 }, - { "kbytestotal", rd_kbytestotal, 0, 0 }, - { "kbytesfree", rd_kbytesfree, 0, 0 }, - { "filestotal", rd_filestotal, 0, 0 }, - { "filesfree", rd_filesfree, 0, 0 }, - { "filegroups", rd_filegroups, 0, 0 }, - { "ost_server_uuid", lprocfs_rd_server_uuid, 0, 0 }, - { "ost_conn_uuid", lprocfs_rd_conn_uuid, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -#endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/osc/osc_lib.c b/lustre/osc/osc_lib.c deleted file mode 100644 index aa04a1a..0000000 --- a/lustre/osc/osc_lib.c +++ /dev/null @@ -1,76 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_OSC - -#ifdef __KERNEL__ -# include <linux/module.h> -# include <linux/obd.h> -# include <linux/obd_ost.h> -# include <linux/lustre_net.h> -# include <linux/lustre_dlm.h> - -/* convert a pathname into a kdev_t */ -static kdev_t path2dev(char *path) -{ - struct dentry *dentry; - struct nameidata nd; - kdev_t dev; - KDEVT_VAL(dev, 0); - - if (!path_init(path, LOOKUP_FOLLOW, &nd)) - return 0; - - if (path_walk(path, &nd)) - return 0; - - dentry = nd.dentry; - if (dentry->d_inode && !is_bad_inode(dentry->d_inode) && - S_ISBLK(dentry->d_inode->i_mode)) - dev = dentry->d_inode->i_rdev; - path_release(&nd); - - return dev; -} - -int client_sanobd_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct obd_ioctl_data* data = buf; - struct client_obd *cli = &obddev->u.cli; - ENTRY; - - if (data->ioc_inllen3 < 1) { - CERROR("setup requires a SAN device pathname\n"); - RETURN(-EINVAL); - } - - client_obd_setup(obddev, len, buf); - - cli->cl_sandev = path2dev(data->ioc_inlbuf3); - if (!kdev_t_to_nr(cli->cl_sandev)) { - CERROR("%s seems not a valid SAN device\n", data->ioc_inlbuf3); - RETURN(-EINVAL); - } - - RETURN(0); -} -#endif diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c deleted file mode 100644 index 4bda8de..0000000 --- a/lustre/osc/osc_request.c +++ /dev/null @@ -1,1913 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. 
- * Author Peter Braam <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * For testing and management it is treated as an obd_device, - * although * it does not export a full OBD method table (the - * requests are coming * in over the wire, so object target modules - * do not have a full * method table.) - * - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_OSC - -#ifdef __KERNEL__ -#include <linux/version.h> -#include <linux/module.h> -#include <linux/mm.h> -#include <linux/highmem.h> -#include <linux/lustre_dlm.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <linux/workqueue.h> -#include <linux/smp_lock.h> -#else -#include <linux/locks.h> -#endif -#else -#include <liblustre.h> -#endif - -#include <linux/kp30.h> -#include <linux/lustre_mds.h> /* for mds_objid */ -#include <linux/lustre_otree.h> -#include <linux/obd_ost.h> -#include <linux/obd_lov.h> - -#ifndef __CYGWIN__ -#include <linux/ctype.h> -#include <linux/init.h> -#else -#include <ctype.h> -#endif - -#include <linux/lustre_ha.h> -#include <linux/obd_support.h> /* for OBD_FAIL_CHECK */ -#include <linux/lustre_lite.h> /* for ll_i2info */ -#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */ -#include <linux/lprocfs_status.h> - -static int osc_attach(struct obd_device *dev, obd_count len, void *data) -{ - struct 
lprocfs_static_vars lvars; - - lprocfs_init_vars(&lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); -} - -static int osc_detach(struct obd_device *dev) -{ - return lprocfs_obd_detach(dev); -} - -/* Pack OSC object metadata for disk storage (LE byte order). */ -static int osc_packmd(struct lustre_handle *conn, struct lov_mds_md **lmmp, - struct lov_stripe_md *lsm) -{ - int lmm_size; - ENTRY; - - lmm_size = sizeof(**lmmp); - if (!lmmp) - RETURN(lmm_size); - - if (*lmmp && !lsm) { - OBD_FREE(*lmmp, lmm_size); - *lmmp = NULL; - RETURN(0); - } - - if (!*lmmp) { - OBD_ALLOC(*lmmp, lmm_size); - if (!*lmmp) - RETURN(-ENOMEM); - } - - if (lsm) { - LASSERT(lsm->lsm_object_id); - (*lmmp)->lmm_object_id = cpu_to_le64 (lsm->lsm_object_id); - } - - RETURN(lmm_size); -} - -/* Unpack OSC object metadata from disk storage (LE byte order). */ -static int osc_unpackmd(struct lustre_handle *conn, struct lov_stripe_md **lsmp, - struct lov_mds_md *lmm, int lmm_bytes) -{ - int lsm_size; - ENTRY; - - if (lmm != NULL) { - if (lmm_bytes < sizeof (*lmm)) { - CERROR("lov_mds_md too small: %d, need %d\n", - lmm_bytes, (int)sizeof(*lmm)); - RETURN (-EINVAL); - } - /* XXX LOV_MAGIC etc check? */ - - if (lmm->lmm_object_id == cpu_to_le64 (0)) { - CERROR ("lov_mds_md: zero lmm_object_id\n"); - RETURN (-EINVAL); - } - } - - lsm_size = lov_stripe_md_size(1); - if (!lsmp) - RETURN(lsm_size); - - if (*lsmp && !lmm) { - OBD_FREE(*lsmp, lsm_size); - *lsmp = NULL; - RETURN(0); - } - - if (!*lsmp) { - OBD_ALLOC(*lsmp, lsm_size); - if (!*lsmp) - RETURN(-ENOMEM); - - (*lsmp)->lsm_oinfo[0].loi_dirty_ot = - &(*lsmp)->lsm_oinfo[0].loi_dirty_ot_inline; - ot_init((*lsmp)->lsm_oinfo[0].loi_dirty_ot); - } - - if (lmm) { - /* XXX zero *lsmp? 
*/ - (*lsmp)->lsm_object_id = le64_to_cpu (lmm->lmm_object_id); - LASSERT((*lsmp)->lsm_object_id); - } - - (*lsmp)->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES; - - RETURN(lsm_size); -} - -#warning "FIXME: make this be sent from OST" -#define OSC_BRW_MAX_SIZE 65536 -#define OSC_BRW_MAX_IOV min_t(int, PTL_MD_MAX_IOV, OSC_BRW_MAX_SIZE/PAGE_SIZE) - -static int osc_getattr_interpret(struct ptlrpc_request *req, - struct osc_getattr_async_args *aa, int rc) -{ - struct obdo *oa = aa->aa_oa; - struct ost_body *body; - ENTRY; - - if (rc != 0) { - CERROR("failed: rc = %d\n", rc); - RETURN (rc); - } - - body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body); - if (body == NULL) { - CERROR ("can't unpack ost_body\n"); - RETURN (-EPROTO); - } - - CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - memcpy(oa, &body->oa, sizeof(*oa)); - - /* This should really be sent by the OST */ - oa->o_blksize = OSC_BRW_MAX_SIZE; - oa->o_valid |= OBD_MD_FLBLKSZ; - - RETURN (0); -} - -static int osc_getattr_async(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, - struct ptlrpc_request_set *set) -{ - struct ptlrpc_request *request; - struct ost_body *body; - int size = sizeof(*body); - struct osc_getattr_async_args *aa; - ENTRY; - - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_GETATTR, 1, - &size, NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - request->rq_replen = lustre_msg_size(1, &size); - request->rq_interpret_reply = osc_getattr_interpret; - - LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args)); - aa = (struct osc_getattr_async_args *)&request->rq_async_args; - aa->aa_oa = oa; - - ptlrpc_set_add_req (set, request); - RETURN (0); -} - -static int osc_getattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md) -{ - struct ptlrpc_request *request; - struct ost_body *body; - int rc, size = sizeof(*body); - ENTRY; - - 
request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_GETATTR, 1, - &size, NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - if (rc) { - CERROR("%s failed: rc = %d\n", __FUNCTION__, rc); - GOTO(out, rc); - } - - body = lustre_swab_repbuf(request, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR ("can't unpack ost_body\n"); - GOTO (out, rc = -EPROTO); - } - - CDEBUG(D_INODE, "mode: %o\n", body->oa.o_mode); - memcpy(oa, &body->oa, sizeof(*oa)); - - /* This should really be sent by the OST */ - oa->o_blksize = OSC_BRW_MAX_SIZE; - oa->o_valid |= OBD_MD_FLBLKSZ; - - EXIT; - out: - ptlrpc_req_finished(request); - return rc; -} - -/* The import lock must already be held. */ -static inline void osc_update_body_handle(struct list_head *head, - struct lustre_handle *old, - struct lustre_handle *new, int op) -{ - struct list_head *tmp; - struct ost_body *body; - struct ptlrpc_request *req; - struct ptlrpc_request *last_req = NULL; /* temporary fire escape */ - - list_for_each(tmp, head) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - - /* XXX ok to remove when bug 1303 resolved - rread 05/27/03 */ - LASSERT (req != last_req); - last_req = req; - - if (req->rq_reqmsg->opc != op) - continue; - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*body)); - if (memcmp(obdo_handle(&body->oa), old, sizeof(*old))) - continue; - - DEBUG_REQ(D_HA, req, "updating close body with new fh"); - memcpy(obdo_handle(&body->oa), new, sizeof(*new)); - } -} - -static void osc_replay_open(struct ptlrpc_request *req) -{ - struct lustre_handle old; - struct ost_body *body; - struct obd_client_handle *och = req->rq_replay_data; - struct lustre_handle *oa_handle; - ENTRY; - - body = lustre_swab_repbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - LASSERT (body != NULL); - - 
oa_handle = obdo_handle(&body->oa); - - memcpy(&old, &och->och_fh, sizeof(old)); - CDEBUG(D_HA, "updating cookie from "LPD64" to "LPD64"\n", - och->och_fh.cookie, oa_handle->cookie); - memcpy(&och->och_fh, oa_handle, sizeof(och->och_fh)); - - /* A few frames up, ptlrpc_replay holds the lock, so this is safe. */ - osc_update_body_handle(&req->rq_import->imp_sending_list, &old, - &och->och_fh, OST_CLOSE); - osc_update_body_handle(&req->rq_import->imp_delayed_list, &old, - &och->och_fh, OST_CLOSE); - EXIT; -} - - -static int osc_open(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti, - struct obd_client_handle *och) -{ - struct ptlrpc_request *request; - struct ost_body *body; - unsigned long flags; - int rc, size = sizeof(*body); - ENTRY; - LASSERT(och != NULL); - - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_OPEN, 1, &size, - NULL); - if (!request) - RETURN(-ENOMEM); - - spin_lock_irqsave (&request->rq_lock, flags); - request->rq_replay = 1; - spin_unlock_irqrestore (&request->rq_lock, flags); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out, rc); - - body = lustre_swab_repbuf (request, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR ("Can't unpack ost_body\n"); - GOTO (out, rc = -EPROTO); - } - - memcpy(oa, &body->oa, sizeof(*oa)); - - /* If the open succeeded, we better have a handle */ - /* BlueArc OSTs don't send back (o_valid | FLHANDLE). sigh. - * Temporary workaround until fixed. 
-phil 24 Feb 03 */ - // if ((oa->o_valid & OBD_MD_FLHANDLE) == 0) { - // CERROR ("No file handle\n"); - // GOTO (out, rc = -EPROTO); - // } - oa->o_valid |= OBD_MD_FLHANDLE; - - /* This should really be sent by the OST */ - oa->o_blksize = OSC_BRW_MAX_SIZE; - oa->o_valid |= OBD_MD_FLBLKSZ; - - memcpy(&och->och_fh, obdo_handle(oa), sizeof(och->och_fh)); - request->rq_replay_cb = osc_replay_open; - request->rq_replay_data = och; - och->och_req = ptlrpc_request_addref(request); - och->och_magic = OBD_CLIENT_HANDLE_MAGIC; - - EXIT; - out: - ptlrpc_req_finished(request); - return rc; -} - -static int osc_close(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti) -{ - struct obd_import *import = class_conn2cliimp(conn); - struct ptlrpc_request *request; - struct ost_body *body; - struct obd_client_handle *och; - unsigned long flags; - int rc, size = sizeof(*body); - ENTRY; - - LASSERT(oa != NULL); - och = (struct obd_client_handle *)&oa->o_inline; - if (och->och_magic == 0) { - /* Zero magic means that this file was never opened on this - * OST--almost certainly because the OST was inactive at - * open-time */ - RETURN(0); - } - LASSERT(och->och_magic == OBD_CLIENT_HANDLE_MAGIC); - - request = ptlrpc_prep_req(import, OST_CLOSE, 1, &size, NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - if (rc) - CDEBUG(D_HA, "Suppressing close error %d\n", rc); // bug 1036 - - /* och_req == NULL can't happen any more, right? 
--phik */ - if (och->och_req != NULL) { - spin_lock_irqsave(&import->imp_lock, flags); - spin_lock (&och->och_req->rq_lock); - och->och_req->rq_replay = 0; - spin_unlock (&och->och_req->rq_lock); - /* see comments in llite/file.c:ll_mdc_close() */ - if (och->och_req->rq_transno) { - /* this can't happen yet, because the OSTs don't yet - * issue transnos for OPEN requests -phik 21 Apr 2003 */ - LBUG(); - if (!request->rq_transno && import->imp_replayable) { - request->rq_transno = och->och_req->rq_transno; - ptlrpc_retain_replayable_request(request, - import); - } - spin_unlock_irqrestore(&import->imp_lock, flags); - } else { - spin_unlock_irqrestore(&import->imp_lock, flags); - } - - ptlrpc_req_finished(och->och_req); - } - - if (!rc) { - body = lustre_swab_repbuf (request, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - rc = -EPROTO; - CDEBUG(D_HA, "Suppressing close error %d\n", rc); // bug 1036 - } else - memcpy(oa, &body->oa, sizeof(*oa)); - } - - ptlrpc_req_finished(request); - RETURN(0); -} - -static int osc_setattr(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, struct obd_trans_info *oti) -{ - struct ptlrpc_request *request; - struct ost_body *body; - int rc, size = sizeof(*body); - ENTRY; - - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SETATTR, 1, - &size, NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - - ptlrpc_req_finished(request); - return rc; -} - -static int osc_create(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md **ea, struct obd_trans_info *oti) -{ - struct ptlrpc_request *request; - struct ost_body *body; - struct lov_stripe_md *lsm; - int rc, size = sizeof(*body); - ENTRY; - - LASSERT(oa); - LASSERT(ea); - - lsm = *ea; - if (!lsm) { - rc = obd_alloc_memmd(conn, &lsm); - if (rc < 0) - 
RETURN(rc); - } - - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_CREATE, 1, &size, - NULL); - if (!request) - GOTO(out, rc = -ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out_req, rc); - - body = lustre_swab_repbuf (request, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR ("can't unpack ost_body\n"); - GOTO (out_req, rc = -EPROTO); - } - - memcpy(oa, &body->oa, sizeof(*oa)); - - /* This should really be sent by the OST */ - oa->o_blksize = OSC_BRW_MAX_SIZE; - oa->o_valid |= OBD_MD_FLBLKSZ; - - /* XXX LOV STACKING: the lsm that is passed to us from LOV does not - * have valid lsm_oinfo data structs, so don't go touching that. - * This needs to be fixed in a big way. - */ - lsm->lsm_object_id = oa->o_id; - lsm->lsm_stripe_count = 0; - lsm->lsm_maxbytes = LUSTRE_STRIPE_MAXBYTES; - *ea = lsm; - - if (oti != NULL) - oti->oti_transno = request->rq_repmsg->transno; - - CDEBUG(D_HA, "transno: "LPD64"\n", request->rq_repmsg->transno); - EXIT; -out_req: - ptlrpc_req_finished(request); -out: - if (rc && !*ea) - obd_free_memmd(conn, &lsm); - return rc; -} - -static int osc_punch(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *md, obd_size start, - obd_size end, struct obd_trans_info *oti) -{ - struct ptlrpc_request *request; - struct ost_body *body; - int rc, size = sizeof(*body); - ENTRY; - - if (!oa) { - CERROR("oa NULL\n"); - RETURN(-EINVAL); - } - - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_PUNCH, 1, &size, - NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - /* overload the size and blocks fields in the oa with start/end */ - body->oa.o_size = start; - body->oa.o_blocks = end; - body->oa.o_valid |= (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS); - 
- request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out, rc); - - body = lustre_swab_repbuf (request, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR ("can't unpack ost_body\n"); - GOTO (out, rc = -EPROTO); - } - - memcpy(oa, &body->oa, sizeof(*oa)); - - EXIT; - out: - ptlrpc_req_finished(request); - return rc; -} - -static int osc_destroy(struct lustre_handle *conn, struct obdo *oa, - struct lov_stripe_md *ea, struct obd_trans_info *oti) -{ - struct ptlrpc_request *request; - struct ost_body *body; - int rc, size = sizeof(*body); - ENTRY; - - if (!oa) { - CERROR("oa NULL\n"); - RETURN(-EINVAL); - } - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_DESTROY, 1, - &size, NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - memcpy(&body->oa, oa, sizeof(*oa)); - - request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out, rc); - - body = lustre_swab_repbuf (request, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR ("Can't unpack body\n"); - GOTO (out, rc = -EPROTO); - } - - memcpy(oa, &body->oa, sizeof(*oa)); - - EXIT; - out: - ptlrpc_req_finished(request); - return rc; -} - -static void osc_announce_cached(struct client_obd *cli, struct ost_body *body) -{ - obd_flag bits = OBD_MD_FLBLOCKS|OBD_MD_FLRDEV; - - LASSERT(!(body->oa.o_valid & bits)); - - body->oa.o_valid |= bits; - down(&cli->cl_dirty_sem); - body->oa.o_blocks = cli->cl_dirty; - body->oa.o_rdev = cli->cl_dirty_granted; - up(&cli->cl_dirty_sem); - CDEBUG(D_INODE, "announcing "LPU64" dirty "LPU64" granted\n", - cli->cl_dirty, cli->cl_dirty_granted); -} - -static void osc_update_grant(struct client_obd *cli, struct ost_body *body) -{ - if(!(body->oa.o_valid & OBD_MD_FLRDEV)) { - if (cli->cl_ost_can_grant) { - CDEBUG(D_INODE, "%s can't grant\n", - cli->cl_import->imp_target_uuid.uuid); - } - 
cli->cl_ost_can_grant = 0; - return; - } - - CDEBUG(D_INODE, "got "LPU64" grant\n", body->oa.o_rdev); - down(&cli->cl_dirty_sem); - cli->cl_dirty_granted = body->oa.o_rdev; - /* XXX check for over-run and wake up the io thread that - * doesn't exist yet */ - up(&cli->cl_dirty_sem); -} - -/* We assume that the reason this OSC got a short read is because it read - * beyond the end of a stripe file; i.e. lustre is reading a sparse file - * via the LOV, and it _knows_ it's reading inside the file, it's just that - * this stripe never got written at or beyond this stripe offset yet. */ -static void handle_short_read(int nob_read, obd_count page_count, - struct brw_page *pga) -{ - char *ptr; - - /* skip bytes read OK */ - while (nob_read > 0) { - LASSERT (page_count > 0); - - if (pga->count > nob_read) { - /* EOF inside this page */ - ptr = kmap(pga->pg) + (pga->off & ~PAGE_MASK); - memset(ptr + nob_read, 0, pga->count - nob_read); - kunmap(pga->pg); - page_count--; - pga++; - break; - } - - nob_read -= pga->count; - page_count--; - pga++; - } - - /* zero remaining pages */ - while (page_count-- > 0) { - ptr = kmap(pga->pg) + (pga->off & ~PAGE_MASK); - memset(ptr, 0, pga->count); - kunmap(pga->pg); - pga++; - } -} - -static int check_write_rcs (struct ptlrpc_request *request, - int niocount, obd_count page_count, - struct brw_page *pga) -{ - int i; - __u32 *remote_rcs; - - /* return error if any niobuf was in error */ - remote_rcs = lustre_swab_repbuf(request, 1, - sizeof(*remote_rcs) * niocount, NULL); - if (remote_rcs == NULL) { - CERROR ("Missing/short RC vector on BRW_WRITE reply\n"); - return (-EPROTO); - } - if (lustre_msg_swabbed (request->rq_repmsg)) - for (i = 0; i < niocount; i++) - __swab32s (&remote_rcs[i]); - - for (i = 0; i < niocount; i++) { - if (remote_rcs[i] < 0) - return (remote_rcs[i]); - - if (remote_rcs[i] != 0) { - CERROR ("rc[%d] invalid (%d) req %p\n", - i, remote_rcs[i], request); - return (-EPROTO); - } - } - - return (0); -} - -static inline 
int can_merge_pages (struct brw_page *p1, struct brw_page *p2) -{ - if (p1->flag != p2->flag) { - /* XXX we don't make much use of 'flag' right now - * but this will warn about usage when we do */ - CERROR ("different flags set %d, %d\n", - p1->flag, p2->flag); - return (0); - } - - return (p1->off + p1->count == p2->off); -} - -#if CHECKSUM_BULK -static obd_count cksum_pages(int nob, obd_count page_count, - struct brw_page *pga) -{ - obd_count cksum = 0; - char *ptr; - int i; - - while (nob > 0) { - LASSERT (page_count > 0); - - ptr = kmap (pga->pg); - ost_checksum (&cksum, ptr + (pga->off & (PAGE_SIZE - 1)), - pga->count > nob ? nob : pga->count); - kunmap (pga->pg); - - nob -= pga->count; - page_count--; - pga++; - } - - return (cksum); -} -#endif - -static int osc_brw_prep_request(struct obd_import *imp, - struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page *pga, int cmd, - int *requested_nobp, int *niocountp, - struct ptlrpc_request **reqp) -{ - struct ptlrpc_request *req; - struct ptlrpc_bulk_desc *desc; - struct client_obd *cli = &imp->imp_obd->u.cli; - struct ost_body *body; - struct obd_ioobj *ioobj; - struct niobuf_remote *niobuf; - unsigned long flags; - int niocount; - int size[3]; - int i; - int requested_nob; - int opc; - int rc; - - opc = ((cmd & OBD_BRW_WRITE) != 0) ? 
OST_WRITE : OST_READ; - - for (niocount = i = 1; i < page_count; i++) - if (!can_merge_pages (&pga[i - 1], &pga[i])) - niocount++; - - size[0] = sizeof (*body); - size[1] = sizeof (*ioobj); - size[2] = niocount * sizeof (*niobuf); - - req = ptlrpc_prep_req (imp, opc, 3, size, NULL); - if (req == NULL) - return (-ENOMEM); - - if (opc == OST_WRITE) - desc = ptlrpc_prep_bulk_imp(req, BULK_GET_SOURCE, - OST_BULK_PORTAL); - else - desc = ptlrpc_prep_bulk_imp(req, BULK_PUT_SINK, - OST_BULK_PORTAL); - if (desc == NULL) - GOTO (out, rc = -ENOMEM); - /* NB request now owns desc and will free it when it gets freed */ - - body = lustre_msg_buf(req->rq_reqmsg, 0, sizeof(*body)); - ioobj = lustre_msg_buf(req->rq_reqmsg, 1, sizeof(*ioobj)); - niobuf = lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf)); - - ioobj->ioo_id = lsm->lsm_object_id; - ioobj->ioo_gr = 0; - ioobj->ioo_type = S_IFREG; - ioobj->ioo_bufcnt = niocount; - - LASSERT (page_count > 0); - for (requested_nob = i = 0; i < page_count; i++, niobuf++) { - struct brw_page *pg = &pga[i]; - struct brw_page *pg_prev = pg - 1; - - LASSERT (pg->count > 0); - LASSERT ((pg->off & (PAGE_SIZE - 1)) + pg->count <= PAGE_SIZE); - LASSERT (i == 0 || pg->off > pg_prev->off); - - rc = ptlrpc_prep_bulk_page (desc, pg->pg, - pg->off & (PAGE_SIZE - 1), - pg->count); - if (rc != 0) - GOTO (out, rc); - - requested_nob += pg->count; - - if (i > 0 && can_merge_pages (pg_prev, pg)) { - niobuf--; - niobuf->len += pg->count; - } else { - niobuf->offset = pg->off; - niobuf->len = pg->count; - niobuf->flags = pg->flag; - } - } - - LASSERT ((void *)(niobuf - niocount) == - lustre_msg_buf(req->rq_reqmsg, 2, niocount * sizeof(*niobuf))); -#if CHECKSUM_BULK - body->oa.o_valid |= OBD_MD_FLCKSUM; - if (opc == OST_BRW_WRITE) - body->oa.o_nlink = cksum_pages (requested_nob, page_count, pga); -#endif - osc_announce_cached(cli, body); - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_no_resend = 1; - spin_unlock_irqrestore (&req->rq_lock, 
flags); - - /* size[0] still sizeof (*body) */ - if (opc == OST_WRITE) { - /* 1 RC per niobuf */ - size[1] = sizeof(__u32) * niocount; - req->rq_replen = lustre_msg_size(2, size); - } else { - /* 1 RC for the whole I/O */ - req->rq_replen = lustre_msg_size(1, size); - } - - *niocountp = niocount; - *requested_nobp = requested_nob; - *reqp = req; - return (0); - - out: - ptlrpc_req_finished (req); - return (rc); -} - -static int osc_brw_fini_request (struct ptlrpc_request *req, - int requested_nob, int niocount, - obd_count page_count, struct brw_page *pga, - int rc) -{ - struct client_obd *cli = &req->rq_import->imp_obd->u.cli; - struct ost_body *body; - if (rc < 0) - return (rc); - - body = lustre_swab_repbuf(req, 0, sizeof (*body), lustre_swab_ost_body); - if (body == NULL) { - CERROR ("Can't unpack body\n"); - RETURN(-EPROTO); - } - osc_update_grant(cli, body); - - if (req->rq_reqmsg->opc == OST_WRITE) { - if (rc > 0) { - CERROR ("Unexpected +ve rc %d\n", rc); - return (-EPROTO); - } - - return (check_write_rcs(req, niocount, page_count, pga)); - } - - if (rc > requested_nob) { - CERROR ("Unexpected rc %d (%d requested)\n", - rc, requested_nob); - return (-EPROTO); - } - - if (rc < requested_nob) - handle_short_read(rc, page_count, pga); - -#if CHECKSUM_BULK - if (body->oa.o_valid & OBD_MD_FLCKSUM) { - static int cksum_counter; - obd_count server_cksum = body->oa.o_nlink; - obd_count cksum = cksum_pages(rc, page_count, pga); - - cksum_counter++; - if (server_cksum != cksum) { - CERROR("Bad checksum: server "LPX64", client "LPX64 - ", server NID "LPX64"\n", server_cksum, cksum, - imp->imp_connection->c_peer.peer_nid); - cksum_counter = 0; - } else if ((cksum_counter & (-cksum_counter)) == cksum_counter) - CERROR("Checksum %u from "LPX64" OK: %x\n", - cksum_counter, - imp->imp_connection->c_peer.peer_nid, cksum); - } else { - static int cksum_missed; - cksum_missed++; - if ((cksum_missed & (-cksum_missed)) == cksum_missed) - CERROR("Request checksum %u from 
"LPX64", no reply\n", - cksum_missed, - imp->imp_connection->c_peer.peer_nid); - } -#endif - return (0); -} - -static int osc_brw_internal(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page *pga, int cmd) -{ - int requested_nob; - int niocount; - struct ptlrpc_request *request; - int rc; - ENTRY; - -restart_bulk: - rc = osc_brw_prep_request(class_conn2cliimp(conn), lsm, page_count, pga, - cmd, &requested_nob, &niocount, &request); - /* NB ^ sets rq_no_resend */ - - if (rc != 0) - return (rc); - - rc = ptlrpc_queue_wait(request); - - if (rc == -ETIMEDOUT && request->rq_resend) { - DEBUG_REQ(D_HA, request, "BULK TIMEOUT"); - ptlrpc_req_finished(request); - goto restart_bulk; - } - - rc = osc_brw_fini_request (request, requested_nob, niocount, - page_count, pga, rc); - - ptlrpc_req_finished(request); - RETURN (rc); -} - -static int brw_interpret(struct ptlrpc_request *request, - struct osc_brw_async_args *aa, int rc) -{ - int requested_nob = aa->aa_requested_nob; - int niocount = aa->aa_nio_count; - obd_count page_count = aa->aa_page_count; - struct brw_page *pga = aa->aa_pga; - ENTRY; - - /* XXX bug 937 here */ - if (rc == -ETIMEDOUT && request->rq_resend) { - DEBUG_REQ(D_HA, request, "BULK TIMEOUT"); - LBUG(); /* re-send. later. 
*/ - //goto restart_bulk; - } - - rc = osc_brw_fini_request (request, requested_nob, niocount, - page_count, pga, rc); - RETURN (rc); -} - -static int async_internal(struct lustre_handle *conn, struct lov_stripe_md *lsm, - obd_count page_count, struct brw_page *pga, - struct ptlrpc_request_set *set, int cmd) -{ - struct ptlrpc_request *request; - int requested_nob; - int nio_count; - struct osc_brw_async_args *aa; - int rc; - ENTRY; - - rc = osc_brw_prep_request (class_conn2cliimp(conn), - lsm, page_count, pga, cmd, - &requested_nob, &nio_count, &request); - /* NB ^ sets rq_no_resend */ - - if (rc == 0) { - LASSERT (sizeof (*aa) <= sizeof (request->rq_async_args)); - aa = (struct osc_brw_async_args *)&request->rq_async_args; - aa->aa_requested_nob = requested_nob; - aa->aa_nio_count = nio_count; - aa->aa_page_count = page_count; - aa->aa_pga = pga; - - request->rq_interpret_reply = brw_interpret; - ptlrpc_set_add_req(set, request); - } - RETURN (rc); -} - -#ifndef min_t -#define min_t(type,x,y) \ - ({ type __x = (x); type __y = (y); __x < __y ? __x: __y; }) -#endif - -/* - * ugh, we want disk allocation on the target to happen in offset order. we'll - * follow sedgewicks advice and stick to the dead simple shellsort -- it'll do - * fine for our small page arrays and doesn't require allocation. its an - * insertion sort that swaps elements that are strides apart, shrinking the - * stride down until its '1' and the array is sorted. 
- */ -static void sort_brw_pages(struct brw_page *array, int num) -{ - int stride, i, j; - struct brw_page tmp; - - if (num == 1) - return; - for (stride = 1; stride < num ; stride = (stride * 3) + 1) - ; - - do { - stride /= 3; - for (i = stride ; i < num ; i++) { - tmp = array[i]; - j = i; - while (j >= stride && array[j - stride].off > tmp.off) { - array[j] = array[j - stride]; - j -= stride; - } - array[j] = tmp; - } - } while (stride > 1); -} - -/* make sure we the regions we're passing to elan don't violate its '4 - * fragments' constraint. portal headers are a fragment, all full - * PAGE_SIZE long pages count as 1 fragment, and each partial page - * counts as a fragment. I think. see bug 934. */ -static obd_count check_elan_limit(struct brw_page *pg, obd_count pages) -{ - int frags_left = 3; - int saw_whole_frag = 0; - int i; - - for (i = 0 ; frags_left && i < pages ; pg++, i++) { - if (pg->count == PAGE_SIZE) { - if (!saw_whole_frag) { - saw_whole_frag = 1; - frags_left--; - } - } else { - frags_left--; - } - } - return i; -} - -static int osc_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *md, obd_count page_count, - struct brw_page *pga, struct obd_trans_info *oti) -{ - ENTRY; - - if (cmd == OBD_BRW_CHECK) { - /* The caller just wants to know if there's a chance that this - * I/O can succeed */ - struct obd_import *imp = class_conn2cliimp(conn); - - if (imp == NULL || imp->imp_invalid) - RETURN(-EIO); - RETURN(0); - } - - while (page_count) { - obd_count pages_per_brw; - int rc; - - if (page_count > OSC_BRW_MAX_IOV) - pages_per_brw = OSC_BRW_MAX_IOV; - else - pages_per_brw = page_count; - - sort_brw_pages(pga, pages_per_brw); - pages_per_brw = check_elan_limit(pga, pages_per_brw); - - rc = osc_brw_internal(conn, md, pages_per_brw, pga, cmd); - - if (rc != 0) - RETURN(rc); - - page_count -= pages_per_brw; - pga += pages_per_brw; - } - RETURN(0); -} - -static int osc_brw_async(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *md, 
obd_count page_count, - struct brw_page *pga, struct ptlrpc_request_set *set, - struct obd_trans_info *oti) -{ - ENTRY; - - if (cmd == OBD_BRW_CHECK) { - /* The caller just wants to know if there's a chance that this - * I/O can succeed */ - struct obd_import *imp = class_conn2cliimp(conn); - - if (imp == NULL || imp->imp_invalid) - RETURN(-EIO); - RETURN(0); - } - - while (page_count) { - obd_count pages_per_brw; - int rc; - - if (page_count > OSC_BRW_MAX_IOV) - pages_per_brw = OSC_BRW_MAX_IOV; - else - pages_per_brw = page_count; - - sort_brw_pages(pga, pages_per_brw); - pages_per_brw = check_elan_limit(pga, pages_per_brw); - - rc = async_internal(conn, md, pages_per_brw, pga, set, cmd); - - if (rc != 0) - RETURN(rc); - - page_count -= pages_per_brw; - pga += pages_per_brw; - } - RETURN(0); -} - -#ifdef __KERNEL__ -/* Note: caller will lock/unlock, and set uptodate on the pages */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -static int sanosc_brw_read(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - obd_count page_count, - struct brw_page *pga) -{ - struct ptlrpc_request *request = NULL; - struct ost_body *body; - struct niobuf_remote *nioptr; - struct obd_ioobj *iooptr; - int rc, size[3] = {sizeof(*body)}, mapped = 0; - int swab; - ENTRY; - - /* XXX does not handle 'new' brw protocol */ - - size[1] = sizeof(struct obd_ioobj); - size[2] = page_count * sizeof(*nioptr); - - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SAN_READ, 3, - size, NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr)); - nioptr = lustre_msg_buf(request->rq_reqmsg, 2, - sizeof (*nioptr) * page_count); - - iooptr->ioo_id = lsm->lsm_object_id; - iooptr->ioo_gr = 0; - iooptr->ioo_type = S_IFREG; - iooptr->ioo_bufcnt = page_count; - - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - LASSERT(PageLocked(pga[mapped].pg)); - LASSERT(mapped == 
0 || pga[mapped].off > pga[mapped - 1].off); - - nioptr->offset = pga[mapped].off; - nioptr->len = pga[mapped].count; - nioptr->flags = pga[mapped].flag; - } - - size[1] = page_count * sizeof(*nioptr); - request->rq_replen = lustre_msg_size(2, size); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out_req, rc); - - swab = lustre_msg_swabbed (request->rq_repmsg); - LASSERT_REPSWAB (request, 1); - nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]); - if (!nioptr) { - /* nioptr missing or short */ - GOTO(out_req, rc = -EPROTO); - } - - /* actual read */ - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - struct page *page = pga[mapped].pg; - struct buffer_head *bh; - kdev_t dev; - - if (swab) - lustre_swab_niobuf_remote (nioptr); - - /* got san device associated */ - LASSERT(class_conn2obd(conn)); - dev = class_conn2obd(conn)->u.cli.cl_sandev; - - /* hole */ - if (!nioptr->offset) { - CDEBUG(D_PAGE, "hole at ino %lu; index %ld\n", - page->mapping->host->i_ino, - page->index); - memset(page_address(page), 0, PAGE_SIZE); - continue; - } - - if (!page->buffers) { - create_empty_buffers(page, dev, PAGE_SIZE); - bh = page->buffers; - - clear_bit(BH_New, &bh->b_state); - set_bit(BH_Mapped, &bh->b_state); - bh->b_blocknr = (unsigned long)nioptr->offset; - - clear_bit(BH_Uptodate, &bh->b_state); - - ll_rw_block(READ, 1, &bh); - } else { - bh = page->buffers; - - /* if buffer already existed, it must be the - * one we mapped before, check it */ - LASSERT(!test_bit(BH_New, &bh->b_state)); - LASSERT(test_bit(BH_Mapped, &bh->b_state)); - LASSERT(bh->b_blocknr == (unsigned long)nioptr->offset); - - /* wait it's io completion */ - if (test_bit(BH_Lock, &bh->b_state)) - wait_on_buffer(bh); - - if (!test_bit(BH_Uptodate, &bh->b_state)) - ll_rw_block(READ, 1, &bh); - } - - - /* must do syncronous write here */ - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - /* I/O error */ - rc = -EIO; - goto out_req; - } - } - -out_req: - ptlrpc_req_finished(request); - 
RETURN(rc); -} - -static int sanosc_brw_write(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - obd_count page_count, - struct brw_page *pga) -{ - struct ptlrpc_request *request = NULL; - struct ost_body *body; - struct niobuf_remote *nioptr; - struct obd_ioobj *iooptr; - int rc, size[3] = {sizeof(*body)}, mapped = 0; - int swab; - ENTRY; - - size[1] = sizeof(struct obd_ioobj); - size[2] = page_count * sizeof(*nioptr); - - request = ptlrpc_prep_req(class_conn2cliimp(conn), OST_SAN_WRITE, - 3, size, NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0, sizeof (*body)); - iooptr = lustre_msg_buf(request->rq_reqmsg, 1, sizeof (*iooptr)); - nioptr = lustre_msg_buf(request->rq_reqmsg, 2, - sizeof (*nioptr) * page_count); - - iooptr->ioo_id = lsm->lsm_object_id; - iooptr->ioo_gr = 0; - iooptr->ioo_type = S_IFREG; - iooptr->ioo_bufcnt = page_count; - - /* pack request */ - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - LASSERT(PageLocked(pga[mapped].pg)); - LASSERT(mapped == 0 || pga[mapped].off > pga[mapped - 1].off); - - nioptr->offset = pga[mapped].off; - nioptr->len = pga[mapped].count; - nioptr->flags = pga[mapped].flag; - } - - size[1] = page_count * sizeof(*nioptr); - request->rq_replen = lustre_msg_size(2, size); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out_req, rc); - - swab = lustre_msg_swabbed (request->rq_repmsg); - LASSERT_REPSWAB (request, 1); - nioptr = lustre_msg_buf(request->rq_repmsg, 1, size[1]); - if (!nioptr) { - CERROR("absent/short niobuf array\n"); - GOTO(out_req, rc = -EPROTO); - } - - /* actual write */ - for (mapped = 0; mapped < page_count; mapped++, nioptr++) { - struct page *page = pga[mapped].pg; - struct buffer_head *bh; - kdev_t dev; - - if (swab) - lustre_swab_niobuf_remote (nioptr); - - /* got san device associated */ - LASSERT(class_conn2obd(conn)); - dev = class_conn2obd(conn)->u.cli.cl_sandev; - - if (!page->buffers) { - create_empty_buffers(page, dev, 
PAGE_SIZE); - } else { - /* checking */ - LASSERT(!test_bit(BH_New, &page->buffers->b_state)); - LASSERT(test_bit(BH_Mapped, &page->buffers->b_state)); - LASSERT(page->buffers->b_blocknr == - (unsigned long)nioptr->offset); - } - bh = page->buffers; - - LASSERT(bh); - - /* if buffer locked, wait it's io completion */ - if (test_bit(BH_Lock, &bh->b_state)) - wait_on_buffer(bh); - - clear_bit(BH_New, &bh->b_state); - set_bit(BH_Mapped, &bh->b_state); - - /* override the block nr */ - bh->b_blocknr = (unsigned long)nioptr->offset; - - /* we are about to write it, so set it - * uptodate/dirty - * page lock should garentee no race condition here */ - set_bit(BH_Uptodate, &bh->b_state); - set_bit(BH_Dirty, &bh->b_state); - - ll_rw_block(WRITE, 1, &bh); - - /* must do syncronous write here */ - wait_on_buffer(bh); - if (!buffer_uptodate(bh) || test_bit(BH_Dirty, &bh->b_state)) { - /* I/O error */ - rc = -EIO; - goto out_req; - } - } - -out_req: - ptlrpc_req_finished(request); - RETURN(rc); -} - -static int sanosc_brw(int cmd, struct lustre_handle *conn, - struct lov_stripe_md *lsm, obd_count page_count, - struct brw_page *pga, struct obd_trans_info *oti) -{ - ENTRY; - - while (page_count) { - obd_count pages_per_brw; - int rc; - - if (page_count > OSC_BRW_MAX_IOV) - pages_per_brw = OSC_BRW_MAX_IOV; - else - pages_per_brw = page_count; - - if (cmd & OBD_BRW_WRITE) - rc = sanosc_brw_write(conn, lsm, pages_per_brw, pga); - else - rc = sanosc_brw_read(conn, lsm, pages_per_brw, pga); - - if (rc != 0) - RETURN(rc); - - page_count -= pages_per_brw; - pga += pages_per_brw; - } - RETURN(0); -} -#endif -#endif - -static int osc_mark_page_dirty(struct lustre_handle *conn, - struct lov_stripe_md *lsm, unsigned long offset) -{ - struct client_obd *cli = &class_conn2obd(conn)->u.cli; - struct otree *dirty_ot = lsm->lsm_oinfo[0].loi_dirty_ot; - int rc; - ENTRY; - - down(&cli->cl_dirty_sem); - - if (cli->cl_ost_can_grant && - (cli->cl_dirty + PAGE_CACHE_SIZE >= cli->cl_dirty_granted)) { 
- CDEBUG(D_INODE, "granted "LPU64" < "LPU64"\n", - cli->cl_dirty_granted, cli->cl_dirty + PAGE_CACHE_SIZE); - GOTO(out, rc = -EDQUOT); - } - - rc = ot_mark_offset(dirty_ot, offset); - if (rc) - GOTO(out, rc); - - cli->cl_dirty += PAGE_CACHE_SIZE; - CDEBUG(D_INODE, "dirtied off %lu, now "LPU64" bytes dirty\n", - offset, cli->cl_dirty); -out: - up(&cli->cl_dirty_sem); - RETURN(rc); -} - -static int osc_clear_dirty_pages(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - unsigned long start, unsigned long end, - unsigned long *cleared) -{ - struct client_obd *cli = &class_conn2obd(conn)->u.cli; - struct otree *dirty_ot = lsm->lsm_oinfo[0].loi_dirty_ot; - unsigned long old_marked, new_marked; - int rc; - ENTRY; - - down(&cli->cl_dirty_sem); - - old_marked = ot_num_marked(dirty_ot); - - rc = ot_clear_extent(dirty_ot, start, end); - if (rc) - GOTO(out, rc); - - new_marked = ot_num_marked(dirty_ot); - - LASSERT(new_marked <= old_marked); - LASSERT(old_marked * PAGE_CACHE_SIZE <= cli->cl_dirty); - *cleared = old_marked - new_marked; - cli->cl_dirty -= (__u64)*cleared << PAGE_CACHE_SHIFT; - CDEBUG(D_INODE, "cleared [%lu,%lu], now "LPU64" bytes dirty\n", - start, end, cli->cl_dirty); - -out: - up(&cli->cl_dirty_sem); - RETURN(rc); -} - -static int osc_last_dirty_offset(struct lustre_handle *conn, - struct lov_stripe_md *lsm, - unsigned long *offset) -{ - struct otree *dirty_ot = lsm->lsm_oinfo[0].loi_dirty_ot; - int rc; - ENTRY; - - rc = ot_last_marked(dirty_ot, offset); - RETURN(rc); -} - -static int osc_enqueue(struct lustre_handle *connh, struct lov_stripe_md *lsm, - struct lustre_handle *parent_lock, - __u32 type, void *extentp, int extent_len, __u32 mode, - int *flags, void *callback, void *data, - struct lustre_handle *lockh) -{ - struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; - struct obd_device *obddev = class_conn2obd(connh); - struct ldlm_extent *extent = extentp; - int rc; - ENTRY; - - /* Filesystem lock extents are extended to page 
boundaries so that - * dealing with the page cache is a little smoother. */ - extent->start -= extent->start & ~PAGE_MASK; - extent->end |= ~PAGE_MASK; - - /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, &res_id, - type, extent, sizeof(extent), mode, data, lockh); - if (rc == 1) - /* We already have a lock, and it's referenced */ - RETURN(ELDLM_OK); - - /* If we're trying to read, we also search for an existing PW lock. The - * VFS and page cache already protect us locally, so lots of readers/ - * writers can share a single PW lock. - * - * There are problems with conversion deadlocks, so instead of - * converting a read lock to a write lock, we'll just enqueue a new - * one. - * - * At some point we should cancel the read lock instead of making them - * send us a blocking callback, but there are problems with canceling - * locks out from other users right now, too. */ - - if (mode == LCK_PR) { - rc = ldlm_lock_match(obddev->obd_namespace, LDLM_FL_MATCH_DATA, - &res_id, type, extent, sizeof(extent), - LCK_PW, data, lockh); - if (rc == 1) { - /* FIXME: This is not incredibly elegant, but it might - * be more elegant than adding another parameter to - * lock_match. I want a second opinion. 
*/ - ldlm_lock_addref(lockh, LCK_PR); - ldlm_lock_decref(lockh, LCK_PW); - - RETURN(ELDLM_OK); - } - } - - rc = ldlm_cli_enqueue(connh, NULL, obddev->obd_namespace, parent_lock, - res_id, type, extent, sizeof(extent), mode, flags, - ldlm_completion_ast, callback, data, lockh); - RETURN(rc); -} - -static int osc_match(struct lustre_handle *connh, struct lov_stripe_md *lsm, - __u32 type, void *extentp, int extent_len, __u32 mode, - int *flags, void *data, struct lustre_handle *lockh) -{ - struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; - struct obd_device *obddev = class_conn2obd(connh); - struct ldlm_extent *extent = extentp; - int rc; - ENTRY; - - /* Filesystem lock extents are extended to page boundaries so that - * dealing with the page cache is a little smoother */ - extent->start -= extent->start & ~PAGE_MASK; - extent->end |= ~PAGE_MASK; - - /* Next, search for already existing extent locks that will cover us */ - rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, type, - extent, sizeof(extent), mode, data, lockh); - if (rc) - RETURN(rc); - - /* If we're trying to read, we also search for an existing PW lock. The - * VFS and page cache already protect us locally, so lots of readers/ - * writers can share a single PW lock. */ - if (mode == LCK_PR) { - rc = ldlm_lock_match(obddev->obd_namespace, *flags, &res_id, - type, extent, sizeof(extent), LCK_PW, - data, lockh); - if (rc == 1) { - /* FIXME: This is not incredibly elegant, but it might - * be more elegant than adding another parameter to - * lock_match. I want a second opinion. 
*/ - ldlm_lock_addref(lockh, LCK_PR); - ldlm_lock_decref(lockh, LCK_PW); - } - } - RETURN(rc); -} - -static int osc_cancel(struct lustre_handle *oconn, struct lov_stripe_md *md, - __u32 mode, struct lustre_handle *lockh) -{ - ENTRY; - - ldlm_lock_decref(lockh, mode); - - RETURN(0); -} - -static int osc_cancel_unused(struct lustre_handle *connh, - struct lov_stripe_md *lsm, int flags, void *opaque) -{ - struct obd_device *obddev = class_conn2obd(connh); - struct ldlm_res_id res_id = { .name = {lsm->lsm_object_id} }; - - return ldlm_cli_cancel_unused(obddev->obd_namespace, &res_id, flags, - opaque); -} - -static int osc_statfs(struct obd_export *exp, struct obd_statfs *osfs) -{ - struct obd_statfs *msfs; - struct ptlrpc_request *request; - int rc, size = sizeof(*osfs); - ENTRY; - - request = ptlrpc_prep_req(exp->exp_obd->u.cli.cl_import, OST_STATFS, 0, - NULL, NULL); - if (!request) - RETURN(-ENOMEM); - - request->rq_replen = lustre_msg_size(1, &size); - - rc = ptlrpc_queue_wait(request); - if (rc) { - CERROR("%s failed: rc = %d\n", __FUNCTION__, rc); - GOTO(out, rc); - } - - msfs = lustre_swab_repbuf (request, 0, sizeof (*msfs), - lustre_swab_obd_statfs); - if (msfs == NULL) { - CERROR ("Can't unpack obd_statfs\n"); - GOTO (out, rc = -EPROTO); - } - - memcpy (osfs, msfs, sizeof (*msfs)); - - EXIT; - out: - ptlrpc_req_finished(request); - return rc; -} - -/* Retrieve object striping information. - * - * @lmmu is a pointer to an in-core struct with lmm_ost_count indicating - * the maximum number of OST indices which will fit in the user buffer. - * lmm_magic must be LOV_MAGIC (we only use 1 slot here). 
- */ -static int osc_getstripe(struct lustre_handle *conn, struct lov_stripe_md *lsm, - struct lov_mds_md *lmmu) -{ - struct lov_mds_md lmm, *lmmk; - int rc, lmm_size; - ENTRY; - - if (!lsm) - RETURN(-ENODATA); - - rc = copy_from_user(&lmm, lmmu, sizeof(lmm)); - if (rc) - RETURN(-EFAULT); - - if (lmm.lmm_magic != LOV_MAGIC) - RETURN(-EINVAL); - - if (lmm.lmm_ost_count < 1) - RETURN(-EOVERFLOW); - - lmm_size = sizeof(lmm) + sizeof(lmm.lmm_objects[0]); - OBD_ALLOC(lmmk, lmm_size); - if (rc < 0) - RETURN(rc); - - lmmk->lmm_stripe_count = 1; - lmmk->lmm_ost_count = 1; - lmmk->lmm_object_id = lsm->lsm_object_id; - lmmk->lmm_objects[0].l_object_id = lsm->lsm_object_id; - - if (copy_to_user(lmmu, lmmk, lmm_size)) - rc = -EFAULT; - - OBD_FREE(lmmk, lmm_size); - - RETURN(rc); -} - -static int osc_iocontrol(unsigned int cmd, struct lustre_handle *conn, int len, - void *karg, void *uarg) -{ - struct obd_device *obddev = class_conn2obd(conn); - struct obd_ioctl_data *data = karg; - int err = 0; - ENTRY; - - switch (cmd) { - case IOC_OSC_REGISTER_LOV: { - if (obddev->u.cli.cl_containing_lov) - GOTO(out, err = -EALREADY); - obddev->u.cli.cl_containing_lov = (struct obd_device *)karg; - GOTO(out, err); - } - case OBD_IOC_LOV_GET_CONFIG: { - char *buf; - struct lov_desc *desc; - struct obd_uuid uuid; - - buf = NULL; - len = 0; - if (obd_ioctl_getdata(&buf, &len, (void *)uarg)) - GOTO(out, err = -EINVAL); - - data = (struct obd_ioctl_data *)buf; - - if (sizeof(*desc) > data->ioc_inllen1) { - OBD_FREE(buf, len); - GOTO(out, err = -EINVAL); - } - - if (data->ioc_inllen2 < sizeof(uuid)) { - OBD_FREE(buf, len); - GOTO(out, err = -EINVAL); - } - - desc = (struct lov_desc *)data->ioc_inlbuf1; - desc->ld_tgt_count = 1; - desc->ld_active_tgt_count = 1; - desc->ld_default_stripe_count = 1; - desc->ld_default_stripe_size = 0; - desc->ld_default_stripe_offset = 0; - desc->ld_pattern = 0; - memcpy(&desc->ld_uuid, &obddev->obd_uuid, sizeof(uuid)); - - memcpy(data->ioc_inlbuf2, 
&obddev->obd_uuid, sizeof(uuid)); - - err = copy_to_user((void *)uarg, buf, len); - if (err) - err = -EFAULT; - obd_ioctl_freedata(buf, len); - GOTO(out, err); - } - case LL_IOC_LOV_SETSTRIPE: - err = obd_alloc_memmd(conn, karg); - if (err > 0) - err = 0; - GOTO(out, err); - case LL_IOC_LOV_GETSTRIPE: - err = osc_getstripe(conn, karg, uarg); - GOTO(out, err); - case OBD_IOC_CLIENT_RECOVER: - err = ptlrpc_recover_import(obddev->u.cli.cl_import, - data->ioc_inlbuf1); - GOTO(out, err); - case IOC_OSC_SET_ACTIVE: - err = ptlrpc_set_import_active(obddev->u.cli.cl_import, - data->ioc_offset); - GOTO(out, err); - default: - CERROR ("osc_ioctl(): unrecognised ioctl %#x\n", cmd); - GOTO(out, err = -ENOTTY); - } -out: - return err; -} - -static int osc_get_info(struct lustre_handle *conn, obd_count keylen, - void *key, __u32 *vallen, void *val) -{ - ENTRY; - if (!vallen || !val) - RETURN(-EFAULT); - - if (keylen > strlen("lock_to_stripe") && - strcmp(key, "lock_to_stripe") == 0) { - __u32 *stripe = val; - *vallen = sizeof(*stripe); - *stripe = 0; - RETURN(0); - } - RETURN(-EINVAL); -} - -struct obd_ops osc_obd_ops = { - o_owner: THIS_MODULE, - o_attach: osc_attach, - o_detach: osc_detach, - o_setup: client_obd_setup, - o_cleanup: client_obd_cleanup, - o_connect: client_import_connect, - o_disconnect: client_import_disconnect, - o_statfs: osc_statfs, - o_packmd: osc_packmd, - o_unpackmd: osc_unpackmd, - o_create: osc_create, - o_destroy: osc_destroy, - o_getattr: osc_getattr, - o_getattr_async: osc_getattr_async, - o_setattr: osc_setattr, - o_open: osc_open, - o_close: osc_close, - o_brw: osc_brw, - o_brw_async: osc_brw_async, - o_punch: osc_punch, - o_enqueue: osc_enqueue, - o_match: osc_match, - o_cancel: osc_cancel, - o_cancel_unused: osc_cancel_unused, - o_iocontrol: osc_iocontrol, - o_get_info: osc_get_info, - .o_mark_page_dirty = osc_mark_page_dirty, - .o_clear_dirty_pages = osc_clear_dirty_pages, - .o_last_dirty_offset = osc_last_dirty_offset, -}; - -struct obd_ops 
sanosc_obd_ops = { - o_owner: THIS_MODULE, - o_attach: osc_attach, - o_detach: osc_detach, - o_cleanup: client_obd_cleanup, - o_connect: client_import_connect, - o_disconnect: client_import_disconnect, - o_statfs: osc_statfs, - o_packmd: osc_packmd, - o_unpackmd: osc_unpackmd, - o_create: osc_create, - o_destroy: osc_destroy, - o_getattr: osc_getattr, - o_getattr_async: osc_getattr_async, - o_setattr: osc_setattr, - o_open: osc_open, - o_close: osc_close, -#ifdef __KERNEL__ - o_setup: client_sanobd_setup, - o_brw: sanosc_brw, -#endif - o_punch: osc_punch, - o_enqueue: osc_enqueue, - o_match: osc_match, - o_cancel: osc_cancel, - o_cancel_unused: osc_cancel_unused, - o_iocontrol: osc_iocontrol, - .o_mark_page_dirty = osc_mark_page_dirty, - .o_clear_dirty_pages = osc_clear_dirty_pages, - .o_last_dirty_offset = osc_last_dirty_offset, -}; - -int __init osc_init(void) -{ - struct lprocfs_static_vars lvars; - int rc; - ENTRY; - - LASSERT(sizeof(struct obd_client_handle) <= FD_OSTDATA_SIZE); - LASSERT(sizeof(struct obd_client_handle) <= OBD_INLINESZ); - - lprocfs_init_vars(&lvars); - - rc = class_register_type(&osc_obd_ops, lvars.module_vars, - LUSTRE_OSC_NAME); - if (rc) - RETURN(rc); - - rc = class_register_type(&sanosc_obd_ops, lvars.module_vars, - LUSTRE_SANOSC_NAME); - if (rc) - class_unregister_type(LUSTRE_OSC_NAME); - - RETURN(rc); -} - -static void __exit osc_exit(void) -{ - class_unregister_type(LUSTRE_SANOSC_NAME); - class_unregister_type(LUSTRE_OSC_NAME); -} - -#ifdef __KERNEL__ -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Object Storage Client (OSC)"); -MODULE_LICENSE("GPL"); - -module_init(osc_init); -module_exit(osc_exit); -#endif diff --git a/lustre/ost/.cvsignore b/lustre/ost/.cvsignore deleted file mode 100644 index e530020..0000000 --- a/lustre/ost/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/ost/Makefile.am b/lustre/ost/Makefile.am deleted file mode 100644 index b2e51c3..0000000 --- a/lustre/ost/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= - -MODULE = ost -modulefs_DATA = ost.o -EXTRA_PROGRAMS = ost -ost_SOURCES = ost_handler.c lproc_ost.c - -include $(top_srcdir)/Rules diff --git a/lustre/ost/lproc_ost.c b/lustre/ost/lproc_ost.c deleted file mode 100644 index c44093c..0000000 --- a/lustre/ost/lproc_ost.c +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_OST - -#include <linux/obd_class.h> -#include <linux/lprocfs_status.h> - -#ifndef LPROCFS -struct lprocfs_vars lprocfs_obd_vars[] = { {0} }; -struct lprocfs_vars lprocfs_module_vars[] = { {0} }; -#else -struct lprocfs_vars lprocfs_obd_vars[] = { - { "uuid", lprocfs_rd_uuid, 0, 0 }, - { 0 } -}; - -struct lprocfs_vars lprocfs_module_vars[] = { - { "num_refs", lprocfs_rd_numrefs, 0, 0 }, - { 0 } -}; - -#endif /* LPROCFS */ -LPROCFS_INIT_VARS(lprocfs_module_vars, lprocfs_obd_vars) diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c deleted file mode 100644 index 023deb2..0000000 --- a/lustre/ost/ost_handler.c +++ /dev/null @@ -1,1171 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Storage Target Handling functions - * Lustre Object Server Module (OST) - * - * This server is single threaded at present (but can easily be multi - * threaded). 
For testing and management it is treated as an - * obd_device, although it does not export a full OBD method table - * (the requests are coming in over the wire, so object target - * modules do not have a full method table.) - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_OST - -#include <linux/module.h> -#include <linux/obd_ost.h> -#include <linux/lustre_net.h> -#include <linux/lustre_dlm.h> -#include <linux/lustre_export.h> -#include <linux/init.h> -#include <linux/lprocfs_status.h> - -inline void oti_init(struct obd_trans_info *oti, - struct ptlrpc_request *req) -{ - if(oti == NULL) - return; - memset(oti, 0, sizeof *oti); - - - if (req->rq_repmsg && req->rq_reqmsg != 0) - oti->oti_transno = req->rq_repmsg->transno; - - EXIT; -} - -inline void oti_to_request(struct obd_trans_info *oti, - struct ptlrpc_request *req) -{ - int i; - struct oti_req_ack_lock *ack_lock; - - if(oti == NULL) - return; - - if (req->rq_repmsg) - req->rq_repmsg->transno = oti->oti_transno; - - /* XXX 4 == entries in oti_ack_locks??? 
*/ - for (ack_lock = oti->oti_ack_locks, i = 0; i < 4; i++, ack_lock++) { - if (!ack_lock->mode) - break; - memcpy(&req->rq_ack_locks[i].lock, &ack_lock->lock, - sizeof(req->rq_ack_locks[i].lock)); - req->rq_ack_locks[i].mode = ack_lock->mode; - } - EXIT; -} - -static int ost_destroy(struct ptlrpc_request *req, struct obd_trans_info *oti) -{ - struct lustre_handle *conn = &req->rq_reqmsg->handle; - struct ost_body *body; - int rc, size = sizeof(*body); - ENTRY; - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) - RETURN (-EFAULT); - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - req->rq_status = obd_destroy(conn, &body->oa, NULL, oti); - RETURN(0); -} - -static int ost_getattr(struct ptlrpc_request *req) -{ - struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg; - struct ost_body *body, *repbody; - int rc, size = sizeof(*body); - ENTRY; - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) - RETURN (-EFAULT); - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - req->rq_status = obd_getattr(conn, &repbody->oa, NULL); - RETURN(0); -} - -static int ost_statfs(struct ptlrpc_request *req) -{ - struct obd_statfs *osfs; - int rc, size = sizeof(*osfs); - ENTRY; - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - osfs = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*osfs)); - memset(osfs, 0, size); - - req->rq_status = obd_statfs(req->rq_export, osfs); - if (req->rq_status != 0) - CERROR("ost: statfs failed: rc %d\n", req->rq_status); - - RETURN(0); -} - -static int ost_syncfs(struct ptlrpc_request *req) -{ - struct obd_statfs *osfs; - int rc, size = sizeof(*osfs); - ENTRY; - - rc = 
lustre_pack_msg(0, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - rc = obd_syncfs(req->rq_export); - if (rc) { - CERROR("ost: syncfs failed: rc %d\n", rc); - req->rq_status = rc; - RETURN(rc); - } - - RETURN(0); -} - -static int ost_open(struct ptlrpc_request *req, struct obd_trans_info *oti) -{ - struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg; - struct ost_body *body, *repbody; - int rc, size = sizeof(*repbody); - ENTRY; - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) - return (-EFAULT); - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - req->rq_status = obd_open(conn, &repbody->oa, NULL, oti, NULL); - RETURN(0); -} - -static int ost_close(struct ptlrpc_request *req, struct obd_trans_info *oti) -{ - struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg; - struct ost_body *body, *repbody; - int rc, size = sizeof(*repbody); - ENTRY; - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) - RETURN (-EFAULT); - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - req->rq_status = obd_close(conn, &repbody->oa, NULL, oti); - RETURN(0); -} - -static int ost_create(struct ptlrpc_request *req, struct obd_trans_info *oti) -{ - struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg; - struct ost_body *body, *repbody; - int rc, size = sizeof(*repbody); - ENTRY; - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) - RETURN (-EFAULT); - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - 
RETURN(rc); - - repbody = lustre_msg_buf (req->rq_repmsg, 0, sizeof (*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - req->rq_status = obd_create(conn, &repbody->oa, NULL, oti); - RETURN(0); -} - -static int ost_punch(struct ptlrpc_request *req, struct obd_trans_info *oti) -{ - struct lustre_handle *conn = (struct lustre_handle *)req->rq_reqmsg; - struct ost_body *body, *repbody; - int rc, size = sizeof(*repbody); - ENTRY; - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) - RETURN (-EFAULT); - - if ((body->oa.o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) != - (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) - RETURN(-EINVAL); - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - req->rq_status = obd_punch(conn, &repbody->oa, NULL, repbody->oa.o_size, - repbody->oa.o_blocks, oti); - RETURN(0); -} - -static int ost_setattr(struct ptlrpc_request *req, struct obd_trans_info *oti) -{ - struct lustre_handle *conn = &req->rq_reqmsg->handle; - struct ost_body *body, *repbody; - int rc, size = sizeof(*repbody); - ENTRY; - - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) - RETURN (-EFAULT); - - rc = lustre_pack_msg(1, &size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - RETURN(rc); - - repbody = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*repbody)); - memcpy(&repbody->oa, &body->oa, sizeof(body->oa)); - - req->rq_status = obd_setattr(conn, &repbody->oa, NULL, oti); - RETURN(0); -} - -static int ost_bulk_timeout(void *data) -{ - ENTRY; - /* We don't fail the connection here, because having the export - * killed makes the (vital) call to commitrw very sad. 
- */ - RETURN(1); -} - -static int get_per_page_niobufs (struct obd_ioobj *ioo, int nioo, - struct niobuf_remote *rnb, int nrnb, - struct niobuf_remote **pp_rnbp) -{ - /* Copy a remote niobuf, splitting it into page-sized chunks - * and setting ioo[i].ioo_bufcnt accordingly */ - struct niobuf_remote *pp_rnb; - int i; - int j; - int page; - int rnbidx = 0; - int npages = 0; - - /* first count and check the number of pages required */ - for (i = 0; i < nioo; i++) - for (j = 0; j < ioo->ioo_bufcnt; j++, rnbidx++) { - obd_off offset = rnb[rnbidx].offset; - obd_off p0 = offset >> PAGE_SHIFT; - obd_off pn = (offset + rnb[rnbidx].len - 1)>>PAGE_SHIFT; - - LASSERT (rnbidx < nrnb); - - npages += (pn + 1 - p0); - - if (rnb[rnbidx].len == 0) { - CERROR("zero len BRW: obj %d objid "LPX64 - " buf %u\n", i, ioo[i].ioo_id, j); - return (-EINVAL); - } - if (j > 0 && - rnb[rnbidx].offset <= rnb[rnbidx-1].offset) { - CERROR("unordered BRW: obj %d objid "LPX64 - " buf %u offset "LPX64" <= "LPX64"\n", - i, ioo[i].ioo_id, j, rnb[rnbidx].offset, - rnb[rnbidx].offset); - return (-EINVAL); - } - } - - LASSERT (rnbidx == nrnb); - - if (npages == nrnb) { /* all niobufs are for single pages */ - *pp_rnbp = rnb; - return (npages); - } - - OBD_ALLOC (pp_rnb, sizeof (*pp_rnb) * npages); - if (pp_rnb == NULL) - return (-ENOMEM); - - /* now do the actual split */ - page = rnbidx = 0; - for (i = 0; i < nioo; i++) { - int obj_pages = 0; - - for (j = 0; j < ioo[i].ioo_bufcnt; j++, rnbidx++) { - obd_off off = rnb[rnbidx].offset; - int nob = rnb[rnbidx].len; - - LASSERT (rnbidx < nrnb); - do { - obd_off poff = off & (PAGE_SIZE - 1); - int pnob = (poff + nob > PAGE_SIZE) ? 
- PAGE_SIZE - poff : nob; - - LASSERT (page < npages); - pp_rnb[page].len = pnob; - pp_rnb[page].offset = off; - pp_rnb[page].flags = rnb->flags; - - CDEBUG (D_PAGE, " obj %d id "LPX64 - "page %d(%d) "LPX64" for %d\n", - i, ioo[i].ioo_id, obj_pages, page, - pp_rnb[page].offset, pp_rnb[page].len); - page++; - obj_pages++; - - off += pnob; - nob -= pnob; - } while (nob > 0); - LASSERT (nob == 0); - } - ioo[i].ioo_bufcnt = obj_pages; - } - LASSERT (page == npages); - - *pp_rnbp = pp_rnb; - return (npages); -} - -static void free_per_page_niobufs (int npages, struct niobuf_remote *pp_rnb, - struct niobuf_remote *rnb) -{ - if (pp_rnb == rnb) /* didn't allocate above */ - return; - - OBD_FREE (pp_rnb, sizeof (*pp_rnb) * npages); -} - -#if CHECKSUM_BULK -__u64 ost_checksum_bulk (struct ptlrpc_bulk_desc *desc) -{ - __u64 cksum = 0; - struct list_head *tmp; - char *ptr; - - list_for_each (tmp, &desc->bd_page_list) { - struct ptlrpc_bulk_page *bp; - - bp = list_entry (tmp, struct ptlrpc_bulk_page, bp_link); - ptr = kmap (bp->bp_page); - ost_checksum (&cksum, ptr + bp->bp_pageoffset, bp->bp_buflen); - kunmap (bp->bp_page); - } -} -#endif - -static int ost_brw_read(struct ptlrpc_request *req) -{ - struct ptlrpc_bulk_desc *desc; - struct niobuf_remote *remote_nb; - struct niobuf_remote *pp_rnb; - struct niobuf_local *local_nb; - struct obd_ioobj *ioo; - struct ost_body *body; - struct l_wait_info lwi; - void *desc_priv = NULL; - int size[1] = { sizeof(*body) }; - int comms_error = 0; - int niocount; - int npages; - int nob = 0; - int rc; - int i; - ENTRY; - - if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_READ_BULK)) - GOTO(out, rc = -EIO); - - body = lustre_swab_reqbuf(req, 0, sizeof(*body), lustre_swab_ost_body); - if (body == NULL) { - CERROR ("Missing/short ost_body\n"); - GOTO (out, rc = -EFAULT); - } - - ioo = lustre_swab_reqbuf (req, 1, sizeof (*ioo), - lustre_swab_obd_ioobj); - if (ioo == NULL) { - CERROR ("Missing/short ioobj\n"); - GOTO (out, rc = -EFAULT); - } - - niocount = 
ioo->ioo_bufcnt; - remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb), - lustre_swab_niobuf_remote); - if (remote_nb == NULL) { - CERROR ("Missing/short niobuf\n"); - GOTO (out, rc = -EFAULT); - } - if (lustre_msg_swabbed (req->rq_reqmsg)) { /* swab remaining niobufs */ - for (i = 1; i < niocount; i++) - lustre_swab_niobuf_remote (&remote_nb[i]); - } - - rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - GOTO(out, rc); - - /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */ - npages = get_per_page_niobufs (ioo, 1, remote_nb, niocount, &pp_rnb); - if (npages < 0) - GOTO(out, rc = npages); - - OBD_ALLOC(local_nb, sizeof(*local_nb) * npages); - if (local_nb == NULL) - GOTO(out_pp_rnb, rc = -ENOMEM); - - desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, OST_BULK_PORTAL); - if (desc == NULL) - GOTO(out_local, rc = -ENOMEM); - - rc = obd_preprw(OBD_BRW_READ, req->rq_export, NULL, 1, ioo, npages, - pp_rnb, local_nb, &desc_priv, NULL); - if (rc != 0) - GOTO(out_bulk, rc); - - nob = 0; - for (i = 0; i < npages; i++) { - int page_rc = local_nb[i].rc; - - if (page_rc < 0) { /* error */ - rc = page_rc; - break; - } - - LASSERT (page_rc <= pp_rnb[i].len); - nob += page_rc; - if (page_rc != 0) { /* some data! 
*/ - LASSERT (local_nb[i].page != NULL); - rc = ptlrpc_prep_bulk_page(desc, local_nb[i].page, - pp_rnb[i].offset& ~PAGE_MASK, - page_rc); - if (rc != 0) - break; - } - - if (page_rc != pp_rnb[i].len) { /* short read */ - /* All subsequent pages should be 0 */ - while (++i < npages) - LASSERT (local_nb[i].rc == 0); - break; - } - } - - if (rc == 0) { - rc = ptlrpc_bulk_put(desc); - if (rc == 0) { - lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, - desc); - rc = l_wait_event(desc->bd_waitq, - ptlrpc_bulk_complete(desc), &lwi); - if (rc) { - LASSERT(rc == -ETIMEDOUT); - CERROR ("timeout waiting for bulk PUT\n"); - ptlrpc_abort_bulk (desc); - } - } else { - CERROR("ptlrpc_bulk_put failed RC: %d\n", rc); - } - comms_error = rc != 0; - } - - /* Must commit after prep above in all cases */ - rc = obd_commitrw(OBD_BRW_READ, req->rq_export, 1, ioo, npages, - local_nb, desc_priv, NULL); - -#if CHECKSUM_BULK - if (rc == 0) { - body = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*body)); - body->oa.o_rdev = ost_checksum_bulk (desc); - body->oa.o_valid |= OBD_MD_FLCKSUM; - } -#endif - - out_bulk: - ptlrpc_free_bulk (desc); - out_local: - OBD_FREE(local_nb, sizeof(*local_nb) * npages); - out_pp_rnb: - free_per_page_niobufs (npages, pp_rnb, remote_nb); - out: - LASSERT (rc <= 0); - if (rc == 0) { - req->rq_status = nob; - ptlrpc_reply(req); - } else if (!comms_error) { - /* only reply if comms OK */ - req->rq_status = rc; - ptlrpc_error(req); - } else { - if (req->rq_repmsg != NULL) { - /* reply out callback would free */ - OBD_FREE (req->rq_repmsg, req->rq_replen); - } - CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n", - req->rq_export->exp_client_uuid.uuid, - req->rq_connection->c_remote_uuid.uuid, - req->rq_connection->c_peer.peer_nid); - ptlrpc_fail_export(req->rq_export); - } - - RETURN(rc); -} - -static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti) -{ - struct ptlrpc_bulk_desc *desc; - struct niobuf_remote *remote_nb; - struct 
niobuf_remote *pp_rnb; - struct niobuf_local *local_nb; - struct obd_ioobj *ioo; - struct ost_body *body; - struct l_wait_info lwi; - void *desc_priv = NULL; - __u32 *rcs; - int size[2] = { sizeof (*body) }; - int objcount, niocount, npages; - int comms_error = 0; - int rc, rc2, swab, i, j; - ENTRY; - - if (OBD_FAIL_CHECK(OBD_FAIL_OST_BRW_WRITE_BULK)) - GOTO(out, rc = -EIO); - - /* pause before transaction has been started */ - OBD_FAIL_TIMEOUT(OBD_FAIL_OST_BRW_PAUSE_BULK | OBD_FAIL_ONCE, - obd_timeout +1); - - swab = lustre_msg_swabbed (req->rq_reqmsg); - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR ("Missing/short ost_body\n"); - GOTO(out, rc = -EFAULT); - } - - LASSERT_REQSWAB (req, 1); - objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo); - if (objcount == 0) { - CERROR ("Missing/short ioobj\n"); - GOTO (out, rc = -EFAULT); - } - ioo = lustre_msg_buf (req->rq_reqmsg, 1, objcount * sizeof (*ioo)); - LASSERT (ioo != NULL); - for (niocount = i = 0; i < objcount; i++) { - if (swab) - lustre_swab_obd_ioobj (&ioo[i]); - if (ioo[i].ioo_bufcnt == 0) { - CERROR ("ioo[%d] has zero bufcnt\n", i); - GOTO (out, rc = -EFAULT); - } - niocount += ioo[i].ioo_bufcnt; - } - - remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb), - lustre_swab_niobuf_remote); - if (remote_nb == NULL) { - CERROR ("Missing/short niobuf\n"); - GOTO(out, rc = -EFAULT); - } - if (swab) { /* swab the remaining niobufs */ - for (i = 1; i < niocount; i++) - lustre_swab_niobuf_remote (&remote_nb[i]); - } - - size[1] = niocount * sizeof (*rcs); - rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc != 0) - GOTO (out, rc); - rcs = lustre_msg_buf (req->rq_repmsg, 1, niocount * sizeof (*rcs)); - - /* CAVEAT EMPTOR this sets ioo->ioo_bufcnt to # pages */ - npages = get_per_page_niobufs(ioo, objcount,remote_nb,niocount,&pp_rnb); - if (npages < 0) - GOTO (out, rc = npages); - - OBD_ALLOC(local_nb, 
sizeof(*local_nb) * npages); - if (local_nb == NULL) - GOTO(out_pp_rnb, rc = -ENOMEM); - - desc = ptlrpc_prep_bulk_exp (req, BULK_GET_SINK, OST_BULK_PORTAL); - if (desc == NULL) - GOTO(out_local, rc = -ENOMEM); - - rc = obd_preprw(OBD_BRW_WRITE, req->rq_export, NULL, objcount, ioo, - npages, pp_rnb, local_nb, &desc_priv, oti); - if (rc != 0) - GOTO (out_bulk, rc); - - /* NB Having prepped, we must commit... */ - - for (i = 0; i < npages; i++) { - rc = ptlrpc_prep_bulk_page(desc, local_nb[i].page, - pp_rnb[i].offset & (PAGE_SIZE - 1), - pp_rnb[i].len); - if (rc != 0) - break; - } - - if (rc == 0) { - rc = ptlrpc_bulk_get(desc); - if (rc == 0) { - lwi = LWI_TIMEOUT(obd_timeout * HZ, ost_bulk_timeout, - desc); - rc = l_wait_event(desc->bd_waitq, - ptlrpc_bulk_complete(desc), &lwi); - if (rc) { - LASSERT(rc == -ETIMEDOUT); - CERROR ("timeout waiting for bulk GET\n"); - ptlrpc_abort_bulk (desc); - } - } else { - CERROR("ptlrpc_bulk_get failed RC: %d\n", rc); - } - comms_error = rc != 0; - } - -#if CHECKSUM_BULK - if (rc == 0 && (body->oa.o_valid & OBD_MD_FLCKSUM) != 0) { - static int cksum_counter; - __u64 client_cksum = body->oa.o_rdev; - __u64 cksum = ost_checksum_bulk (desc); - - if (client_cksum != cksum) { - CERROR("Bad checksum: client "LPX64", server "LPX64 - ", client NID "LPX64"\n", client_cksum, cksum, - req->rq_connection->c_peer.peer_nid); - cksum_counter = 1; - } else { - cksum_counter++; - if ((cksum_counter & (-cksum_counter)) == cksum_counter) - CERROR("Checksum %d from "LPX64": "LPX64" OK\n", - cksum_counter, - req->rq_connection->c_peer.peer_nid, - cksum); - } - } -#endif - /* Must commit after prep above in all cases */ - rc2 = obd_commitrw(OBD_BRW_WRITE, req->rq_export, objcount, ioo, - npages, local_nb, desc_priv, oti); - - if (rc == 0) { - /* set per-requested niobuf return codes */ - for (i = j = 0; i < niocount; i++) { - int nob = remote_nb[i].len; - - rcs[i] = 0; - do { - LASSERT (j < npages); - if (local_nb[j].rc < 0) - rcs[i] = local_nb[j].rc; 
- nob -= pp_rnb[j].len; - j++; - } while (nob > 0); - LASSERT (nob == 0); - } - LASSERT (j == npages); - } - if (rc == 0) - rc = rc2; - - out_bulk: - ptlrpc_free_bulk (desc); - out_local: - OBD_FREE(local_nb, sizeof(*local_nb) * npages); - out_pp_rnb: - free_per_page_niobufs (npages, pp_rnb, remote_nb); - out: - if (rc == 0) { - oti_to_request(oti, req); - rc = ptlrpc_reply(req); - } else if (!comms_error) { - /* Only reply if there was no comms problem with bulk */ - req->rq_status = rc; - ptlrpc_error(req); - } else { - if (req->rq_repmsg != NULL) { - /* reply out callback would free */ - OBD_FREE (req->rq_repmsg, req->rq_replen); - } - CERROR("bulk IO comms error: evicting %s@%s nid "LPU64"\n", - req->rq_export->exp_client_uuid.uuid, - req->rq_connection->c_remote_uuid.uuid, - req->rq_connection->c_peer.peer_nid); - ptlrpc_fail_export(req->rq_export); - } - RETURN(rc); -} - -static int ost_san_brw(struct ptlrpc_request *req, int cmd) -{ - struct lustre_handle *conn = &req->rq_reqmsg->handle; - struct niobuf_remote *remote_nb, *res_nb; - struct obd_ioobj *ioo; - struct ost_body *body; - int rc, i, j, objcount, niocount, size[2] = {sizeof(*body)}; - int n; - int swab; - ENTRY; - - /* XXX not set to use latest protocol */ - - swab = lustre_msg_swabbed (req->rq_reqmsg); - body = lustre_swab_reqbuf (req, 0, sizeof (*body), - lustre_swab_ost_body); - if (body == NULL) { - CERROR ("Missing/short ost_body\n"); - GOTO (out, rc = -EFAULT); - } - - ioo = lustre_swab_reqbuf(req, 1, sizeof (*ioo), - lustre_swab_obd_ioobj); - if (ioo == NULL) { - CERROR ("Missing/short ioobj\n"); - GOTO (out, rc = -EFAULT); - } - objcount = req->rq_reqmsg->buflens[1] / sizeof(*ioo); - niocount = ioo[0].ioo_bufcnt; - for (i = 1; i < objcount; i++) { - if (swab) - lustre_swab_obd_ioobj (&ioo[i]); - niocount += ioo[i].ioo_bufcnt; - } - - remote_nb = lustre_swab_reqbuf(req, 2, niocount * sizeof (*remote_nb), - lustre_swab_niobuf_remote); - if (remote_nb == NULL) { - CERROR ("Missing/short 
niobuf\n"); - GOTO (out, rc = -EFAULT); - } - if (swab) { /* swab the remaining niobufs */ - for (i = 1; i < niocount; i++) - lustre_swab_niobuf_remote (&remote_nb[i]); - } - - for (i = n = 0; i < objcount; i++) { - for (j = 0; j < ioo[i].ioo_bufcnt; j++, n++) { - if (remote_nb[n].len == 0) { - CERROR("zero len BRW: objid "LPX64" buf %u\n", - ioo[i].ioo_id, j); - GOTO(out, rc = -EINVAL); - } - if (j && remote_nb[n].offset <= remote_nb[n-1].offset) { - CERROR("unordered BRW: objid "LPX64 - " buf %u offset "LPX64" <= "LPX64"\n", - ioo[i].ioo_id, j, remote_nb[n].offset, - remote_nb[n-1].offset); - GOTO(out, rc = -EINVAL); - } - } - } - - size[1] = niocount * sizeof(*remote_nb); - rc = lustre_pack_msg(2, size, NULL, &req->rq_replen, &req->rq_repmsg); - if (rc) - GOTO(out, rc); - - req->rq_status = obd_san_preprw(cmd, conn, objcount, ioo, - niocount, remote_nb); - - if (req->rq_status) - GOTO (out, rc = 0); - - res_nb = lustre_msg_buf(req->rq_repmsg, 1, size[1]); - memcpy (res_nb, remote_nb, size[1]); - rc = 0; -out: - if (rc) { - OBD_FREE(req->rq_repmsg, req->rq_replen); - req->rq_repmsg = NULL; - req->rq_status = rc; - ptlrpc_error(req); - } else - ptlrpc_reply(req); - - return rc; -} - -static int filter_recovery_request(struct ptlrpc_request *req, - struct obd_device *obd, int *process) -{ - switch (req->rq_reqmsg->opc) { - case OST_CONNECT: /* This will never get here, but for completeness. */ - case OST_DISCONNECT: - *process = 1; - RETURN(0); - - case OBD_PING: - case OST_CLOSE: - case OST_CREATE: - case OST_DESTROY: - case OST_OPEN: - case OST_PUNCH: - case OST_SETATTR: - case OST_SYNCFS: - case OST_WRITE: - case LDLM_ENQUEUE: - *process = target_queue_recovery_request(req, obd); - RETURN(0); - - default: - DEBUG_REQ(D_ERROR, req, "not permitted during recovery"); - *process = 0; - /* XXX what should we set rq_status to here? 
*/ - req->rq_status = -EAGAIN; - RETURN(ptlrpc_error(req)); - } -} - - - -static int ost_handle(struct ptlrpc_request *req) -{ - struct obd_trans_info trans_info = { 0, }; - struct obd_trans_info *oti = &trans_info; - int should_process, fail = OBD_FAIL_OST_ALL_REPLY_NET, rc = 0; - ENTRY; - - /* XXX identical to MDS */ - if (req->rq_reqmsg->opc != OST_CONNECT) { - struct obd_device *obd; - int abort_recovery, recovering; - - if (req->rq_export == NULL) { - CERROR("lustre_ost: operation %d on unconnected OST\n", - req->rq_reqmsg->opc); - req->rq_status = -ENOTCONN; - GOTO(out, rc = -ENOTCONN); - } - - obd = req->rq_export->exp_obd; - - /* Check for aborted recovery. */ - spin_lock_bh(&obd->obd_processing_task_lock); - abort_recovery = obd->obd_abort_recovery; - recovering = obd->obd_recovering; - spin_unlock_bh(&obd->obd_processing_task_lock); - if (abort_recovery) { - target_abort_recovery(obd); - } else if (recovering) { - rc = filter_recovery_request(req, obd, &should_process); - if (rc || !should_process) - RETURN(rc); - } - } - - if (strcmp(req->rq_obd->obd_type->typ_name, "ost") != 0) - GOTO(out, rc = -EINVAL); - - oti_init(oti, req); - - switch (req->rq_reqmsg->opc) { - case OST_CONNECT: - CDEBUG(D_INODE, "connect\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_CONNECT_NET, 0); - rc = target_handle_connect(req, ost_handle); - break; - case OST_DISCONNECT: - CDEBUG(D_INODE, "disconnect\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_DISCONNECT_NET, 0); - rc = target_handle_disconnect(req); - break; - case OST_CREATE: - CDEBUG(D_INODE, "create\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_CREATE_NET, 0); - rc = ost_create(req, oti); - break; - case OST_DESTROY: - CDEBUG(D_INODE, "destroy\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_DESTROY_NET, 0); - rc = ost_destroy(req, oti); - break; - case OST_GETATTR: - CDEBUG(D_INODE, "getattr\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_GETATTR_NET, 0); - rc = ost_getattr(req); - break; - case OST_SETATTR: - CDEBUG(D_INODE, "setattr\n"); - 
OBD_FAIL_RETURN(OBD_FAIL_OST_SETATTR_NET, 0); - rc = ost_setattr(req, oti); - break; - case OST_OPEN: - CDEBUG(D_INODE, "open\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_OPEN_NET, 0); - rc = ost_open(req, oti); - break; - case OST_CLOSE: - CDEBUG(D_INODE, "close\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_CLOSE_NET, 0); - rc = ost_close(req, oti); - break; - case OST_WRITE: - CDEBUG(D_INODE, "write\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); - rc = ost_brw_write(req, oti); - /* ost_brw sends its own replies */ - RETURN(rc); - case OST_READ: - CDEBUG(D_INODE, "read\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); - rc = ost_brw_read(req); - /* ost_brw sends its own replies */ - RETURN(rc); - case OST_SAN_READ: - CDEBUG(D_INODE, "san read\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); - rc = ost_san_brw(req, OBD_BRW_READ); - /* ost_san_brw sends its own replies */ - RETURN(rc); - case OST_SAN_WRITE: - CDEBUG(D_INODE, "san write\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_BRW_NET, 0); - rc = ost_san_brw(req, OBD_BRW_WRITE); - /* ost_san_brw sends its own replies */ - RETURN(rc); - case OST_PUNCH: - CDEBUG(D_INODE, "punch\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_PUNCH_NET, 0); - rc = ost_punch(req, oti); - break; - case OST_STATFS: - CDEBUG(D_INODE, "statfs\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_STATFS_NET, 0); - rc = ost_statfs(req); - break; - case OST_SYNCFS: - CDEBUG(D_INODE, "sync\n"); - OBD_FAIL_RETURN(OBD_FAIL_OST_SYNCFS_NET, 0); - rc = ost_syncfs(req); - break; - case OBD_PING: - DEBUG_REQ(D_INODE, req, "ping"); - rc = target_handle_ping(req); - break; - case LDLM_ENQUEUE: - CDEBUG(D_INODE, "enqueue\n"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_ENQUEUE, 0); - rc = ldlm_handle_enqueue(req, ldlm_server_completion_ast, - ldlm_server_blocking_ast); - fail = OBD_FAIL_OST_LDLM_REPLY_NET; - break; - case LDLM_CONVERT: - CDEBUG(D_INODE, "convert\n"); - OBD_FAIL_RETURN(OBD_FAIL_LDLM_CONVERT, 0); - rc = ldlm_handle_convert(req); - break; - case LDLM_CANCEL: - CDEBUG(D_INODE, "cancel\n"); - 
OBD_FAIL_RETURN(OBD_FAIL_LDLM_CANCEL, 0); - rc = ldlm_handle_cancel(req); - break; - case LDLM_BL_CALLBACK: - case LDLM_CP_CALLBACK: - CDEBUG(D_INODE, "callback\n"); - CERROR("callbacks should not happen on OST\n"); - /* fall through */ - default: - CERROR("Unexpected opcode %d\n", req->rq_reqmsg->opc); - req->rq_status = -ENOTSUPP; - rc = ptlrpc_error(req); - RETURN(rc); - } - - EXIT; - /* If we're DISCONNECTing, the export_data is already freed */ - if (!rc && req->rq_reqmsg->opc != OST_DISCONNECT) { - struct obd_device *obd = req->rq_export->exp_obd; - if (!obd->obd_no_transno) { - req->rq_repmsg->last_committed = - obd->obd_last_committed; - } else { - DEBUG_REQ(D_IOCTL, req, - "not sending last_committed update"); - } - CDEBUG(D_INFO, "last_committed "LPU64", xid "LPX64"\n", - obd->obd_last_committed, req->rq_xid); - } - -out: - if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LAST_REPLAY) { - struct obd_device *obd = req->rq_export->exp_obd; - - if (obd && obd->obd_recovering) { - DEBUG_REQ(D_HA, req, "LAST_REPLAY, queuing reply"); - return target_queue_final_reply(req, rc); - } - /* Lost a race with recovery; let the error path DTRT. 
*/ - rc = req->rq_status = -ENOTCONN; - } - - if (!rc) - oti_to_request(oti, req); - - target_send_reply(req, rc, fail); - return 0; -} - -static int ost_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct ost_obd *ost = &obddev->u.ost; - int err; - int i; - ENTRY; - - ost->ost_service = ptlrpc_init_svc(OST_NEVENTS, OST_NBUFS, - OST_BUFSIZE, OST_MAXREQSIZE, - OST_REQUEST_PORTAL, OSC_REPLY_PORTAL, - ost_handle, "ost", obddev); - if (!ost->ost_service) { - CERROR("failed to start service\n"); - GOTO(error_disc, err = -ENOMEM); - } - - for (i = 0; i < OST_NUM_THREADS; i++) { - char name[32]; - sprintf(name, "ll_ost_%02d", i); - err = ptlrpc_start_thread(obddev, ost->ost_service, name); - if (err) { - CERROR("error starting thread #%d: rc %d\n", i, err); - GOTO(error_disc, err = -EINVAL); - } - } - - RETURN(0); - -error_disc: - RETURN(err); -} - -static int ost_cleanup(struct obd_device *obddev, int force, int failover) -{ - struct ost_obd *ost = &obddev->u.ost; - int err = 0; - ENTRY; - - if (obddev->obd_recovering) - target_cancel_recovery_timer(obddev); - - ptlrpc_stop_all_threads(ost->ost_service); - ptlrpc_unregister_service(ost->ost_service); - - RETURN(err); -} - -int ost_attach(struct obd_device *dev, obd_count len, void *data) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_vars(&lvars); - return lprocfs_obd_attach(dev, lvars.obd_vars); -} - -int ost_detach(struct obd_device *dev) -{ - return lprocfs_obd_detach(dev); -} - -/* I don't think this function is ever used, since nothing - * connects directly to this module. 
- */ -static int ost_connect(struct lustre_handle *conn, - struct obd_device *obd, struct obd_uuid *cluuid) -{ - struct obd_export *exp; - int rc; - ENTRY; - - if (!conn || !obd || !cluuid) - RETURN(-EINVAL); - - rc = class_connect(conn, obd, cluuid); - if (rc) - RETURN(rc); - exp = class_conn2export(conn); - LASSERT(exp); - class_export_put(exp); - - RETURN(0); -} - -/* use obd ops to offer management infrastructure */ -static struct obd_ops ost_obd_ops = { - o_owner: THIS_MODULE, - o_attach: ost_attach, - o_detach: ost_detach, - o_setup: ost_setup, - o_cleanup: ost_cleanup, - o_connect: ost_connect, -}; - -static int __init ost_init(void) -{ - struct lprocfs_static_vars lvars; - ENTRY; - - lprocfs_init_vars(&lvars); - RETURN(class_register_type(&ost_obd_ops, lvars.module_vars, - LUSTRE_OST_NAME)); -} - -static void __exit ost_exit(void) -{ - class_unregister_type(LUSTRE_OST_NAME); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Object Storage Target (OST) v0.01"); -MODULE_LICENSE("GPL"); - -module_init(ost_init); -module_exit(ost_exit); diff --git a/lustre/portals/.cvsignore b/lustre/portals/.cvsignore deleted file mode 100644 index 99ac885..0000000 --- a/lustre/portals/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -Kernelenv -Makefile -Makefile.in -aclocal.m4 -autom4te.cache -config.log -config.status -configure diff --git a/lustre/portals/AUTHORS b/lustre/portals/AUTHORS deleted file mode 100644 index e69de29..0000000 diff --git a/lustre/portals/ChangeLog b/lustre/portals/ChangeLog deleted file mode 100644 index e69de29..0000000 diff --git a/lustre/portals/Kernelenv.in b/lustre/portals/Kernelenv.in deleted file mode 100644 index 29a713f..0000000 --- a/lustre/portals/Kernelenv.in +++ /dev/null @@ -1 +0,0 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include diff --git a/lustre/portals/Kernelenv.mk b/lustre/portals/Kernelenv.mk deleted file mode 100644 index 29a713f..0000000 --- 
a/lustre/portals/Kernelenv.mk +++ /dev/null @@ -1 +0,0 @@ -EXTRA_CFLAGS= -Ifs/lustre/include -Ifs/lustre/portals/include diff --git a/lustre/portals/Makefile.am b/lustre/portals/Makefile.am deleted file mode 100644 index 1a223f2..0000000 --- a/lustre/portals/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -EXTRA_DIST = Rules.linux archdep.m4 include -DIST_SUBDIRS = libcfs portals knals unals utils tests doc router -if LIBLUSTRE -SUBDIRS = portals unals utils -else -SUBDIRS = libcfs portals knals unals utils tests doc router -endif diff --git a/lustre/portals/Makefile.mk b/lustre/portals/Makefile.mk deleted file mode 100644 index be0e51a..0000000 --- a/lustre/portals/Makefile.mk +++ /dev/null @@ -1,6 +0,0 @@ -include fs/lustre/portals/Kernelenv - -obj-y += portals/ -obj-y += libcfs/ -obj-y += knals/ -obj-y += router/ diff --git a/lustre/portals/NEWS b/lustre/portals/NEWS deleted file mode 100644 index e69de29..0000000 diff --git a/lustre/portals/README b/lustre/portals/README deleted file mode 100644 index e69de29..0000000 diff --git a/lustre/portals/Rules.linux b/lustre/portals/Rules.linux deleted file mode 100644 index 93943b7..0000000 --- a/lustre/portals/Rules.linux +++ /dev/null @@ -1,25 +0,0 @@ -# included in Linux kernel directories -# Rules for module building - -if LINUX25 - -basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g') -AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) - -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS) - -else - -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS) - -endif - -tags: - rm -f $(top_srcdir)/TAGS - rm -f $(top_srcdir)/tags - 
find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs etags -a - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs etags -a - find $(top_srcdir)/../portals/ -name '*.[hc]' | xargs ctags -a - find $(top_srcdir) -name '*.[hc]' | grep -v ".orig" | xargs ctags -a diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 deleted file mode 100644 index 7a4e05c..0000000 --- a/lustre/portals/archdep.m4 +++ /dev/null @@ -1,317 +0,0 @@ - -# -------- in kernel compilation? (2.5 only) ------------- -AC_ARG_ENABLE(inkernel, [ --enable-inkernel set up 2.5 kernel makefiles]) -AM_CONDITIONAL(INKERNEL, test x$enable_inkernel = xyes) -echo "Makefile for in kernel build: $INKERNEL" - -# -------- liblustre compilation -------------- -AC_ARG_WITH(lib, [ --with-lib compile lustre library], host_cpu="lib") - -# -------- set linuxdir ------------ - -AC_ARG_WITH(linux, [ --with-linux=[path] set path to Linux source (default=/usr/src/linux)],LINUX=$with_linux,LINUX=/usr/src/linux) -AC_SUBST(LINUX) - -# --------- UML? -------------------- -AC_MSG_CHECKING(if you are running user mode linux for $host_cpu ...) 
-if test $host_cpu = "lib" ; then - host_cpu="lib" - AC_MSG_RESULT(no building Lustre library) -else - if test -e $LINUX/include/asm-um ; then - if test X`ls -id $LINUX/include/asm/ | awk '{print $1}'` = X`ls -id $LINUX/include/asm-um | awk '{print $1}'` ; then - host_cpu="um"; - AC_MSG_RESULT(yes) - else - AC_MSG_RESULT(no (asm doesn't point at asm-um)) - fi - - else - AC_MSG_RESULT(no (asm-um missing)) - fi -fi - -# --------- Linux 25 ------------------ - -AC_MSG_CHECKING(if you are running linux 2.5) -if test -e $LINUX/include/linux/namei.h ; then - linux25="yes" - AC_MSG_RESULT(yes) -else - linux25="no" - AC_MSG_RESULT(no) -fi -AM_CONDITIONAL(LINUX25, test x$linux25 = xyes) -echo "Makefiles for in linux 2.5 build: $LINUX25" - -# ------- Makeflags ------------------ - -AC_MSG_CHECKING(setting make flags system architecture: ) -case ${host_cpu} in - lib ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -Wall ' - KCPPFLAGS='-D__arch_lib__ ' - libdir='${exec_prefix}/lib/lustre' - MOD_LINK=elf_i386 -;; - um ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common ' - case ${linux25} in - yes ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) ' - ;; - * ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include ' - ;; - esac - - MOD_LINK=elf_i386 -;; - i*86 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -pipe' - case ${linux25} in - yes ) - KCPPFLAGS='-D__KERNEL__ -DMODULE -march=i686 
-I$(LINUX)/include/asm-i386/mach-default -nostdinc -iwithprefix include ' - ;; - * ) - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - ;; - esac - MOD_LINK=elf_i386 -;; - - alphaev6 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - alphaev67 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev6' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - alpha* ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -pipe -mno-fp-regs -ffixed-8 -mcpu=ev5 -Wa,-mev5' - KCPPFLAGS='-D__KERNEL__ -DMODULE ' - MOD_LINK=elf64alpha -;; - - ia64 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-g -O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -ffixed-r13 -mfixed-range=f10-f15,f32-f127 -falign-functions=32 -mb-step' - KCPPFLAGS='-D__KERNEL__ -DMODULE' - MOD_LINK=elf64_ia64 -;; - - sparc64 ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -Wno-unused -m64 -pipe -mno-fpu -mcpu=ultrasparc -mcmodel=medlow -ffixed-g4 -fcall-used-g5 -fcall-used-g7 -Wno-sign-compare -Wa,--undeclared-regs' - KCPPFLAGS='-D__KERNEL__' - MOD_LINK=elf64_sparc - -;; - - powerpc ) - AC_MSG_RESULT($host_cpu) - KCFLAGS='-O2 -Wall -Wstrict-prototypes -Wno-trigraphs -fomit-frame-pointer -fno-strict-aliasing -fno-common -D__powerpc__ -fsigned-char -msoft-float -pipe -ffixed-r2 -Wno-uninitialized -mmultiple -mstring' - KCPPFLAGS='-D__KERNEL__' - MOD_LINK=elf32ppclinux -;; - - *) - AC_ERROR("Unknown Linux Platform: $host_cpu") -;; -esac - -# ----------- make dep run? 
------------------ - -if test $host_cpu != "lib" ; then - AC_MSG_CHECKING(if make dep has been run in kernel source (host $host_cpu) ) - if test -f $LINUX/include/linux/config.h ; then - AC_MSG_RESULT(yes) - else - AC_MSG_ERROR(** cannot find $LINUX/include/linux/config.h. Run make dep in $LINUX.) - fi -fi - -# ------------ include paths ------------------ - -if test $host_cpu != "lib" ; then - KINCFLAGS="-I\$(top_srcdir)/include -I\$(top_srcdir)/portals/include -I$LINUX/include" -else - KINCFLAGS='-I$(top_srcdir)/include -I$(top_srcdir)/portals/include' -fi -CPPFLAGS="$KINCFLAGS $ARCHCPPFLAGS" - -if test $host_cpu != "lib" ; then -# ------------ autoconf.h ------------------ - AC_MSG_CHECKING(if autoconf.h is in kernel source) - if test -f $LINUX/include/linux/autoconf.h ; then - AC_MSG_RESULT(yes) - else - AC_MSG_ERROR(** cannot find $LINUX/include/linux/autoconf.h. Run make config in $LINUX.) - fi - -# ------------ RELEASE and moduledir ------------------ - AC_MSG_CHECKING(for Linux release) - - dnl We need to rid ourselves of the nasty [ ] quotes. - changequote(, ) - dnl Get release from version.h - RELEASE="`sed -ne 's/.*UTS_RELEASE[ \"]*\([0-9.a-zA-Z_-]*\).*/\1/p' $LINUX/include/linux/version.h`" - changequote([, ]) - - moduledir='$(libdir)/modules/'$RELEASE/kernel - AC_SUBST(moduledir) - - modulefsdir='$(moduledir)/fs/$(PACKAGE)' - AC_SUBST(modulefsdir) - - AC_MSG_RESULT($RELEASE) - AC_SUBST(RELEASE) - -# ---------- modversions? 
-------------------- - AC_MSG_CHECKING(for MODVERSIONS) - if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1; - then - MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB" - AC_MSG_RESULT(yes) - else - MFLAGS= - AC_MSG_RESULT(no) - fi -fi - -# ---------- Portals flags -------------------- - -#AC_PREFIX_DEFAULT([]) -#if test "x$prefix" = xNONE || test "x$prefix" = x; then -# usrprefix=/usr -#else -# usrprefix='${prefix}' -#fi -#AC_SUBST(usrprefix) - -AC_MSG_CHECKING(if kernel has CPU affinity support) -if test "$target_cpu" != ia64 ; then - enable_affinity_temp="-DCPU_AFFINITY=1" - AC_MSG_RESULT(yes) -else - enable_affinity_temp="" - AC_MSG_RESULT(no) -fi - -AC_MSG_CHECKING(if kernel has zero-copy TCP support) -ZCCD="`grep -c zccd $LINUX/include/linux/skbuff.h`" -if test "$ZCCD" != 0 ; then - enable_zerocopy_temp="-DSOCKNAL_ZC=1" - AC_MSG_RESULT(yes) -else - enable_zerocopy_temp="" - AC_MSG_RESULT(no) -fi - -AC_ARG_ENABLE(zerocopy, [ --enable-zerocopy enable socknal zerocopy],enable_zerocopy=$enable_zerocopy_temp, enable_zerocopy="") - -AC_ARG_ENABLE(affinity, [ --enable-affinity enable process/irq affinity],enable_affinity="-DCPU_AFFINITY=1", enable_affinity=$enable_affinity_temp) -##################################### - -AC_MSG_CHECKING(if quadrics kernel headers are present) -if test -d $LINUX/drivers/net/qsnet ; then - AC_MSG_RESULT(yes) - QSWNAL="qswnal" - with_quadrics="-I$LINUX/drivers/net/qsnet/include" - : -elif test -d $LINUX/drivers/qsnet1 ; then - AC_MSG_RESULT(yes) - QSWNAL="qswnal" - with_quadrics="-I$LINUX/drivers/qsnet1/include -DPROPRIETARY_ELAN" - : -elif test -d $LINUX/drivers/quadrics ; then - AC_MSG_RESULT(yes) - QSWNAL="qswnal" - with_quadrics="-I$LINUX/drivers/quadrics/include -DPROPRIETARY_ELAN" - : -#elif test -d /usr/include/elan3 ; then -# AC_MSG_RESULT(yes) -# QSWNAL="qswnal" -# with_quadrics="" -# : -else - AC_MSG_RESULT(no) - QSWNAL="" - with_quadrics="" - : -fi 
-AC_SUBST(with_quadrics) -AC_SUBST(QSWNAL) - -# R. Read 5/02 -GMNAL="" -echo "checking with-gm=" ${with_gm} -if test "${with_gm+set}" = set; then - if test "${with_gm}" = yes; then - with_gm="-I/usr/local/gm/include" - else - with_gm=-I"$with_gm/include" - fi - GMNAL="gmnal" -else -# default case - no GM - with_gm="" -fi -AC_SUBST(with_gm) -AC_SUBST(GMNAL) - - -def_scamac=/opt/scali/include -AC_ARG_WITH(scamac, [ --with-scamac=[yes/no/path] Path to ScaMAC includes (default=/opt/scali/include)], with_scamac=$withval, with_scamac=$def_scamac) -AC_MSG_CHECKING(if ScaMAC headers are present) -if test "$with_scamac" = yes; then - with_scamac=$def_scamac -fi -if test "$with_scamac" != no -a -f ${with_scamac}/scamac.h; then - AC_MSG_RESULT(yes) - SCIMACNAL="scimacnal" - with_scamac="-I${with_scamac} -I${with_scamac}/icm" -else - AC_MSG_RESULT(no) - SCIMACNAL="" - with_scamac="" -fi - -AC_SUBST(with_scamac) -AC_SUBST(SCIMACNAL) - -CFLAGS="$KCFLAGS" -CPPFLAGS="$KINCFLAGS $KCPPFLAGS $MFLAGS $enable_zerocopy $enable_affinity $with_quadrics $with_gm $with_scamac " - -AC_SUBST(MOD_LINK) -AC_SUBST(LINUX25) -AM_CONDITIONAL(LIBLUSTRE, test x$host_cpu = xlib) - -# ---------- Red Hat 2.4.20 backports some 2.5 bits -------- -# This needs to run after we've defined the KCPPFLAGS - -AC_MSG_CHECKING(for kernel version) -AC_TRY_LINK([#define __KERNEL__ - #include <linux/sched.h>], - [struct task_struct p; - p.sighand = NULL;], - [RH_2_4_20=1], - [RH_2_4_20=0]) - -if test $RH_2_4_20 = 1; then - AC_MSG_RESULT(redhat-2.4.20) - CPPFLAGS="$CPPFLAGS -DCONFIG_RH_2_4_20" -else - AC_MSG_RESULT($RELEASE) -fi diff --git a/lustre/portals/autogen.sh b/lustre/portals/autogen.sh deleted file mode 100755 index 9deed73..0000000 --- a/lustre/portals/autogen.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -aclocal && -automake --add-missing && -${AUTOCONF:-autoconf} diff --git a/lustre/portals/build.m4 b/lustre/portals/build.m4 deleted file mode 100644 index 025f243..0000000 --- a/lustre/portals/build.m4 +++ 
/dev/null @@ -1,95 +0,0 @@ -# ---------- other tests and settings --------- - - -# --------- unsigned long long sane? ------- - -AC_CHECK_SIZEOF(unsigned long long, 0) -echo "---> size SIZEOF $SIZEOF_unsigned_long_long" -echo "---> size SIZEOF $ac_cv_sizeof_unsigned_long_long" -if test $ac_cv_sizeof_unsigned_long_long != 8 ; then - AC_MSG_ERROR([** we assume that sizeof(long long) == 8. Tell phil@clusterfs.com]) -fi - -# directories for binaries -ac_default_prefix= -bindir='${exec_prefix}/usr/bin' -sbindir='${exec_prefix}/usr/sbin' -includedir='${prefix}/usr/include' - -# Directories for documentation and demos. -docdir='${prefix}/usr/share/doc/$(PACKAGE)' -AC_SUBST(docdir) -demodir='$(docdir)/demo' -AC_SUBST(demodir) -pkgexampledir='${prefix}/usr/lib/$(PACKAGE)/examples' -AC_SUBST(pkgexampledir) -pymoddir='${prefix}/usr/lib/${PACKAGE}/python/Lustre' -AC_SUBST(pymoddir) -modulenetdir='$(moduledir)/net/$(PACKAGE)' -AC_SUBST(modulenetdir) - - -# ---------- BAD gcc? ------------ -AC_PROG_RANLIB -AC_PROG_CC -AC_MSG_CHECKING(for buggy compiler) -CC_VERSION=`$CC -v 2>&1 | grep "^gcc version"` -bad_cc() { - echo - echo " '$CC_VERSION'" - echo " has been known to generate bad code, " - echo " please get an updated compiler." - AC_MSG_ERROR(sorry) -} -TMP_VERSION=`echo $CC_VERSION | cut -c 1-16` -if test "$TMP_VERSION" = "gcc version 2.95"; then - bad_cc -fi -case "$CC_VERSION" in - # ost_pack_niobuf putting 64bit NTOH temporaries on the stack - # without "sub $0xc,%esp" to protect the stack from being - # stomped on by interrupts (bug 606) - "gcc version 2.96 20000731 (Red Hat Linux 7.1 2.96-98)") - bad_cc - ;; - # mandrake's similar sub 0xc compiler bug - # http://marc.theaimsgroup.com/?l=linux-kernel&m=104748366226348&w=2 - "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)") - bad_cc - ;; - *) - AC_MSG_RESULT(no known problems) - ;; -esac -# end ------ BAD gcc? 
------------ - -# -------- Check for required packages -------------- - -# this doesn't seem to work on older autoconf -# AC_CHECK_LIB(readline, readline,,) -AC_ARG_ENABLE(readline, [ --enable-readline use readline library],, - enable_readline="yes") - -if test "$enable_readline" = "yes" ; then - LIBREADLINE="-lreadline -lncurses" - HAVE_LIBREADLINE="-DHAVE_LIBREADLINE=1" -else - LIBREADLINE="" - HAVE_LIBREADLINE="" -fi -AC_SUBST(LIBREADLINE) -AC_SUBST(HAVE_LIBREADLINE) - -AC_ARG_ENABLE(efence, [ --enable-efence use efence library],, - enable_efence="no") - -if test "$enable_efence" = "yes" ; then - LIBEFENCE="-lefence" - HAVE_LIBEFENCE="-DHAVE_LIBEFENCE=1" -else - LIBEFENCE="" - HAVE_LIBEFENCE="" -fi -AC_SUBST(LIBEFENCE) -AC_SUBST(HAVE_LIBEFENCE) - diff --git a/lustre/portals/configure.in b/lustre/portals/configure.in deleted file mode 100644 index 31d3492..0000000 --- a/lustre/portals/configure.in +++ /dev/null @@ -1,34 +0,0 @@ -# This version is here to make autoconf happy; the name is a file which is -# "unique" to this directory so that configure knows where it should run. -AC_INIT(knals/Makefile.am, 3.0) -AC_CANONICAL_SYSTEM -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -# Automake variables. 
Steal the version number from packaging/intersync.spec -AM_INIT_AUTOMAKE(portals, builtin([esyscmd], [sed -ne '/.*define IVERSION /{ s/.*IVERSION //; p; }' libcfs/module.c])) -# AM_MAINTAINER_MODE - -sinclude(build.m4) -sinclude(archdep.m4) - -if test x$enable_inkernel = xyes ; then -cp Kernelenv.mk Kernelenv.in -cp Makefile.mk Makefile.in -cp libcfs/Makefile.mk libcfs/Makefile.in -cp portals/Makefile.mk portals/Makefile.in -cp knals/Makefile.mk knals/Makefile.in -cp knals/socknal/Makefile.mk knals/socknal/Makefile.in -cp router/Makefile.mk router/Makefile.in -fi - -AM_CONFIG_HEADER(include/config.h) - -AC_OUTPUT([Makefile Kernelenv libcfs/Makefile portals/Makefile \ - unals/Makefile knals/Makefile router/Makefile \ - knals/socknal/Makefile knals/gmnal/Makefile knals/qswnal/Makefile \ - knals/scimacnal/Makefile knals/toenal/Makefile \ - utils/Makefile tests/Makefile doc/Makefile ]) - diff --git a/lustre/portals/doc/.cvsignore b/lustre/portals/doc/.cvsignore deleted file mode 100644 index 827dca4..0000000 --- a/lustre/portals/doc/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -Makefile -Makefile.in -*.eps -*.pdf diff --git a/lustre/portals/doc/Data-structures b/lustre/portals/doc/Data-structures deleted file mode 100644 index b5532b1..0000000 --- a/lustre/portals/doc/Data-structures +++ /dev/null @@ -1,65 +0,0 @@ -In this document I will try to draw the data structures and how they -interrelate in the Portals 3 reference implementation. It is probably -best shown with a drawing, so there may be an additional xfig or -Postscript figure. - - -MEMORY POOLS: ------------- - -First, a digression on memory allocation in the library. As mentioned -in the NAL Writer's Guide, the library does not link against any -standard C libraries and as such is unable to dynamically allocate -memory on its own. It requires that the NAL implement a method -for allocation that is appropriate for the protection domain in -which the library lives. 
This is only called when a network -interface is initialized to allocate the Portals object pools. - -These pools are preallocate blocks of objects that the library -can rapidly make active and manage with a minimum of overhead. -It is also cuts down on overhead for setting up structures -since the NAL->malloc() callback does not need to be called -for each object. - -The objects are maintained on a per-object type singly linked free -list and contain a pointer to the next free object. This pointer -is NULL if the object is not on the free list and is non-zero -if it is on the list. The special sentinal value of 0xDEADBEEF -is used to mark the end of the free list since NULL could -indicate that the last object in the list is not free. - -When one of the lib_*_alloc() functions is called, the library -returns the head of the free list and advances the head pointer -to the next item on the list. The special case of 0xDEADBEEF is -checked and a NULL pointer is returned if there are no more -objects of this type available. The lib_*_free() functions -are even simpler -- check to ensure that the object is not already -free, set its next pointer to the current head and then set -the head to be this newly freed object. - -Since C does not have templates, I did the next best thing and wrote -the memory pool allocation code as a macro that expands based on the -type of the argument. The mk_alloc(T) macro expands to -write the _lib_T_alloc() and lib_T_free() functions. -It requires that the object have a pointer of the type T named -"next_free". There are also functions that map _lib_T_alloc() -to lib_T_alloc() so that the library can add some extra -functionality to the T constructor. - - - -LINKED LISTS: ------------- - -Many of the active Portals objects are stored in doubly linked lists -when they are active. These are always implemented with the pointer -to the next object and a pointer to the next pointer of the -previous object. 
This avoids the "dummy head" object or -special cases for inserting at the beginning or end of the list. -The pointer manipulations are a little hairy at times, but -I hope that they are understandable. - -The actual linked list code is implemented as macros in <lib-p30.h>, -although the object has to know about - - diff --git a/lustre/portals/doc/Makefile.am b/lustre/portals/doc/Makefile.am deleted file mode 100644 index 7c65e6c..0000000 --- a/lustre/portals/doc/Makefile.am +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -LYX2PDF = lyx --export pdf -LYX2TXT = lyx --export text -LYX2HTML = lyx --export html -SUFFIXES = .lin .lyx .pdf .sgml .html .txt .fig .eps - -DOCS = portals3.pdf -IMAGES = file.eps flow_new.eps get.eps mpi.eps portals.eps put.eps -LYXFILES= portals3.lyx - -MAINTAINERCLEANFILES = $(IMAGES) $(DOCS) $(GENERATED) -GENERATED = -EXTRA_DIST = $(DOCS) $(IMAGES) $(LYXFILES) - -all: $(DOCS) - -# update date and version in document -date := $(shell date +%x) -tag := $(shell echo '$$Name: $$' | sed -e 's/^\$$Na''me: *\$$$$/HEAD/; s/^\$$Na''me: \(.*\) \$$$$/\1/') -addversion = sed -e 's|@T''AG@|$(tag)|g; s|@VER''SION@|$(VERSION)|g; s|@DA''TE@|$(date)|g' - -# Regenerate when the $(VERSION) or $Name: $ changes. 
-.INTERMEDIATE: $(GENERATED) -$(GENERATED) : %.lyx: %.lin Makefile - $(addversion) $< > $@ - -.lyx.pdf: - @$(LYX2PDF) $< || printf "\n*** Warning: not creating PDF docs; install lyx to rectify this\n" - -.lyx.txt: - @$(LYX2TXT) $< || printf "\n*** Warning: not creating text docs; install lyx to rectify this\n" -.lyx.html: - @$(LYX2HTML) $< || printf "\n*** Warning: not creating HTML docs; install lyx to rectify this\n" -.fig.eps: - -fig2dev -L eps $< > $@ - -portals3.pdf portals3.txt portals3.html: $(IMAGES) portals3.lyx - -syncweb: portals3.pdf -# cp lustre.pdf /usr/src/www/content/lustre/docs/lustre.pdf -# ( cd /usr/src/www ; make lustre ; make synclustre ) - diff --git a/lustre/portals/doc/Message-life-cycle b/lustre/portals/doc/Message-life-cycle deleted file mode 100644 index e8cc7e2..0000000 --- a/lustre/portals/doc/Message-life-cycle +++ /dev/null @@ -1,118 +0,0 @@ -This documents the life cycle of message as it arrives and is handled by -a basic async, packetized NAL. There are four types of messages that have -slightly different life cycles, so they are addressed independently. - - -Put request ------------ - -1. NAL notices that there is a incoming message header on the network -and reads an ptl_hdr_t in from the wire. - -2. It may store additional NAL specific data that provides context -for this event in a void* that it will interpret in some fashion -later. - -3. The NAL calls lib_parse() with a pointer to the header and its -private data structure. - -4. The library decodes the header and may build a message state -object that describes the event to be written and the ACK to be -sent, if any. It then calls nal->recv() with the private data -that the NAL passed in, a pointer to the message state object -and a translated user address. - - The NAL will have been given a chance to pretranslate - all user addresses when the buffers are created. This - process is described in the NAL-HOWTO. - -5. 
The NAL should restore what ever context it required from the -private data pointer, begin receiving the bytes and possibly store -some extra state of its own. It should return at this point. - - - -Get request ------------ - -1. As with a Put, the NAL notices the incoming message header and -passes it to lib_parse(). - -2. The library decodes the header and calls nal->recv() with a -zero byte length, offset and destination to instruct it to clean -up the wire after reading the header. The private data will -be passed in as well, allowing the NAL to retrieve any state -or context that it requires. - -3. The library may build a message state object to possibly -write an event log or invalidate a memory region. - -4. The library will build a ptl_msg_t header that specifies the -Portals protocol information for delivery at the remote end. - -5. The library calls nal->send() with the pre-built header, -the optional message state object, the four part address -component, a translated user pointer + offset, and some -other things. - -6. The NAL is to put the header on the wire or copy it at -this point (since it off the stack). It should store some -amount of state about its current position in the message and -the destination address. - -7. And then return to the library. - - -Reply request -------------- - -1. Starting at "The library decodes the header..." - -2. The library decodes the header and calls nal->recv() -to bring in the rest of the message. Flow continues in -exactly the same fashion as with all other receives. - - -Ack request ------------ - -1. The library decodes the header, builds the appropriate data -structures for the event in a message state object and calls nal->recv() -with a zero byte length, etc. - - -Packet arrival --------------- - -1. 
The NAL should notice the arrival of a packet, retrieve whatever -state it needs from the message ID or other NAL specific header data -and place the data bytes directly into the user address that were -given to nal->recv(). - - How this happens is outside the scope of the Portals library - and soley determined by the NAL... - -2. If this is the last packet in a message, the NAL should retrieve -the lib_msg_t *cookie that it was given in the call to nal->recv() -and pass it to lib_finalize(). lib_finalize() may call nal->send() -to send an ACK, nal->write() to record an entry in the event log, -nal->invalidate() to unregister a region of memory or do nothing at all. - -3. It should then clean up any remaining NAL specific state about -the message and go back into the main loop. - - -Outgoing packets ----------------- - -1. When the NAL has pending output, it should put the packets on -the wire wrapped with whatever implementation specified wrappers. - -2. Once it has output all the packets of a message it should -call lib_finalize() with the message state object that was -handed to nal->send(). This will allows the library to clean -up its state regarding the message and write any pending event -entries. - - - diff --git a/lustre/portals/doc/NAL-HOWTO b/lustre/portals/doc/NAL-HOWTO deleted file mode 100644 index ea38aed..0000000 --- a/lustre/portals/doc/NAL-HOWTO +++ /dev/null @@ -1,293 +0,0 @@ -This document is a first attempt at describing how to write a NAL -for the Portals 3 library. It also defines the library architecture -and the abstraction of protection domains. - - -First, an overview of the architecture: - - Application - -----|----+-------- - | - API === NAL (User space) - | ----------+---|----- - | - LIB === NAL (Library space) - | ----------+---|----- - - Physical wire (NIC space) - - -Application - API -API-side NAL ------------- -LIB-side NAL - LIB -LIB-side NAL - wire - -Communication is through the indicated paths via well defined -interfaces. 
The API and LIB portions are written to be portable -across platforms and do not depend on the network interface. - -Communcation between the application and the API code is -defined in the Portals 3 API specification. This is the -user-visible portion of the interface and should be the most -stable. - - - -API-side NAL: ------------- - -The user space NAL needs to implement only a few functions -that are stored in a nal_t data structure and called by the -API-side library: - - int forward( nal_t *nal, - int index, - void *args, - size_t arg_len, - void *ret, - size_t ret_len - ); - -Most of the data structures in the portals library are held in -the LIB section of the code, so it is necessary to forward API -calls across the protection domain to the library. This is -handled by the NAL's forward method. Once the argument and return -blocks are on the remote side the NAL should call lib_dispatch() -to invoke the appropriate API function. - - int validate( nal_t *nal, - void *base, - size_t extent, - void **trans_base, - void **trans_data - ); - -The validate method provides a means for the NAL to prevalidate -and possibly pretranslate user addresses into a form suitable -for fast use by the network card or kernel module. The trans_base -pointer will be used by the library everytime it needs to -refer to the block of memory. The trans_data result is a -cookie that will be handed to the NAL along with the trans_base. - -The library never performs calculations on the trans_base value; -it only computes offsets that are then handed to the NAL. - - - int shutdown( nal_t *nal, int interface ); - -Brings down the network interface. The remote NAL side should -call lib_fini() to bring down the library side of the network. - - void yield( nal_t *nal ); - -This allows the user application to gracefully give up the processor -while busy waiting. 
Performance critical applications may not -want to take the time to call this function, so it should be an -option to the PtlEQWait call. Right now it is not implemented as such. - -Lastly, the NAL must implement a function named PTL_IFACE_*, where -* is the name of the NAL such as PTL_IFACE_IP or PTL_IFACE_MYR. -This initialization function is to set up communication with the -library-side NAL, which should call lib_init() to bring up the -network interface. - - - -LIB-side NAL: ------------- - -On the library-side, the NAL has much more responsibility. It -is responsible for calling lib_dispatch() on behalf of the user, -it is also responsible for bringing packets off the wire and -pushing bits out. As on the user side, the methods are stored -in a nal_cb_t structure that is defined on a per network -interface basis. - -The calls to lib_dispatch() need to be examined. The prototype: - - void lib_dispatch( - nal_cb_t *nal, - void *private, - int index, - void *arg_block, - void *ret_block - ); - -has two complications. The private field is a NAL-specific -value that will be passed to any callbacks produced as a result -of this API call. Kernel module implementations may use this -for task structures, or perhaps network card data. It is ignored -by the library. - -Secondly, the arg_block and ret_block must be in the same protection -domain as the library. The NAL's two halves must communicate the -sizes and perform the copies. After the call, the buffer pointed -to by ret_block will be filled in and should be copied back to -the user space. How this is to be done is NAL specific. - - int lib_parse( - nal_cb_t *nal, - ptl_hdr_t *hdr, - void *private - ); - -This is the only other entry point into the library from the NAL. -When the NAL detects an incoming message on the wire it should read -sizeof(ptl_hdr_t) bytes and pass a pointer to the header to -lib_parse(). 
It may set private to be anything that it needs to -tie the incoming message to callbacks that are made as a result -of this event. - -The method calls are: - - int (*send)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int nid, - int pid, - int gid, - int rid, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t len - ); - -This is a tricky function -- it must support async output -of messages as well as properly syncronized event log writing. -The private field is the same that was passed into lib_dispatch() -or lib_parse() and may be used to tie this call to the event -that initiated the entry to the library. - -The cookie is a pointer to a library private value that must -be passed to lib_finalize() once the message has been completely -sent. It should not be examined by the NAL for any meaning. - -The four ID fields are passed in, although some implementations -may not use all of them. - -The single base pointer has been replaced with the translated -address that the API NAL generated in the api_nal->validate() -call. The trans_data is unchanged and the offset is in bytes. - - - int (*recv)( - nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - user_ptr trans_base, - user_ptr trans_data, - size_t offset, - size_t mlen, - size_t rlen - ); - -This callback will only be called in response to lib_parse(). -The cookie, trans_addr and trans_data are as discussed in send(). -The NAL should read mlen bytes from the wire, deposit them into -trans_base + offset and then discard (rlen - mlen) bytes. -Once the entire message has been received the NAL should call -lib_finalize() with the lib_msg_t *cookie. - -The special arguments of base=NULL, data=NULL, offset=0, mlen=0, rlen=0 -is used to indicate that the NAL should clean up the wire. This could -be implemented as a blocking call, although having it return as quickly -as possible is desirable. 
- - int (*write)( - nal_cb_t *nal, - void *private, - user_ptr trans_addr, - user_ptr trans_data, - size_t offset, - - void *src_addr, - size_t len - ); - -This is essentially a cross-protection domain memcpy(). The user address -has been pretranslated by the api_nal->translate() call. - - void *(*malloc)( - nal_cb_t *nal, - size_t len - ); - - void (*free)( - nal_cb_t *nal, - void *buf - ); - -Since the NAL may be in a non-standard hosted environment it can -not call malloc(). This allows the library side NAL to implement -the system specific malloc(). In the current reference implementation -the libary only calls nal->malloc() when the network interface is -initialized and then calls free when it is brought down. The library -maintains its own pool of objects for allocation so only one call to -malloc is made per object type. - - void (*invalidate)( - nal_cb_t *nal, - user_ptr trans_base, - user_ptr trans_data, - size_t extent - ); - -User addresses are validated/translated at the user-level API NAL -method, which is likely to push them to this level. Meanwhile, -the library NAL will be notified when the library no longer -needs the buffer. Overlapped buffers are not detected by the -library, so the NAL should ref count each page involved. - -Unfortunately we have a few bugs when the invalidate method is -called. It is still in progress... - - void (*printf)( - nal_cb_t *nal, - const char *fmt, - ... - ); - -As with malloc(), the library does not have any way to do printf -or printk. It is not necessary for the NAL to implement the this -call, although it will make debugging difficult. - - void (*cli)( - nal_cb_t *nal, - unsigned long *flags - ); - - void (*sti)( - nal_cb_t *nal, - unsigned long *flags - ); - -These are used by the library to mark critical sections. 
- - int (*gidrid2nidpid)( - nal_cb_t *nal, - ptl_id_t gid, - ptl_id_t rid, - ptl_id_t *nid, - ptl_id_t *pid - ); - - - int (*nidpid2gidrid)( - nal_cb_t *nal, - ptl_id_t nid, - ptl_id_t pid, - ptl_id_t *gid, - ptl_id_t *rid - ); - -Rolf added these. I haven't looked at how they have to work yet. diff --git a/lustre/portals/doc/file.fig b/lustre/portals/doc/file.fig deleted file mode 100644 index 914c294..0000000 --- a/lustre/portals/doc/file.fig +++ /dev/null @@ -1,111 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1200 750 1650 1050 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1050 1650 750 1200 750 1200 1050 1650 1050 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 952 FS0\001 --6 -6 1200 2325 1650 2625 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2625 1650 2325 1200 2325 1200 2625 1650 2625 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2527 FS3\001 --6 -6 1200 1800 1650 2100 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 2100 1650 1800 1200 1800 1200 2100 1650 2100 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 2002 FS2\001 --6 -6 1200 1275 1650 1575 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1650 1575 1650 1275 1200 1275 1200 1575 1650 1575 -4 1 0 100 0 0 10 0.0000 0 105 240 1425 1477 FS1\001 --6 -6 450 750 900 1200 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 750.000 450 1050 675 1125 900 1050 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 825 225 75 450 900 900 750 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 825 450 1050 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1050 900 825 --6 -6 450 2325 900 2775 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 2325.000 450 2625 675 2700 900 2625 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 2400 225 75 450 2475 900 2325 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 2400 450 2625 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2625 900 2400 --6 -6 450 1800 900 2250 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1800.000 450 2100 675 2175 900 2100 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1875 225 75 450 1950 900 1800 
-2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 1875 450 2100 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2100 900 1875 --6 -6 450 1275 900 1725 -5 1 0 1 0 7 100 0 20 0.000 0 1 0 0 675.000 1275.000 450 1575 675 1650 900 1575 -1 2 0 1 0 7 100 0 20 0.000 1 0.0000 675 1350 225 75 450 1425 900 1275 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 450 1350 450 1575 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1575 900 1350 --6 -6 2250 750 3450 2625 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1200 3150 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1500 3150 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 1800 3150 1800 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2100 3150 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 975 3150 975 3150 2625 2550 2625 2550 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 2 - 2550 2400 3150 2400 -4 1 0 100 0 0 10 0.0000 0 135 1185 2850 900 Application Buffer\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2400 2550 1350 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1875 2550 1050 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 1425 2550 1950 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 900 2550 1650 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 900 1200 900 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1425 1200 1425 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 1950 1200 1950 -2 1 0 1 0 7 100 0 20 0.000 0 0 -1 0 0 2 - 900 2475 1200 2475 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2025 2550 2250 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 1 2 - 0 0 1.00 60.00 120.00 - 0 0 1.00 60.00 120.00 - 1650 2550 2550 2475 -2 4 0 1 0 7 100 0 -1 0.000 0 0 7 0 0 5 - 1875 2850 1875 600 225 600 225 2850 1875 2850 -4 1 0 100 0 0 10 0.0000 0 105 1215 1050 525 Parallel File Server\001 diff 
--git a/lustre/portals/doc/flow_new.fig b/lustre/portals/doc/flow_new.fig deleted file mode 100644 index d828dea..0000000 --- a/lustre/portals/doc/flow_new.fig +++ /dev/null @@ -1,213 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 525 2175 1575 2925 -6 675 2287 1425 2812 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 2437 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 645 1050 2587 Exists and\001 -4 1 0 50 0 0 10 0.0000 4 135 555 1050 2737 Accepts?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 2550 1050 2175 525 2550 1050 2925 1575 2550 --6 -6 3450 1275 4350 1725 -6 3600 1312 4200 1687 -4 1 0 100 0 0 10 0.0000 0 135 525 3900 1612 Message\001 -4 1 0 100 0 0 10 0.0000 0 105 465 3900 1462 Discard\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3450 1275 4350 1275 4350 1725 3450 1725 3450 1275 --6 -6 4650 1275 5550 1725 -6 4725 1312 5475 1687 -4 1 0 100 0 0 10 0.0000 0 135 735 5100 1612 Drop Count\001 -4 1 0 100 0 0 10 0.0000 0 105 630 5100 1462 Increment\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4650 1275 5550 1275 5550 1725 4650 1725 4650 1275 --6 -6 1350 525 2250 975 -6 1350 562 2250 937 -4 1 0 100 0 0 10 0.0000 0 135 795 1800 862 Match Entry\001 -4 1 0 100 0 0 10 0.0000 0 105 585 1800 712 Get Next\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 525 2250 525 2250 975 1350 975 1350 525 --6 -6 525 1125 1575 1875 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 1575 1500 1050 1125 525 1500 1050 1875 1575 1500 -4 1 0 100 0 0 10 0.0000 0 105 465 1049 1552 Match?\001 --6 -6 2340 1237 2940 1687 -6 2340 1237 2940 1687 -4 1 0 100 0 0 10 0.0000 0 105 345 2640 1387 More\001 -4 1 0 100 0 0 10 0.0000 0 105 405 2640 1537 Match\001 -4 1 0 100 0 0 10 0.0000 0 105 510 2640 1687 Entries?\001 --6 --6 -6 525 3225 1575 3975 -6 675 3375 1425 3750 -4 1 0 50 0 0 10 0.0000 4 105 255 1050 3525 MD\001 -4 1 0 50 0 0 10 0.0000 4 105 615 1050 3720 has room?\001 --6 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 525 3600 1050 3225 1575 3600 1050 3975 525 3600 
--6 -6 3300 3375 4350 3825 -6 3300 3412 4350 3787 -4 1 0 50 0 0 10 0.0000 4 105 735 3825 3562 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3825 3712 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3300 3375 4350 3375 4350 3825 3300 3825 3300 3375 --6 -6 1950 3225 3000 3975 -6 2250 3450 2700 3750 -4 1 0 50 0 0 10 0.0000 4 105 450 2475 3600 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 315 2475 3750 full?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 3000 3600 2475 3225 1950 3600 2475 3975 3000 3600 --6 -6 3150 4500 4200 4950 -6 3150 4537 4200 4912 -4 1 0 50 0 0 10 0.0000 4 105 735 3675 4687 Unlink MD\001 -4 1 0 50 0 0 10 0.0000 4 135 945 3675 4837 & Match Entry\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3150 4500 4200 4500 4200 4950 3150 4950 3150 4500 --6 -6 600 4500 1500 4950 -6 675 4537 1425 4912 -4 1 0 50 0 0 10 0.0000 4 135 615 1050 4837 Operation\001 -4 1 0 50 0 0 10 0.0000 4 105 525 1050 4687 Perform\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 600 4500 1500 4500 1500 4950 600 4950 600 4500 --6 -6 4650 4350 5700 5100 -6 4950 4537 5400 4912 -6 4950 4537 5400 4912 -4 1 0 50 0 0 10 0.0000 4 135 435 5175 4837 Queue?\001 -4 1 0 50 0 0 10 0.0000 4 105 360 5175 4687 Event\001 --6 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 5700 4725 5175 4350 4650 4725 5175 5100 5700 4725 --6 -6 6000 4500 6900 4950 -6 6225 4575 6675 4875 -4 1 0 50 0 0 10 0.0000 4 105 360 6450 4875 Event\001 -4 1 0 50 0 0 10 0.0000 4 105 435 6450 4725 Record\001 --6 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 6000 4500 6900 4500 6900 4950 6000 4950 6000 4500 --6 -6 1800 4350 2850 5100 -6 2100 4575 2550 4875 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4725 Unlink\001 -4 1 0 50 0 0 10 0.0000 4 105 450 2325 4875 thresh?\001 --6 -2 3 0 1 0 7 100 0 -1 0.000 0 0 0 0 0 5 - 2850 4725 2325 4350 1800 4725 2325 5100 2850 4725 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 1875 1050 2175 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 
1575 1500 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 450 1050 1125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1350 750 1050 750 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 2925 1050 3225 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3150 1500 3450 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4350 1500 4650 1500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 2100 1500 2625 1125 3150 1500 2625 1875 2100 1500 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1575 3600 1950 3600 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1050 3975 1050 4500 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 3600 3300 3600 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 4725 1800 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 5700 4725 6000 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2850 4725 3150 4725 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 4200 4725 4650 4725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 6900 4725 7950 4725 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1575 2550 1650 2550 1800 2550 1800 2400 1800 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 2250 750 2475 750 2625 750 2625 900 2625 1125 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 1 5 - 0 0 1.00 60.00 120.00 - 7500 4725 7500 1650 7500 1500 7350 1500 5550 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 2475 3225 2475 2400 2475 2250 2325 2250 1800 2250 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 3825 3375 3825 2175 3825 2025 3675 2025 1800 2025 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 
60.00 120.00 - 2325 4350 2325 4275 2325 4125 2475 4125 4275 4125 4425 4125 - 4425 4275 4425 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 50 0 -1 0.000 0 1 0 8 - 0 0 1.00 60.00 120.00 - 5175 4350 5175 4275 5175 4125 5325 4125 7125 4125 7275 4125 - 7275 4275 7275 4725 - 0.000 1.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 135 360 825 525 Entry\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1575 2475 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 1950 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 3000 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 2775 1050 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 3225 1425 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 1650 3525 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 1200 4050 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3150 3525 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2625 3150 no\001 -4 1 0 100 0 0 10 0.0000 0 105 195 3000 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 105 195 5850 4650 yes\001 -4 1 0 100 0 0 10 0.0000 0 75 150 2475 4275 no\001 -4 1 0 100 0 0 10 0.0000 0 75 150 5325 4275 no\001 -4 1 0 50 0 0 10 0.0000 4 105 285 7800 4650 Exit\001 diff --git a/lustre/portals/doc/get.fig b/lustre/portals/doc/get.fig deleted file mode 100644 index 28db949..0000000 --- a/lustre/portals/doc/get.fig +++ /dev/null @@ -1,33 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 2775 900 3525 1200 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1200 Translation\001 -4 0 0 100 0 0 10 0.0000 0 105 405 2850 1050 Portal\001 --6 -6 1350 1725 2175 2025 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 2025 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1875 Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 750 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 825 2700 1275 -2 1 0 1 0 7 100 0 -1 3.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1350 900 1950 -2 2 0 1 0 7 100 0 -1 4.000 0 
0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 2 0 1 0 7 100 0 -1 4.000 0 0 7 0 0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -4 1 0 100 0 0 10 0.0000 4 135 495 1800 825 Request\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 diff --git a/lustre/portals/doc/ieee.bst b/lustre/portals/doc/ieee.bst deleted file mode 100644 index 4df7c50..0000000 --- a/lustre/portals/doc/ieee.bst +++ /dev/null @@ -1,1112 +0,0 @@ -% --------------------------------------------------------------- -% -% by Paolo.Ienne@di.epfl.ch -% -% --------------------------------------------------------------- -% -% no guarantee is given that the format corresponds perfectly to -% IEEE 8.5" x 11" Proceedings, but most features should be ok. -% -% --------------------------------------------------------------- -% -% `ieee' from BibTeX standard bibliography style `abbrv' -% version 0.99a for BibTeX versions 0.99a or later, LaTeX version 2.09. -% Copyright (C) 1985, all rights reserved. -% Copying of this file is authorized only if either -% (1) you make absolutely no changes to your copy, including name, or -% (2) if you do make changes, you name it something other than -% btxbst.doc, plain.bst, unsrt.bst, alpha.bst, and abbrv.bst. -% This restriction helps ensure that all standard styles are identical. -% The file btxbst.doc has the documentation for this style. 
- -ENTRY - { address - author - booktitle - chapter - edition - editor - howpublished - institution - journal - key - month - note - number - organization - pages - publisher - school - series - title - type - volume - year - } - {} - { label } - -INTEGERS { output.state before.all mid.sentence after.sentence after.block } - -FUNCTION {init.state.consts} -{ #0 'before.all := - #1 'mid.sentence := - #2 'after.sentence := - #3 'after.block := -} - -STRINGS { s t } - -FUNCTION {output.nonnull} -{ 's := - output.state mid.sentence = - { ", " * write$ } - { output.state after.block = - { add.period$ write$ - newline$ - "\newblock " write$ - } - { output.state before.all = - 'write$ - { add.period$ " " * write$ } - if$ - } - if$ - mid.sentence 'output.state := - } - if$ - s -} - -FUNCTION {output} -{ duplicate$ empty$ - 'pop$ - 'output.nonnull - if$ -} - -FUNCTION {output.check} -{ 't := - duplicate$ empty$ - { pop$ "empty " t * " in " * cite$ * warning$ } - 'output.nonnull - if$ -} - -FUNCTION {output.bibitem} -{ newline$ - "\bibitem{" write$ - cite$ write$ - "}" write$ - newline$ - "" - before.all 'output.state := -} - -FUNCTION {fin.entry} -{ add.period$ - write$ - newline$ -} - -FUNCTION {new.block} -{ output.state before.all = - 'skip$ - { after.block 'output.state := } - if$ -} - -FUNCTION {new.sentence} -{ output.state after.block = - 'skip$ - { output.state before.all = - 'skip$ - { after.sentence 'output.state := } - if$ - } - if$ -} - -FUNCTION {not} -{ { #0 } - { #1 } - if$ -} - -FUNCTION {and} -{ 'skip$ - { pop$ #0 } - if$ -} - -FUNCTION {or} -{ { pop$ #1 } - 'skip$ - if$ -} - -FUNCTION {new.block.checka} -{ empty$ - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.block.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.block - if$ -} - -FUNCTION {new.sentence.checka} -{ empty$ - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {new.sentence.checkb} -{ empty$ - swap$ empty$ - and - 'skip$ - 'new.sentence - if$ -} - -FUNCTION {field.or.null} -{ duplicate$ empty$ 
- { pop$ "" } - 'skip$ - if$ -} - -FUNCTION {emphasize} -{ duplicate$ empty$ - { pop$ "" } - { "{\em " swap$ * "}" * } - if$ -} - -INTEGERS { nameptr namesleft numnames } - -FUNCTION {format.names} -{ 's := - #1 'nameptr := - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { s nameptr "{f.~}{vv~}{ll}{, jj}" format.name$ 't := - nameptr #1 > - { namesleft #1 > - { ", " * t * } - { numnames #2 > - { "," * } - 'skip$ - if$ - t "others" = - { " et~al." * } - { " and " * t * } - if$ - } - if$ - } - 't - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {format.authors} -{ author empty$ - { "" } - { author format.names } - if$ -} - -FUNCTION {format.editors} -{ editor empty$ - { "" } - { editor format.names - editor num.names$ #1 > - { ", editors" * } - { ", editor" * } - if$ - } - if$ -} - -FUNCTION {format.title} -{ title empty$ - { "" } - { title "t" change.case$ } - if$ -} - -FUNCTION {n.dashify} -{ 't := - "" - { t empty$ not } - { t #1 #1 substring$ "-" = - { t #1 #2 substring$ "--" = not - { "--" * - t #2 global.max$ substring$ 't := - } - { { t #1 #1 substring$ "-" = } - { "-" * - t #2 global.max$ substring$ 't := - } - while$ - } - if$ - } - { t #1 #1 substring$ * - t #2 global.max$ substring$ 't := - } - if$ - } - while$ -} - -FUNCTION {format.date} -{ year empty$ - { month empty$ - { "" } - { "there's a month but no year in " cite$ * warning$ - month - } - if$ - } - { month empty$ - 'year - { month " " * year * } - if$ - } - if$ -} - -FUNCTION {format.btitle} -{ title emphasize -} - -FUNCTION {tie.or.space.connect} -{ duplicate$ text.length$ #3 < - { "~" } - { " " } - if$ - swap$ * * -} - -FUNCTION {either.or.check} -{ empty$ - 'pop$ - { "can't use both " swap$ * " fields in " * cite$ * warning$ } - if$ -} - -FUNCTION {format.bvolume} -{ volume empty$ - { "" } - { "volume" volume tie.or.space.connect - series empty$ - 'skip$ - { " of " * series emphasize * } - if$ - "volume and number" number 
either.or.check - } - if$ -} - -FUNCTION {format.number.series} -{ volume empty$ - { number empty$ - { series field.or.null } - { output.state mid.sentence = - { "number" } - { "Number" } - if$ - number tie.or.space.connect - series empty$ - { "there's a number but no series in " cite$ * warning$ } - { " in " * series * } - if$ - } - if$ - } - { "" } - if$ -} - -FUNCTION {format.edition} -{ edition empty$ - { "" } - { output.state mid.sentence = - { edition "l" change.case$ " edition" * } - { edition "t" change.case$ " edition" * } - if$ - } - if$ -} - -INTEGERS { multiresult } - -FUNCTION {multi.page.check} -{ 't := - #0 'multiresult := - { multiresult not - t empty$ not - and - } - { t #1 #1 substring$ - duplicate$ "-" = - swap$ duplicate$ "," = - swap$ "+" = - or or - { #1 'multiresult := } - { t #2 global.max$ substring$ 't := } - if$ - } - while$ - multiresult -} - -FUNCTION {format.pages} -{ pages empty$ - { "" } - { pages multi.page.check - { "pages" pages n.dashify tie.or.space.connect } - { "page" pages tie.or.space.connect } - if$ - } - if$ -} - -FUNCTION {format.vol.num.pages} -{ volume field.or.null - number empty$ - 'skip$ - { "(" number * ")" * * - volume empty$ - { "there's a number but no volume in " cite$ * warning$ } - 'skip$ - if$ - } - if$ - pages empty$ - 'skip$ - { duplicate$ empty$ - { pop$ format.pages } - { ":" * pages n.dashify * } - if$ - } - if$ -} - -FUNCTION {format.chapter.pages} -{ chapter empty$ - 'format.pages - { type empty$ - { "chapter" } - { type "l" change.case$ } - if$ - chapter tie.or.space.connect - pages empty$ - 'skip$ - { ", " * format.pages * } - if$ - } - if$ -} - -FUNCTION {format.in.ed.booktitle} -{ booktitle empty$ - { "" } - { editor empty$ - { "In " booktitle emphasize * } - { "In " format.editors * ", " * booktitle emphasize * } - if$ - } - if$ -} - -FUNCTION {empty.misc.check} -{ author empty$ title empty$ howpublished empty$ - month empty$ year empty$ note empty$ - and and and and and - key empty$ not and - { 
"all relevant fields are empty in " cite$ * warning$ } - 'skip$ - if$ -} - -FUNCTION {format.thesis.type} -{ type empty$ - 'skip$ - { pop$ - type "t" change.case$ - } - if$ -} - -FUNCTION {format.tr.number} -{ type empty$ - { "Technical Report" } - 'type - if$ - number empty$ - { "t" change.case$ } - { number tie.or.space.connect } - if$ -} - -FUNCTION {format.article.crossref} -{ key empty$ - { journal empty$ - { "need key or journal for " cite$ * " to crossref " * crossref * - warning$ - "" - } - { "In {\em " journal * "\/}" * } - if$ - } - { "In " key * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.crossref.editor} -{ editor #1 "{vv~}{ll}" format.name$ - editor num.names$ duplicate$ - #2 > - { pop$ " et~al." * } - { #2 < - 'skip$ - { editor #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" = - { " et~al." * } - { " and " * editor #2 "{vv~}{ll}" format.name$ * } - if$ - } - if$ - } - if$ -} - -FUNCTION {format.book.crossref} -{ volume empty$ - { "empty volume in " cite$ * "'s crossref of " * crossref * warning$ - "In " - } - { "Volume" volume tie.or.space.connect - " of " * - } - if$ - editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { series empty$ - { "need editor, key, or series for " cite$ * " to crossref " * - crossref * warning$ - "" * - } - { "{\em " * series * "\/}" * } - if$ - } - { key * } - if$ - } - { format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {format.incoll.inproc.crossref} -{ editor empty$ - editor field.or.null author field.or.null = - or - { key empty$ - { booktitle empty$ - { "need editor, key, or booktitle for " cite$ * " to crossref " * - crossref * warning$ - "" - } - { "In {\em " booktitle * "\/}" * } - if$ - } - { "In " key * } - if$ - } - { "In " format.crossref.editor * } - if$ - " \cite{" * crossref * "}" * -} - -FUNCTION {article} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref 
missing$ - { journal emphasize "journal" output.check - format.vol.num.pages output - format.date "year" output.check - } - { format.article.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {book} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {booklet} -{ output.bibitem - format.authors output - new.block - format.title "title" output.check - howpublished address new.block.checkb - howpublished output - address output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {inbook} -{ output.bibitem - author empty$ - { format.editors "author and editor" output.check } - { format.authors output.nonnull - crossref missing$ - { "author and editor" editor either.or.check } - 'skip$ - if$ - } - if$ - new.block - format.btitle "title" output.check - crossref missing$ - { format.bvolume output - format.chapter.pages "chapter and pages" output.check - new.block - format.number.series output - new.sentence - publisher "publisher" output.check - address output - } - { format.chapter.pages "chapter and pages" output.check - new.block - format.book.crossref output.nonnull - } - if$ - format.edition output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {incollection} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { 
format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.chapter.pages output - new.sentence - publisher "publisher" output.check - address output - format.edition output - format.date "year" output.check - } - { format.incoll.inproc.crossref output.nonnull - format.chapter.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {inproceedings} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - crossref missing$ - { format.in.ed.booktitle "booktitle" output.check - format.bvolume output - format.number.series output - format.pages output - address empty$ - { organization publisher new.sentence.checkb - organization output - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - organization output - publisher output - } - if$ - } - { format.incoll.inproc.crossref output.nonnull - format.pages output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {conference} { inproceedings } - -FUNCTION {manual} -{ output.bibitem - author empty$ - { organization empty$ - 'skip$ - { organization output.nonnull - address output - } - if$ - } - { format.authors output.nonnull } - if$ - new.block - format.btitle "title" output.check - author empty$ - { organization empty$ - { address new.block.checka - address output - } - 'skip$ - if$ - } - { organization address new.block.checkb - organization output - address output - } - if$ - format.edition output - format.date output - new.block - note output - fin.entry -} - -FUNCTION {mastersthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - "Master's thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {misc} -{ 
output.bibitem - format.authors output - title howpublished new.block.checkb - format.title output - howpublished new.block.checka - howpublished output - format.date output - new.block - note output - fin.entry - empty.misc.check -} - -FUNCTION {phdthesis} -{ output.bibitem - format.authors "author" output.check - new.block - format.btitle "title" output.check - new.block - "PhD thesis" format.thesis.type output.nonnull - school "school" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {proceedings} -{ output.bibitem - editor empty$ - { organization output } - { format.editors output.nonnull } - if$ - new.block - format.btitle "title" output.check - format.bvolume output - format.number.series output - address empty$ - { editor empty$ - { publisher new.sentence.checka } - { organization publisher new.sentence.checkb - organization output - } - if$ - publisher output - format.date "year" output.check - } - { address output.nonnull - format.date "year" output.check - new.sentence - editor empty$ - 'skip$ - { organization output } - if$ - publisher output - } - if$ - new.block - note output - fin.entry -} - -FUNCTION {techreport} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - format.tr.number output.nonnull - institution "institution" output.check - address output - format.date "year" output.check - new.block - note output - fin.entry -} - -FUNCTION {unpublished} -{ output.bibitem - format.authors "author" output.check - new.block - format.title "title" output.check - new.block - note "note" output.check - format.date output - fin.entry -} - -FUNCTION {default.type} { misc } - -MACRO {jan} {"Jan."} - -MACRO {feb} {"Feb."} - -MACRO {mar} {"Mar."} - -MACRO {apr} {"Apr."} - -MACRO {may} {"May"} - -MACRO {jun} {"June"} - -MACRO {jul} {"July"} - -MACRO {aug} {"Aug."} - -MACRO {sep} {"Sept."} - -MACRO {oct} {"Oct."} - -MACRO {nov} 
{"Nov."} - -MACRO {dec} {"Dec."} - -MACRO {acmcs} {"ACM Comput. Surv."} - -MACRO {acta} {"Acta Inf."} - -MACRO {cacm} {"Commun. ACM"} - -MACRO {ibmjrd} {"IBM J. Res. Dev."} - -MACRO {ibmsj} {"IBM Syst.~J."} - -MACRO {ieeese} {"IEEE Trans. Softw. Eng."} - -MACRO {ieeetc} {"IEEE Trans. Comput."} - -MACRO {ieeetcad} - {"IEEE Trans. Comput.-Aided Design Integrated Circuits"} - -MACRO {ipl} {"Inf. Process. Lett."} - -MACRO {jacm} {"J.~ACM"} - -MACRO {jcss} {"J.~Comput. Syst. Sci."} - -MACRO {scp} {"Sci. Comput. Programming"} - -MACRO {sicomp} {"SIAM J. Comput."} - -MACRO {tocs} {"ACM Trans. Comput. Syst."} - -MACRO {tods} {"ACM Trans. Database Syst."} - -MACRO {tog} {"ACM Trans. Gr."} - -MACRO {toms} {"ACM Trans. Math. Softw."} - -MACRO {toois} {"ACM Trans. Office Inf. Syst."} - -MACRO {toplas} {"ACM Trans. Prog. Lang. Syst."} - -MACRO {tcs} {"Theoretical Comput. Sci."} - -READ - -FUNCTION {sortify} -{ purify$ - "l" change.case$ -} - -INTEGERS { len } - -FUNCTION {chop.word} -{ 's := - 'len := - s #1 len substring$ = - { s len #1 + global.max$ substring$ } - 's - if$ -} - -FUNCTION {sort.format.names} -{ 's := - #1 'nameptr := - "" - s num.names$ 'numnames := - numnames 'namesleft := - { namesleft #0 > } - { nameptr #1 > - { " " * } - 'skip$ - if$ - s nameptr "{vv{ } }{ll{ }}{ f{ }}{ jj{ }}" format.name$ 't := - nameptr numnames = t "others" = and - { "et al" * } - { t sortify * } - if$ - nameptr #1 + 'nameptr := - namesleft #1 - 'namesleft := - } - while$ -} - -FUNCTION {sort.format.title} -{ 't := - "A " #2 - "An " #3 - "The " #4 t chop.word - chop.word - chop.word - sortify - #1 global.max$ substring$ -} - -FUNCTION {author.sort} -{ author empty$ - { key empty$ - { "to sort, need author or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.editor.sort} -{ author empty$ - { editor empty$ - { key empty$ - { "to sort, need author, editor, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ 
- } - { editor sort.format.names } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {author.organization.sort} -{ author empty$ - { organization empty$ - { key empty$ - { "to sort, need author, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { author sort.format.names } - if$ -} - -FUNCTION {editor.organization.sort} -{ editor empty$ - { organization empty$ - { key empty$ - { "to sort, need editor, organization, or key in " cite$ * warning$ - "" - } - { key sortify } - if$ - } - { "The " #4 organization chop.word sortify } - if$ - } - { editor sort.format.names } - if$ -} - -FUNCTION {presort} -{ type$ "book" = - type$ "inbook" = - or - 'author.editor.sort - { type$ "proceedings" = - 'editor.organization.sort - { type$ "manual" = - 'author.organization.sort - 'author.sort - if$ - } - if$ - } - if$ - " " - * - year field.or.null sortify - * - " " - * - title field.or.null - sort.format.title - * - #1 entry.max$ substring$ - 'sort.key$ := -} - -ITERATE {presort} - -SORT - -STRINGS { longest.label } - -INTEGERS { number.label longest.label.width } - -FUNCTION {initialize.longest.label} -{ "" 'longest.label := - #1 'number.label := - #0 'longest.label.width := -} - -FUNCTION {longest.label.pass} -{ number.label int.to.str$ 'label := - number.label #1 + 'number.label := - label width$ longest.label.width > - { label 'longest.label := - label width$ 'longest.label.width := - } - 'skip$ - if$ -} - -EXECUTE {initialize.longest.label} - -ITERATE {longest.label.pass} - -FUNCTION {begin.bib} -{ preamble$ empty$ - 'skip$ - { preamble$ write$ newline$ } - if$ - "\begin{thebibliography}{" longest.label * - "}\setlength{\itemsep}{-1ex}\small" * write$ newline$ -} - -EXECUTE {begin.bib} - -EXECUTE {init.state.consts} - -ITERATE {call.type$} - -FUNCTION {end.bib} -{ newline$ - "\end{thebibliography}" write$ newline$ -} - -EXECUTE {end.bib} - -% end of file ieee.bst -% 
--------------------------------------------------------------- diff --git a/lustre/portals/doc/mpi.fig b/lustre/portals/doc/mpi.fig deleted file mode 100644 index e1a91b5..0000000 --- a/lustre/portals/doc/mpi.fig +++ /dev/null @@ -1,117 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 150 1650 900 2025 -4 1 0 100 0 0 10 0.0000 0 135 735 525 1800 Unexpected\001 -4 1 0 100 0 0 10 0.0000 0 135 585 525 1995 Messages\001 --6 -6 150 150 900 525 -4 1 0 100 0 0 10 0.0000 0 135 615 525 300 Preposted\001 -4 1 0 100 0 0 10 0.0000 0 105 525 525 495 Receives\001 --6 -6 2550 4125 3150 4725 -4 1 0 100 0 0 10 0.0000 0 135 600 2850 4275 Length=0\001 -4 1 0 100 0 0 10 0.0000 0 105 540 2850 4470 Truncate\001 -4 1 0 100 0 0 10 0.0000 0 105 480 2850 4665 No Ack\001 --6 -6 1050 1575 1950 1875 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 1575 1950 1575 1950 1875 1050 1875 1050 1575 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 1725 Match Short\001 --6 -6 5400 1575 6300 2175 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 1575 6300 1575 6300 2175 5400 2175 5400 1575 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 1875 Buffer\001 --6 -6 5400 2400 6300 3000 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 5400 2400 6300 2400 6300 3000 5400 3000 5400 2400 -4 1 0 100 0 0 10 0.0000 0 105 405 5850 2700 Buffer\001 --6 -6 1050 2400 1950 2700 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 2400 1950 2400 1950 2700 1050 2700 1050 2400 -4 1 0 100 0 0 10 0.0000 0 105 780 1500 2550 Match Short\001 --6 -6 1050 825 1950 1125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 825 1950 825 1950 1125 1050 1125 1050 825 -4 1 0 100 0 0 10 0.0000 0 105 765 1500 975 Match None\001 --6 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1575 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2025 4050 3375 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 150 675 6600 675 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 150 1350 6600 1350 -2 2 0 1 0 7 100 0 
-1 0.000 0 0 -1 0 0 5 - 2400 4125 3300 4125 3300 4725 2400 4725 2400 4125 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 4500 4050 3675 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 1725 5400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2550 5400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3225 2850 4050 3450 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 1800 1500 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 825 3300 825 3300 1275 2400 1275 2400 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 2625 1500 4125 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1050 4125 1950 4125 1950 4425 1050 4425 1050 4125 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1500 300 1500 825 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 975 2400 975 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 1725 2400 1725 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 2550 2400 2550 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 1875 4275 2400 4275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 1575 3300 1575 3300 2175 2400 2175 2400 1575 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2400 2400 3300 2400 3300 3000 2400 3000 2400 2400 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4050 3300 5250 3300 5250 3750 4050 3750 4050 3300 -4 1 0 100 0 0 10 0.0000 0 105 885 1500 150 Match Entries\001 -4 1 0 100 0 0 10 0.0000 0 135 1290 2850 150 Memory Descriptors\001 -4 1 0 100 0 0 10 0.0000 0 135 1065 5850 150 Memory Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 825 4500 150 Event Queues\001 -4 1 0 100 0 0 10 0.0000 0 105 585 525 1050 RcvMark\001 -4 1 0 100 0 0 10 0.0000 0 105 330 2850 1102 None\001 -4 1 0 100 0 0 10 0.0000 0 135 705 1500 4275 Match Any\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 1725 max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 
150 840 2850 1875 n - short_len\001 -4 1 0 50 0 0 10 0.0000 0 150 810 2850 2550 max_offset=\001 -4 1 0 50 0 0 10 0.0000 0 150 840 2850 2700 n - short_len\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2100 unlink\001 -4 1 0 50 0 0 10 0.0000 0 105 405 2850 2925 unlink\001 -4 1 0 100 0 0 10 0.0000 0 135 930 4650 3675 Message Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 735 4650 3525 Unexpected\001 diff --git a/lustre/portals/doc/portals.fig b/lustre/portals/doc/portals.fig deleted file mode 100644 index 9b1271b..0000000 --- a/lustre/portals/doc/portals.fig +++ /dev/null @@ -1,68 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1350 900 1650 900 1650 1200 1350 1200 1350 900 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 1800 1350 2100 1350 2100 1650 1800 1650 1800 1350 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2250 1800 2550 1800 2550 2100 2250 2100 2250 1800 -2 1 1 1 0 7 100 0 -1 4.000 0 0 -1 0 0 2 - 4200 375 4200 2100 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 525 600 1125 600 1125 2100 525 2100 525 600 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 4425 1275 4875 1275 4875 1950 4425 1950 4425 1275 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 2550 1200 3150 1200 3150 1500 2550 1500 2550 1200 -2 1 0 1 0 7 100 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3000 1425 4425 1425 -2 2 0 1 0 7 100 0 -1 0.000 0 0 -1 0 0 5 - 3600 825 3750 825 3750 1125 3600 1125 3600 825 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2025 1425 2550 1425 -2 2 0 1 0 7 50 0 -1 0.000 0 0 -1 0 0 5 - 4425 750 4875 750 4875 1125 4425 1125 4425 750 -2 1 0 1 0 7 50 0 -1 0.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 3675 975 4425 975 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 2 - 0 0 1.00 60.00 120.00 - 825 1050 1350 1050 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 120.00 - 1500 1125 1500 1350 1500 1500 1650 1500 1800 1500 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 5 - 0 0 1.00 60.00 
120.00 - 1950 1575 1950 1800 1950 1950 2100 1950 2250 1950 - 0.000 1.000 1.000 1.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 975 1125 975 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 0 0 2 - 525 1125 1125 1125 - 0.000 0.000 -3 0 0 1 0 7 100 0 -1 0.000 0 1 0 7 - 0 0 1.00 60.00 120.00 - 3000 1275 3150 1275 3300 1275 3300 1125 3300 975 3450 975 - 3600 975 - 0.000 1.000 1.000 1.000 1.000 1.000 0.000 -4 0 0 100 0 0 10 0.0000 0 105 690 1275 750 Match List\001 -4 1 0 100 0 0 10 0.0000 0 105 780 825 525 Portal Table\001 -4 2 0 100 0 0 10 0.0000 0 135 825 4050 2025 Library Space\001 -4 0 0 100 0 0 10 0.0000 0 135 1110 4350 2175 Application Space\001 -4 1 0 100 0 0 10 0.0000 0 135 660 2850 1050 Descriptor\001 -4 1 0 100 0 0 10 0.0000 0 135 540 2850 825 Memory\001 -4 1 0 100 0 0 10 0.0000 0 135 765 3750 675 Event Queue\001 -4 1 0 100 0 0 10 0.0000 0 135 495 4650 675 Regions\001 -4 1 0 100 0 0 10 0.0000 0 135 540 4650 525 Memory\001 diff --git a/lustre/portals/doc/portals3.bib b/lustre/portals/doc/portals3.bib deleted file mode 100644 index 323b99f..0000000 --- a/lustre/portals/doc/portals3.bib +++ /dev/null @@ -1,124 +0,0 @@ -@Article{ Cplant, - title = { {M}assively {P}arallel {C}omputing with - {C}ommodity {C}omponents }, - author = { Ron Brightwell and David S. Greenberg and Arthur - B. 
Maccabe and Rolf Riesen }, - journal = { Parallel Computing }, - volume = { 26 }, - month = { February }, - pages = { 243-266 }, - year = { 2000 } -} - -@Manual{ Portals, - organization = { Sandia National Laboratories }, - title = { {P}uma {P}ortals }, - note = { http://www.cs.sandia.gov/puma/portals }, - year = { 1997 } -} - -@Techreport{ VIA, - title = { {V}irtual {I}nterface {A}rchitecture - {S}pecification {V}ersion 1.0 }, - author = { {Compaq, Microsoft, and Intel} }, - institution = { Compaq, Microsoft, and Intel }, - month = { December }, - year = { 1997 } -} - -@Techreport{ ST, - title = { {I}nformation {T}echnology - {S}cheduled - {T}ransfer {P}rotocol - {W}orking {D}raft 2.0 }, - author = { {Task Group of Technical Committee T11} }, - institution = { Accredited Standards Committee NCITS }, - month = { July }, - year = { 1998 } -} - -@Manual{ TFLOPS, - organization = { Sandia National Laboratories }, - title = { ASCI Red }, - note = { http://www.sandia.gov/ASCI/TFLOP }, - year = { 1996 } -} - -@Techreport{ GM, - title = { The {GM} {M}essage {P}assing {S}ystem }, - author = { {Myricom, Inc.} }, - institution = { {Myricom, Inc.} }, - year = { 1997 }, -} - -@Article{ MPIstandard, - title = { {MPI}: {A} {M}essage-{P}assing {I}nterface standard }, - author = { {Message Passing Interface Forum} }, - journal = { The International Journal of Supercomputer Applications - and High Performance Computing }, - volume = { 8 }, - year = { 1994 } -} - -@Inproceedings{ PumaOS, - author = "Lance Shuler and Chu Jong and Rolf Riesen and - David van Dresser and Arthur B. Maccabe and - Lee Ann Fisk and T. Mack Stallcup", - booktitle = "Proceeding of the 1995 Intel Supercomputer - User's Group Conference", - title = "The {P}uma Operating System for Massively Parallel Computers", - organization = "Intel Supercomputer User's Group", - year = 1995 -} - -@InProceedings{ SUNMOS, -author = "Arthur B. Maccabe and Kevin S. McCurley and Rolf Riesen and - Stephen R. 
Wheat", -title = "{SUNMOS} for the {Intel} {Paragon}: A Brief User's Guide", -booktitle = "Proceedings of the {Intel} Supercomputer Users' Group. 1994 - Annual North America Users' Conference.", -year = 1994, -pages = "245--251", -month = "June", -location = "ftp.cs.sandia.gov /pub/sunmos/papers/ISUG94-1.ps" -} - -@InProceedings { PumaMPI, - title = { Design and Implementation of {MPI} on {P}uma Portals }, - author = { Ron Brightwell and Lance Shuler }, - booktitle = { Proceedings of the Second MPI Developer's Conference }, - pages = { 18-25 }, - month = { July }, - year = { 1996 } -} - -@Inproceedings{ FM2, - author = { Mario Lauria and Scott Pakin and Andrew Chien }, - title = { {E}fficient {L}ayering for {H}igh {S}peed - {C}ommunication: {F}ast {M}essages 2.x }, - Booktitle = { Proceedings of the IEEE International Symposium - on High Performance Distributed Computing }, - year = { 1998 } -} - -@Manual { CraySHMEM, - title = "SHMEM Technical Note for C, SG-2516 2.3", - organization = "Cray Research, Inc.", - month = "October", - year = 1994 -} - -@Manual { MPI2, - title = "{MPI}-2: {E}xtensions to the {M}essage-{P}assing {I}nterface", - organization = "Message Passing Interface Forum", - note = "http://www.mpi-forum.org/docs/mpi-20-html/mpi2-report.html", - month = "July", - year = 1997 -} - -@InProceedings { PMMPI, - title = { {The Design and Implementation of Zero Copy MPI Using - Commodity Hardware with a High Performance Network} }, - author = { Francis O'Carroll and Hiroshi Tezuka and Atsushi Hori - and Yutaka Ishikawa }, - booktitle = { Proceedings of the ICS }, - year = { 1998 } -} diff --git a/lustre/portals/doc/portals3.lyx b/lustre/portals/doc/portals3.lyx deleted file mode 100644 index 8429280..0000000 --- a/lustre/portals/doc/portals3.lyx +++ /dev/null @@ -1,15944 +0,0 @@ -#LyX 1.2 created this file. 
For more info see http://www.lyx.org/ -\lyxformat 220 -\textclass report -\begin_preamble -\usepackage{fullpage} -\renewenvironment{comment}% -{\begin{quote}\textbf{Discussion}: \slshape}% -{\end{quote}} -\pagestyle{myheadings} -\end_preamble -\language american -\inputencoding auto -\fontscheme pslatex -\graphics default -\paperfontsize 10 -\spacing single -\papersize letterpaper -\paperpackage a4 -\use_geometry 0 -\use_amsmath 0 -\use_natbib 0 -\use_numerical_citations 0 -\paperorientation portrait -\secnumdepth 2 -\tocdepth 2 -\paragraph_separation indent -\defskip medskip -\quotes_language english -\quotes_times 2 -\papercolumns 1 -\papersides 2 -\paperpagestyle headings - -\layout Title - -The Portals 3.2 Message Passing Interface -\newline - Revision 1.1 -\layout Author - -Ron Brightwell -\begin_inset Foot -collapsed true - -\layout Standard - -R. - Brightwell and R. - Riesen are with the Scalable Computing Systems Department, Sandia National - Laboratories, P.O. - Box 5800, Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87111-1110, bright@cs.sandia.gov, rolf@cs.sandia.gov. -\end_inset - -, Arthur B. - Maccabe -\begin_inset Foot -collapsed true - -\layout Standard - -A. - B. - Maccabe is with the Computer Science Department, University of New Mexico, - Albuquerque, NM\SpecialChar ~ -\SpecialChar ~ -87131-1386, maccabe@cs.unm.edu. -\end_inset - -, Rolf Riesen and Trammell Hudson -\layout Abstract - -This report presents a specification for the Portals 3.2 message passing - interface. - Portals 3.2 is intended to allow scalable, high-performance network communicatio -n between nodes of a parallel computing system. - Specifically, it is designed to support a parallel computing platform composed - of clusters of commodity workstations connected by a commodity system area - network fabric. - In addition, Portals 3.2 is well suited to massively parallel processing - and embedded systems. 
- Portals 3.2 represents an adaption of the data movement layer developed - for massively parallel processing platforms, such as the 4500-node Intel - TeraFLOPS machine. - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -clearpage -\backslash -pagenumbering{roman} -\backslash -setcounter{page}{3} -\end_inset - - -\layout Standard - - -\begin_inset LatexCommand \tableofcontents{} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList figure - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Standard - - -\begin_inset FloatList table - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\end_inset - - -\layout Chapter* - -Summary of Changes for Revision 1.1 -\layout Enumerate - -Updated version number to 3.2 throughout the document -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sub:PtlGetId} - -\end_inset - -: added -\family typewriter -PTL_SEGV -\family default - to error list for -\shape italic -PtlGetId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: added -\family typewriter -PTL_ML_TOOLONG -\family default - to error list for -\shape italic -PtlMEAttach -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meunlink} - -\end_inset - -: removed text referring to a list of associated memory descriptors. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added text to describe unlinking a free-floating memory descriptor. 
-\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added entry for -\family typewriter -ptl_seq_t -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added definition of -\family typewriter -max_offset -\family default -. -\layout Enumerate - -added text to clarify -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default -. -\end_deeper -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: modified text for -\family typewriter -unlink_op -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: added text to clarify multiple calls to -\shape italic -PtlNIInit -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: added text to clarify -\family typewriter -unlink_nofit -\family default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:receiving} - -\end_inset - -: removed text indicating that an MD will reject a message if the associated - EQ is full. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - error code and text to indicate that only MDs with no pending operations - can be unlinked. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_MD_INUSE -\family default - return code. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added user id field, MD handle field, and NI specific failure field to - the -\family typewriter -ptl_event_t -\family default - structure. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - -: added -\family typewriter -ptl_ni_fail_t -\family default -. 
-\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - -: added -\family typewriter -PTL_EVENT_UNLINK -\family default - event type. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: removed -\shape slanted -PtlTransId -\shape default -. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -, Section -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -: listed allowable constants with relevant fields. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - -: added -\shape italic -PtlMEAttachAny -\shape default - function. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - -: added -\family typewriter -PTL_PT_FULL -\family default - return code for -\shape italic -PtlMEAttachAny -\shape default -. -\layout Enumerate - -Table -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - -: updated to reflect new event types. -\layout Enumerate - -Section -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: added -\family typewriter -ptl_nid_t -\family default -, -\family typewriter -ptl_pid_t -\family default -, and -\family typewriter -ptl_uid_t -\family default -. -\layout Chapter* - -Summary of Changes for Version 3.1 -\layout Section* - -Thread Issues -\layout Standard - -The most significant change to the interface from version 3.0 to 3.1 involves - the clarification of how the interface interacts with multi-threaded applicatio -ns. - We adopted a generic thread model in which processes define an address - space and threads share the address space. 
- Consideration of the API in the light of threads lead to several clarifications - throughout the document: -\layout Enumerate - -Glossary: -\begin_deeper -\layout Enumerate - -added a definition for -\emph on -thread -\emph default -, -\layout Enumerate - -reworded the definition for -\emph on -process -\emph default -. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - -: added section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:threads} - -\end_inset - - to describe the multi-threading model used by the Portals API. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlinit} - -\end_inset - -: -\emph on -PtlInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ptlfini} - -\end_inset - -: -\emph on -PtlFini -\emph default - should be called once as the process is terminating and not as each thread - terminates. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -: Portals does not define thread ids. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - -: network interfaces are associated with processes, not threads. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -: -\emph on -PtlNIInit -\emph default - must be called at least once and may be called any number of times. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqget} - -\end_inset - -: -\emph on -PtlEQGet -\emph default - returns -\family typewriter -PTL_EQ_EMPTY -\family default - if a thread is blocked on -\emph on -PtlEQWait -\emph default -. 
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:eqwait} - -\end_inset - -: waiting threads are awakened in FIFO order. - -\layout Standard - -Two functions, -\emph on -PtlNIBarrier -\emph default - and -\emph on -PtlEQCount -\emph default - were removed from the API. - -\emph on -PtlNIBarrier -\emph default - was defined to block the calling process until all of the processes in - the application group had invoked -\emph on -PtlNIBarrier -\emph default -. - We now consider this functionality, along with the concept of groups (see - the discussion under -\begin_inset Quotes eld -\end_inset - -other changes -\begin_inset Quotes erd -\end_inset - -), to be part of the runtime system, not part of the Portals API. - -\emph on -PtlEQCount -\emph default - was defined to return the number of events in an event queue. - Because external operations may lead to new events being added and other - threads may remove events, the value returned by -\emph on -PtlEQCount -\emph default - would have to be a hint about the number of events in the event queue. -\layout Section* - -Handling small, unexpected messages -\layout Standard - -Another set of changes relates to handling small unexpected messages in - MPI. - In designing version 3.0, we assumed that each unexpected message would - be placed in a unique memory descriptor. - To avoid the need to process a long list of memory descriptors, we moved - the memory descriptors out of the match list and hung them off of a single - match list entry. - In this way, large unexpected messages would only encounter a single -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - match list entry before encountering the -\begin_inset Quotes eld -\end_inset - -long message -\begin_inset Quotes erd -\end_inset - - match list entry. - Experience with this strategy identified resource management problems with - this approach. 
- In particular, a long sequence of very short (or zero length) messages - could quickly exhaust the memory descriptors constructed for handling unexpecte -d messages. - Our new strategy involves the use of several very large memory descriptors - for small unexpected messages. - Consecutive unexpected messages will be written into the first of these - memory descriptors until the memory descriptor fills up. - When the first of the -\begin_inset Quotes eld -\end_inset - -small memory -\begin_inset Quotes erd -\end_inset - - descriptors fills up, it will be unlinked and subsequent short messages - will be written into the next -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor. - In this case, a -\begin_inset Quotes eld -\end_inset - -short message -\begin_inset Quotes erd -\end_inset - - memory descriptor will be declared full when it does not have sufficient - space for the largest small unexpected message. -\layout Standard - -This lead to two significant changes. - First, each match list entry now has a single memory descriptor rather - than a list of memory descriptors. - Second, in addition to exceeding the operation threshold, a memory descriptor - can be unlinked when the local offset exceeds a specified value. - These changes have lead to several changes in this document: -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{subsec:paddress} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed references to the memory descriptor list, -\layout Enumerate - -changed the portals address translation description to indicate that unlinking - a memory descriptor implies unlinking the associated match list entry--match - list entries can no longer be unlinked independently from the memory descriptor. 
- -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -removed unlink from argument list, -\layout Enumerate - -removed description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -changed wording of the error condition when the Portal table index already - has an associated match list. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - -: removed unlink from argument list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - -: added -\family typewriter -max_offset -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - -: -\begin_deeper -\layout Enumerate - -added description of -\family typewriter -ptl_unlink -\family default - type, -\layout Enumerate - -removed reference to memory descriptor lists, -\layout Enumerate - -changed wording of the error condition when match list entry already has - an associated memory descriptor, -\layout Enumerate - -changed the description of the -\family typewriter -unlink -\family default - argument. - -\end_deeper -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -: removed -\family typewriter -PtlMDInsert -\family default - operation. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: removed references to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: removed references to PtlMDInsert. 
- -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - -: removed reference to memory descriptor list. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - -: revised the MPI example to reflect the changes to the interface. - -\layout Standard - -Several changes have been made to improve the general documentation of the - interface. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_EQ_NONE -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - -: documented the special value -\family typewriter -PTL_ID_ANY -\family default -. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - -: documented the return value -\family typewriter -PTL_INV_EQ -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - -: clarified the description of the -\emph on -PtlMDUpdate -\emph default - function. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:implvals} - -\end_inset - -: introduced a new section to document the implementation defined values. - -\layout Enumerate - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:summary} - -\end_inset - -: modified Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:oconsts} - -\end_inset - - to indicate where each constant is introduced and where it is used. - -\layout Section* - -Other changes -\layout Subsection* - -Implementation defined limits (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -The earlier version provided implementation defined limits for the maximum - number of match entries, the maximum number of memory descriptors, etc. 
- Rather than spanning the entire implementation, these limits are now associated - with individual network interfaces. -\layout Subsection* - -Added User Ids (Section -\begin_inset LatexCommand \ref{sec:uid} - -\end_inset - -) -\layout Standard - -Group Ids had been used to simplify access control entries. - In particular, a process could allow access for all of the processes in - a group. - User Ids have been introduced to regain this functionality. - We use user ids to fill this role. -\layout Subsection* - -Removed Group Ids and Rank Ids (Section -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - -) -\layout Standard - -The earlier version of Portals had two forms for addressing processes: <node - id, process id> and <group id, rank id>. - A process group was defined as the collection of processes created during - application launch. - Each process in the group was given a unique rank id in the range 0 to - -\begin_inset Formula $n-1$ -\end_inset - - where -\begin_inset Formula $n$ -\end_inset - - was the number of processes in the group. - We removed groups because they are better handled in the runtime system. -\layout Subsection* - -Match lists (Section -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -) -\layout Standard - -It is no longer illegal to have an existing match entry when calling PtlMEAttach. - A position argument was added to the list of arguments supplied to -\emph on -PtlMEAttach -\emph default - to specify whether the new match entry is prepended or appended to the - existing list. - If there is no existing match list, the position argument is ignored. -\layout Subsection* - -Unlinking Memory Descriptors (Section -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -) -\layout Standard - -Previously, a memory descriptor could be unlinked if the offset exceeded - a threshold upon the completion of an operation.
- In this version, the unlinking is delayed until there is a matching operation - which requires more memory than is currently available in the descriptor. - In addition to changes in section, this led to a revision of Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - -. -\layout Subsection* - -Split Phase Operations and Events (Section -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - -) -\layout Standard - -Previously, there were five types of events: -\family typewriter -PTL_EVENT_PUT -\family default -, -\family typewriter -PTL_EVENT_GET -\family default -, -\family typewriter -PTL_EVENT_REPLY -\family default -, -\family typewriter -PTL_EVENT_SENT -\family default -, and -\family typewriter -PTL_EVENT_ACK. - -\family default -The first four of these reflected the completion of potentially long operations. - We have introduced new event types to reflect the fact that long operations - have a distinct starting point and a distinct completion point. - Moreover, the completion may be successful or unsuccessful. -\layout Standard - -In addition to providing a mechanism for reporting failure to higher levels - of software, this split provides an opportunity for improved ordering - semantics. - Previously, if one process initiated two operations (e.g., two put operations) - on a remote process, these operations were guaranteed to complete in the - same order that they were initiated. - Now, we only guarantee that the initiation events are delivered in the - same order. - In particular, the operations do not need to complete in the order that - they were initiated. -\layout Subsection* - -Well known process ids (Section -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - -) -\layout Standard - -To support the notion of -\begin_inset Quotes eld -\end_inset - -well known process ids, -\begin_inset Quotes erd -\end_inset - - we added a process id argument to the arguments for PtlNIInit.
-\layout Chapter* - -Glossary -\layout Description - -API Application Programming Interface. - A definition of the functions and semantics provided by a library of functions. - -\layout Description - -Initiator A -\emph on -process -\emph default - that initiates a message operation. - -\layout Description - -Message An application-defined unit of data that is exchanged between -\emph on -processes -\emph default -. - -\layout Description - -Message\SpecialChar ~ -Operation Either a put operation, which writes data, or a get operation, - which reads data. - -\layout Description - -Network A network provides point-to-point communication between -\emph on -nodes -\emph default -. - Internally, a network may provide multiple routes between endpoints (to - improve fault tolerance or to improve performance characteristics); however, - multiple paths will not be exposed outside of the network. - -\layout Description - -Node A node is an endpoint in a -\emph on -network -\emph default -. - Nodes provide processing capabilities and memory. - A node may provide multiple processors (an SMP node) or it may act as a - -\emph on -gateway -\emph default - between networks. - -\layout Description - -Process A context of execution. - A process defines a virtual memory (VM) context. - This context is not shared with other processes. - Several threads may share the VM context defined by a process. - -\layout Description - -Target A -\emph on -process -\emph default - that is acted upon by a message operation. - -\layout Description - -Thread A context of execution that shares a VM context with other threads.
- -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -cleardoublepage -\layout Standard - -\backslash -setcounter{page}{1} -\backslash -pagenumbering{arabic} -\end_inset - - -\layout Chapter - -Introduction -\begin_inset LatexCommand \label{sec:intro} - -\end_inset - - -\layout Section - -Overview -\layout Standard - -This document describes an application programming interface for message - passing between nodes in a system area network. - The goal of this interface is to improve the scalability and performance - of network communication by defining the functions and semantics of message - passing required for scaling a parallel computing system to ten thousand - nodes. - This goal is achieved by providing an interface that will allow a quality - implementation to take advantage of the inherently scalable design of Portals. -\layout Standard - -This document is divided into several sections: -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:intro} - -\end_inset - ----Introduction This section describes the purpose and scope of the Portals - API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:apiover} - -\end_inset - ----An\SpecialChar ~ -Overview\SpecialChar ~ -of\SpecialChar ~ -the\SpecialChar ~ -Portals\SpecialChar ~ -3.1\SpecialChar ~ -API This section gives a brief overview of the - Portals API. - The goal is to introduce the key concepts and terminology used in the descripti -on of the API. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:api} - -\end_inset - ----The\SpecialChar ~ -Portals\SpecialChar ~ -3.2\SpecialChar ~ -API This section describes the functions and semantics of - the Portals application programming interface. 
- -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:semantics} - -\end_inset - ----The\SpecialChar ~ -Semantics\SpecialChar ~ -of\SpecialChar ~ -Message\SpecialChar ~ -Transmission This section describes the semantics - of message transmission. - In particular, the information transmitted in each type of message and - the processing of incoming messages. - -\layout Description - -Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:examples} - -\end_inset - ----Examples This section presents several examples intended to illustrate - the use of the Portals API. - -\layout Section - -Purpose -\layout Standard - -Existing message passing technologies available for commodity cluster networking - hardware do not meet the scalability goals required by the Cplant\SpecialChar ~ - -\begin_inset LatexCommand \cite{Cplant} - -\end_inset - - project at Sandia National Laboratories. - The goal of the Cplant project is to construct a commodity cluster that - can scale to the order of ten thousand nodes. - This number greatly exceeds the capacity for which existing message passing - technologies have been designed and implemented. -\layout Standard - -In addition to the scalability requirements of the network, these technologies - must also be able to support a scalable implementation of the Message Passing - Interface (MPI)\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPIstandard} - -\end_inset - - standard, which has become the -\shape italic -de facto -\shape default - standard for parallel scientific computing. - While MPI does not impose any scalability limitations, existing message - passing technologies do not provide the functionality needed to allow implement -ations of MPI to meet the scalability requirements of Cplant.
-\layout Standard - -The following are properties of a network architecture that do not impose - any inherent scalability limitations: -\layout Itemize - -Connectionless - Many connection-oriented architectures, such as VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - - and TCP/IP sockets, have limitations on the number of peer connections - that can be established. - -\layout Itemize - -Network independence - Many communication systems depend on the host processor - to perform operations in order for messages in the network to be consumed. - Message consumption from the network should not be dependent on host processor - activity, such as the operating system scheduler or user-level thread scheduler. - -\layout Itemize - -User-level flow control - Many communication systems manage flow control - internally to avoid depleting resources, which can significantly impact - performance as the number of communicating processes increases. - -\layout Itemize - -OS Bypass - High performance network communication should not involve memory - copies into or out of a kernel-managed protocol stack. - -\layout Standard - -The following are properties of a network architecture that do not impose - scalability limitations for an implementation of MPI: -\layout Itemize - -Receiver-managed - Sender-managed message passing implementations require - a persistent block of memory to be available for every process, requiring - memory resources to increase with job size and requiring user-level flow - control mechanisms to manage these resources. - -\layout Itemize - -User-level Bypass - While OS Bypass is necessary for high-performance, it - alone is not sufficient to support the Progress Rule of MPI asynchronous - operations. - -\layout Itemize - -Unexpected messages - Few communication systems have support for receiving - messages for which there is no prior notification. 
- Support for these types of messages is necessary to avoid flow control - and protocol overhead. - -\layout Section - -Background -\layout Standard - -Portals was originally designed for and implemented on the nCube machine - as part of the SUNMOS (Sandia/UNM OS)\SpecialChar ~ - -\begin_inset LatexCommand \cite{SUNMOS} - -\end_inset - - and Puma\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaOS} - -\end_inset - - lightweight kernel development projects. - Portals went through two design phases, the latter of which is used on - the 4500-node Intel TeraFLOPS machine\SpecialChar ~ - -\begin_inset LatexCommand \cite{TFLOPS} - -\end_inset - -. - Portals have been very successful in meeting the needs of such a large - machine, not only as a layer for a high-performance MPI implementation\SpecialChar ~ - -\begin_inset LatexCommand \cite{PumaMPI} - -\end_inset - -, but also for implementing the scalable run-time environment and parallel - I/O capabilities of the machine. -\layout Standard - -The second generation Portals implementation was designed to take full advantage - of the hardware architecture of large MPP machines. - However, efforts to implement this same design on commodity cluster technology - identified several limitations, due to the differences in network hardware - as well as to shortcomings in the design of Portals. -\layout Section - -Scalability -\layout Standard - -The primary goal in the design of Portals is scalability. - Portals are designed specifically for an implementation capable of supporting - a parallel job running on tens of thousands of nodes. - Performance is critical only in terms of scalability. - That is, the level of message passing performance is characterized by how - far it allows an application to scale and not by how it performs in micro-bench -marks (e.g., a two node bandwidth or latency test). -\layout Standard - -The Portals API is designed to allow for scalability, not to guarantee it. 
- Portals cannot overcome the shortcomings of a poorly designed application - program. - Applications that have inherent scalability limitations, either through - design or implementation, will not be transformed by Portals into scalable - applications. - Scalability must be addressed at all levels. - Portals do not inhibit scalability, but do not guarantee it either. -\layout Standard - -To support scalability, the Portals interface maintains a minimal amount - of state. - Portals provide reliable, ordered delivery of messages between pairs of - processes. - They are connectionless: a process is not required to explicitly establish - a point-to-point connection with another process in order to communicate. - Moreover, all buffers used in the transmission of messages are maintained - in user space. - The target process determines how to respond to incoming messages, and - messages for which there are no buffers are discarded. -\layout Section - -Communication Model -\layout Standard - -Portals combine the characteristics of both one-sided and two-sided communication. - They define a -\begin_inset Quotes eld -\end_inset - -matching put -\begin_inset Quotes erd -\end_inset - - operation and a -\begin_inset Quotes eld -\end_inset - -matching get -\begin_inset Quotes erd -\end_inset - - operation. - The destination of a put (or send) is not an explicit address; instead, - each message contains a set of match bits that allow the receiver to determine - where incoming messages should be placed. - This flexibility allows Portals to support both traditional one-sided operation -s and two-sided send/receive operations. -\layout Standard - -Portals allows the target to determine whether incoming messages are acceptable. - A target process can choose to accept message operations from any specific - process or can choose to ignore message operations from any specific process.
-\layout Section - -Zero Copy, OS Bypass and Application Bypass -\layout Standard - -In traditional system architectures, network packets arrive at the network - interface card (NIC), are passed through one or more protocol layers in - the operating system, and eventually copied into the address space of the - application. - As network bandwidth began to approach memory copy rates, reduction of - memory copies became a critical concern. - This concern led to the development of zero-copy message passing protocols - in which message copies are eliminated or pipelined to avoid the loss of - bandwidth. -\layout Standard - -A typical zero-copy protocol has the NIC generate an interrupt for the CPU - when a message arrives from the network. - The interrupt handler then controls the transfer of the incoming message - into the address space of the appropriate application. - The interrupt latency, the time from the initiation of an interrupt until - the interrupt handler is running, is fairly significant. - To avoid this cost, some modern NICs have processors that can be programmed - to implement part of a message passing protocol. - Given a properly designed protocol, it is possible to program the NIC to - control the transfer of incoming messages, without needing to interrupt - the CPU. - Because this strategy does not need to involve the OS on every message - transfer, it is frequently called -\begin_inset Quotes eld -\end_inset - -OS Bypass. -\begin_inset Quotes erd -\end_inset - - ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, VIA\SpecialChar ~ - -\begin_inset LatexCommand \cite{VIA} - -\end_inset - -, FM\SpecialChar ~ - -\begin_inset LatexCommand \cite{FM2} - -\end_inset - -, GM\SpecialChar ~ - -\begin_inset LatexCommand \cite{GM} - -\end_inset - -, and Portals are examples of OS Bypass protocols.
-\layout Standard - -Many protocols that support OS Bypass still require that the application - actively participate in the protocol to ensure progress. - As an example, the long message protocol of PM requires that the application - receive and reply to a request to put or get a long message. - This complicates the runtime environment, requiring a thread to process - incoming requests, and significantly increases the latency required to - initiate a long message protocol. - The Portals message passing protocol does not require activity on the part - of the application to ensure progress. - We use the term -\begin_inset Quotes eld -\end_inset - -Application Bypass -\begin_inset Quotes erd -\end_inset - - to refer to this aspect of the Portals protocol. -\layout Section - -Faults -\layout Standard - -Given the number of components that we are dealing with and the fact that - we are interested in supporting applications that run for very long times, - failures are inevitable. - The Portals API recognizes that the underlying transport may not be able - to successfully complete an operation once it has been initiated. - This is reflected in the fact that the Portals API reports three types - of events: events indicating the initiation of an operation, events indicating - the successful completion of an operation, and events indicating the unsuccessf -ul completion of an operation. - Every initiation event is eventually followed by a successful completion - event or an unsuccessful completion event. -\layout Standard - -Between the time an operation is started and the time that the operation - completes (successfully or unsuccessfully), any memory associated with - the operation should be considered volatile. - That is, the memory may be changed in unpredictable ways while the operation - is progressing. - Once the operation completes, the memory associated with the operation - will not be subject to further modification (from this operation). 
- Notice that unsuccessful operations may alter memory in an essentially - unpredictable fashion. -\layout Chapter - -An Overview of the Portals API -\begin_inset LatexCommand \label{sec:apiover} - -\end_inset - - -\layout Standard - -In this section, we give a conceptual overview of the Portals API. - The goal is to provide a context for understanding the detailed description - of the API presented in the next section. -\layout Section - -Data Movement -\begin_inset LatexCommand \label{sec:dmsemantics} - -\end_inset - - -\layout Standard - -A Portal represents an opening in the address space of a process. - Other processes can use a Portal to read (get) or write (put) the memory - associated with the portal. - Every data movement operation involves two processes, the -\series bold -initiator -\series default - and the -\series bold -target -\series default -. - The initiator is the process that initiates the data movement operation. - The target is the process that responds to the operation by either accepting - the data for a put operation, or replying with the data for a get operation. -\layout Standard - -In this discussion, activities attributed to a process may refer to activities - that are actually performed by the process or -\emph on -on behalf of the process -\emph default -. - The inclusiveness of our terminology is important in the context of -\emph on -application bypass -\emph default -. - In particular, when we note that the target sends a reply in the case of - a get operation, it is possible that reply will be generated by another - component in the system, bypassing the application. -\layout Standard - -Figures\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:put} - -\end_inset - - and -\begin_inset LatexCommand \ref{fig:get} - -\end_inset - - present graphical interpretations of the Portal data movement operations: - put and get. - In the case of a put operation, the initiator sends a put request message - containing the data to the target. 
- The target translates the Portal addressing information in the request - using its local Portal structures. - When the request has been processed, the target optionally sends an acknowledge -ment message. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename put.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Put (Send) -\begin_inset LatexCommand \label{fig:put} - -\end_inset - - -\end_inset - - -\layout Standard - -In the case of a get operation, the initiator sends a get request to the - target. - As with the put operation, the target translates the Portal addressing - information in the request using its local Portal structures. - Once it has translated the Portal addressing information, the target sends - a reply that includes the requested data. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename get.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 218pt - lyxheight 119pt -\end_inset - - -\layout Caption - -Portal Get -\begin_inset LatexCommand \label{fig:get} - -\end_inset - - -\end_inset - - -\layout Standard - -We should note that Portal address translations are only performed on nodes - that respond to operations initiated by other nodes. - Acknowledgements and replies to get operations bypass the portals address - translation structures. 
-\layout Section - -Portal Addressing -\begin_inset LatexCommand \label{subsec:paddress} - -\end_inset - - -\layout Standard - -One-sided data movement models (e.g., shmem\SpecialChar ~ - -\begin_inset LatexCommand \cite{CraySHMEM} - -\end_inset - -, ST\SpecialChar ~ - -\begin_inset LatexCommand \cite{ST} - -\end_inset - -, MPI-2\SpecialChar ~ - -\begin_inset LatexCommand \cite{MPI2} - -\end_inset - -) typically use a triple to address memory on a remote node. - This triple consists of a process id, memory buffer id, and offset. - The process id identifies the target process, the memory buffer id specifies - the region of memory to be used for the operation, and the offset specifies - an offset within the memory buffer. -\layout Standard - -In addition to the standard address components (process id, memory buffer - id, and offset), a Portal address includes a set of match bits. - This addressing model is appropriate for supporting one-sided operations - as well as traditional two-sided message passing operations. - Specifically, the Portals API provides the flexibility needed for an efficient - implementation of MPI-1, which defines two-sided operations with one-sided - completion semantics. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:portals} - -\end_inset - - presents a graphical representation of the structures used by a target - in the interpretation of a Portal address. - The process id is used to route the message to the appropriate node and - is not reflected in this diagram. - The memory buffer id, called the -\series bold -portal id -\series default -, is used as an index into the Portal table. - Each element of the Portal table identifies a match list. 
- Each element of the match list specifies two bit patterns: a set of -\begin_inset Quotes eld -\end_inset - -don't care -\begin_inset Quotes erd -\end_inset - - bits, and a set of -\begin_inset Quotes eld -\end_inset - -must match -\begin_inset Quotes erd -\end_inset - - bits. - In addition to the two sets of match bits, each match list element has - at most one memory descriptor. - Each memory descriptor identifies a memory region and an optional event - queue. - The memory region specifies the memory to be used in the operation and - the event queue is used to record information about these operations. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename portals.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 305pt - lyxheight 106pt -\end_inset - - -\layout Caption - -Portal Addressing Structures -\begin_inset LatexCommand \label{fig:portals} - -\end_inset - - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:flow} - -\end_inset - - illustrates the steps involved in translating a Portal address, starting - from the first element in a match list. - If the match criteria specified in the match list entry are met and the - memory descriptor list accepts the operation -\begin_inset Foot -collapsed true - -\layout Standard - -Memory descriptors can reject operations because a threshold has been exceeded - or because the memory region does not have sufficient space, see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset - -, the operation (put or get) is performed using the memory region specified - in the memory descriptor. 
- If the memory descriptor specifies that it is to be unlinked when a threshold - has been exceeded, the match list entry is removed from the match list - and the resources associated with the memory descriptor and match list - entry are reclaimed. - Finally, if there is an event queue specified in the memory descriptor, - the operation is logged in the event queue. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename flow_new.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 447pt - lyxheight 282pt -\end_inset - - -\layout Caption - -Portals Address Translation -\begin_inset LatexCommand \label{fig:flow} - -\end_inset - - -\end_inset - - -\layout Standard - -If the match criteria specified in the match list entry are not met, or - there is no memory descriptor associated with the match list entry, or - the memory descriptor associated with the match list entry rejects the - operation, the address translation continues with the next match list entry. - If the end of the match list has been reached, the address translation - is aborted and the incoming request is discarded. -\layout Section - -Access Control -\layout Standard - -A process can control access to its portals using an access control list. - Each entry in the access control list specifies a process id and a Portal - table index. - The access control list is actually an array of entries. - Each incoming request includes an index into the access control list (i.e., - a -\begin_inset Quotes eld -\end_inset - -cookie -\begin_inset Quotes erd -\end_inset - - or hint). - If the id of the process issuing the request doesn't match the id specified - in the access control list entry or the Portal table index specified in - the request doesn't match the Portal table index specified in the access - control list entry, the request is rejected.
- Process identifiers and Portal table indexes may include wild card values - to increase the flexibility of this mechanism. - -\layout Standard - -Two aspects of this design merit further discussion. - First, the model assumes that the information in a message header, the - sender's id in particular, is trustworthy. - In most contexts, we assume that the entity that constructs the header - is trustworthy; however, using cryptographic techniques, we could easily - devise a protocol that would ensure the authenticity of the sender. -\layout Standard - -Second, because the access check is performed by the receiver, it is possible - that a malicious process will generate thousands of messages that will - be denied by the receiver. - This could saturate the network and/or the receiver, resulting in a -\emph on -denial of service -\emph default - attack. - Moving the check to the sender using capabilities, would remove the potential - for this form of attack. - However, the solution introduces the complexities of capability management - (exchange of capabilities, revocation, protections, etc). -\layout Section - -Multi-threaded Applications -\begin_inset LatexCommand \label{sec:threads} - -\end_inset - - -\layout Standard - -The Portals API supports a generic view of multi-threaded applications. - From the perspective of the Portals API, an application program is defined - by a set of processes. - Each process defines a unique address space. - The Portals API defines access to this address space from other processes - (using portals addressing and the data movement operations). - A process may have one or more -\emph on -threads -\emph default - executing in its address space. - -\layout Standard - -With the exception of -\emph on -PtlEQWait -\emph default - every function in the Portals API is non-blocking and atomic with respect - to both other threads and external operations that result from data movement - operations. 
- While individual operations are atomic, sequences of these operations may - be interleaved between different threads and with external operations. - The Portals API does not provide any mechanisms to control this interleaving. - It is expected that these mechanisms will be provided by the API used to - create threads. -\layout Chapter - -The Portals API -\begin_inset LatexCommand \label{sec:api} - -\end_inset - - -\layout Section - -Naming Conventions -\begin_inset LatexCommand \label{sec:conv} - -\end_inset - - -\layout Standard - -The Portals API defines two types of entities: functions and types. - Function names always start with -\emph on -Ptl -\emph default - and use mixed upper and lower case. - When used in the body of this report, function names appear in italic face, - e.g., -\emph on -PtlInit -\emph default -. - The functions associated with an object type will have names that start - with -\emph on -Ptl -\emph default -, followed by the two letter object type code shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - As an example, the function -\emph on -PtlEQAlloc -\emph default - allocates resources for an event queue.
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Object Type Codes -\begin_inset LatexCommand \label{tab:objcodes} - -\end_inset - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\newline - -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\emph on -xx -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Section -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -EQ -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Event Queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - MD -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Memory Descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> 
-</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - ME -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Match list Entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - NI -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - Network Interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Type names use lower case with underscores to separate words. - Each type name starts with -\family typewriter -ptl -\family default -_ and ends with -\family typewriter -_t -\family default -. - When used in the body of this report, type names appear in a fixed font, - e.g., -\family typewriter -ptl_match_bits_t -\family default -. -\layout Standard - -Names for constants use upper case with underscores to separate words. - Each constant name starts with -\family typewriter -PTL_ -\family default -. - When used in the body of this report, constant names appear in a fixed font, - e.g., -\family typewriter -PTL_OK -\family default -. -\layout Section - -Base Types -\layout Standard - -The Portals API defines a variety of base types. - These types represent a simple renaming of the base types provided by the - C programming language.
- In most cases these new type names have been introduced to improve type - safety and to avoid issues arising from differences in representation sizes - (e.g., 16-bit or 32-bit integers). -\layout Subsection - -Sizes -\begin_inset LatexCommand \label{sec:size-t} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_size_t -\family default - is an unsigned 64-bit integral type used for representing sizes. -\layout Subsection - -Handles -\begin_inset LatexCommand \label{sec:handle-type} - -\end_inset - - -\layout Standard - -Objects maintained by the API are accessed through handles. - Handle types have names of the form -\family typewriter -ptl_handle_ -\emph on -xx -\emph default -_t -\family default -, where -\emph on -xx -\emph default - is one of the two letter object type codes shown in Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:objcodes} - -\end_inset - -. - For example, the type -\family typewriter -ptl_handle_ni_t -\family default - is used for network interface handles. -\layout Standard - -Each type of object is given a unique handle type to enhance type checking. - The type, -\family typewriter -ptl_handle_any_t -\family default -, can be used when a generic handle is needed. - Every handle value can be converted into a value of type -\family typewriter -ptl_handle_any_t -\family default - without loss of information. -\layout Standard - -Handles are not simple values. - Every portals object is associated with a specific network interface and - an identifier for this interface (along with an object identifier) is part - of the handle for the object. -\layout Standard - -The special value -\family typewriter -PTL_EQ_NONE -\family default -, of type -\family typewriter -ptl_handle_eq_t -\family default -, is used to indicate the absence of an event queue. 
- See sections -\begin_inset LatexCommand \ref{sec:mdfree} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - for uses of this value. -\layout Subsection - -Indexes -\begin_inset LatexCommand \label{sec:index-type} - -\end_inset - - -\layout Standard - -The types -\family typewriter -ptl_pt_index_t -\family default - and -\family typewriter -ptl_ac_index_t -\family default - are integral types used for representing Portal table indexes and access - control table indexes, respectively. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:niinit} - -\end_inset - - for limits on values of these types. -\layout Subsection - -Match Bits -\begin_inset LatexCommand \label{sec:mb-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_match_bits_t -\family default - is capable of holding unsigned 64-bit integer values. -\layout Subsection - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_interface_t -\family default - is an integral type used for identifying different network interfaces. - Users will need to consult the local documentation to determine appropriate - values for the interfaces available. - The special value -\family typewriter -PTL_IFACE_DEFAULT -\family default - identifies the default interface. -\layout Subsection - -Identifiers -\begin_inset LatexCommand \label{sec:id-type} - -\end_inset - - -\layout Standard - -The type -\family typewriter -ptl_nid_t -\family default - is an integral type used for representing node ids -\family typewriter -, ptl_pid_t -\family default - is an integral type for representing process ids, and -\family typewriter -ptl_uid_t -\family default - is an integral type for representing user ids.
-\layout Standard - -The special value -\family typewriter -PTL_PID_ANY -\family default - matches any process identifier, PTL_NID_ANY matches any node identifier, - and -\family typewriter -PTL_UID_ANY -\family default - matches any user identifier. - See sections -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - and\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - for uses of these values. -\layout Subsection - -Status Registers -\begin_inset LatexCommand \label{sec:stat-type} - -\end_inset - - -\layout Standard - -Each network interface maintains an array of status registers that can be - accessed using the -\family typewriter -PtlNIStatus -\family default - function (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - The type -\family typewriter -ptl_sr_index_t -\family default - defines the types of indexes that can be used to access the status registers. - The only index defined for all implementations is -\family typewriter -PTL_SR_DROP_COUNT -\family default - which identifies the status register that counts the dropped requests for - the interface. - Other indexes (and registers) may be defined by the implementation. -\layout Standard - -The type -\family typewriter -ptl_sr_value_t -\family default - defines the types of values held in status registers. - This is a signed integer type. - The size is implementation dependent, but must be at least 32 bits. -\layout Section - -Initialization and Cleanup -\begin_inset LatexCommand \label{sec:init} - -\end_inset - - -\layout Standard - -The Portals API includes a function, -\emph on -PtlInit -\emph default -, to initialize the library and a function, -\emph on -PtlFini -\emph default -, to clean up after the application is done using the library.
-\layout Subsection - -PtlInit -\begin_inset LatexCommand \label{sec:ptlinit} - -\end_inset - - -\layout LyX-Code - -int PtlInit( int *max_interfaces ); -\layout Standard -\noindent -The -\emph on -PtlInit -\emph default - function initializes the Portals library. - PtlInit must be called at least once by a process before any thread makes - a Portals function call, but may be safely called more than once. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_FAIL Indicates an error during initialization. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -max_interfaces -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -max_interfaces -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the maximum number of interfaces - that can be initialized. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlFini -\begin_inset LatexCommand \label{sec:ptlfini} - -\end_inset - - -\layout LyX-Code - -void PtlFini( void ); -\layout Standard -\noindent -The -\emph on -PtlFini -\emph default - function cleans up after the Portals library is no longer needed by a process. 
- After this function is called, calls to any of the functions defined by - the Portal API or use of the structures set up by the Portals API will - result in undefined behavior. - This function should be called once and only once during termination by - a process. - Typically, this function will be called in the exit sequence of a process. - Individual threads should not call PtlFini when they terminate. -\layout Section - -Network Interfaces -\begin_inset LatexCommand \label{sec:ni} - -\end_inset - - -\layout Standard - -The Portals API supports the use of multiple network interfaces. - However, each interface is treated as an independent entity. - Combining interfaces (e.g., -\begin_inset Quotes eld -\end_inset - -bonding -\begin_inset Quotes erd -\end_inset - - to create a higher bandwidth connection) must be implemented by the application - or embedded in the underlying network. - Interfaces are treated as independent entities to make it easier to cache - information on individual network interface cards. -\layout Standard - -Once initialized, each interface provides a Portal table, an access control - table, and a collection of status registers. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for a discussion of updating Portal table entries using the -\emph on -PtlMEAttach -\emph default - function. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - for a discussion of the initialization and updating of entries in the access - control table. - See Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - for a discussion of the -\emph on -PtlNIStatus -\emph default - function which can be used to determine the value of a status register. -\layout Standard - -Every other type of Portal object (e.g., memory descriptor, event queue, or - match list entry) is associated with a specific network interface. 
- The association to a network interface is established when the object is - created and is encoded in the handle for the object. -\layout Standard - -Each network interface is initialized and shutdown independently. - The initialization routine, -\emph on -PtlNIInit -\emph default -, returns a handle for an interface object which is used in all subsequent - Portal operations. - The -\emph on -PtlNIFini -\emph default - function is used to shutdown an interface and release any resources that - are associated with the interface. - Network interface handles are associated with processes, not threads. - All threads in a process share all of the network interface handles. -\layout Standard - -The Portals API also defines the -\emph on -PtlNIStatus -\emph default - function to query the status registers for a network interface, the -\emph on -PtlNIDist -\emph default - function to determine the -\begin_inset Quotes eld -\end_inset - -distance -\begin_inset Quotes erd -\end_inset - - to another process, and the -\emph on -PtlNIHandle -\emph default - function to determine the network interface that an object is associated - with. -\layout Subsection - -PtlNIInit -\begin_inset LatexCommand \label{sec:niinit} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - int max_match_entries; -\newline - int max_mem_descriptors; -\newline - int max_event_queues; -\newline - ptl_ac_index_t max_atable_index; -\newline - ptl_pt_index_t max_ptable_index; -\newline -} ptl_ni_limits_t; -\newline - -\newline -int PtlNIInit( ptl_interface_t interface, -\newline - ptl_pid_t pid, -\newline - ptl_ni_limits_t* desired, -\newline - ptl_ni_limits_t* actual, -\newline - ptl_handle_ni_t* handle ); -\layout Standard - -Values of type -\family typewriter -ptl_ni_limits_t -\family default - include the following members: -\layout Description - -max_match_entries Maximum number of match entries that can be allocated - at any one time.
-\layout Description - -max_mem_descriptors Maximum number of memory descriptors that can be allocated - at any one time. -\layout Description - -max_event_queues Maximum number of event queues that can be allocated at - any one time. -\layout Description - -max_atable_index Largest access control table index for this interface, - valid indexes range from zero to -\family typewriter -max_atable_index -\family default -, inclusive. -\layout Description - -max_ptable_index Largest Portal table index for this interface, valid indexes - range from zero to -\family typewriter -max_ptable_index -\family default -, inclusive. -\layout Standard -\noindent -The -\emph on -PtlNIInit -\emph default - function is used to initialize the Portals API for a network interface. - This function must be called at least once by each process before any other - operations that apply to the interface by any process or thread. - For subsequent calls to -\shape italic -PtlNIInit -\shape default - from within the same process (either by different threads or the same thread), - the desired limits will be ignored and the call will return the existing - NI handle. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INIT_DUP Indicates a duplicate initialization of -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INIT_INV Indicates that -\family typewriter -interface -\family default - is not a valid network interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to initialize the - interface. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -pid -\family default - is not a valid process id.
-\layout Description - -PTL_SEGV Indicates that -\family typewriter -actual -\family default -or -\family typewriter - handle -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the network interface to be initialized. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - for a discussion of values used to identify network interfaces.) -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -pid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the desired process id (for well known process ids). - The value -\family typewriter -PTL_PID_ANY -\family default - may be used to have the process id assigned by the underlying library. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -desired -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If non-NULL, points to a structure that holds the desired limits. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -actual -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, the location pointed to by actual will hold the actual - limits. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the interface. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -The use of desired is implementation dependent. - In particular, an implementation may choose to ignore this argument. -\layout Subsection - -PtlNIFini -\begin_inset LatexCommand \label{sec:nifini} - -\end_inset - - -\layout LyX-Code - -int PtlNIFini( ptl_handle_ni_t interface ); -\layout Standard -\noindent -The -\emph on -PtlNIFini -\emph default - function is used to release the resources allocated for a network interface. - Once the -\emph on -PtlNIFini -\emph default - operation has been started, the results of pending API operations (e.g., - operations initiated by another thread) for this interface are undefined. - Similarly, the effects of incoming operations (puts and gets) or return - values (acknowledgements and replies) for this interface are undefined. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the interface to shutdown. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlNIStatus -\begin_inset LatexCommand \label{sec:nistatus} - -\end_inset - - -\layout LyX-Code - -int PtlNIStatus( ptl_handle_ni_t interface, -\newline - ptl_sr_index_t status_register, -\newline - ptl_sr_value_t* status ); -\layout Standard -\noindent -The -\emph on -PtlNIStatus -\emph default - function returns the value of a status register for the specified interface. - (See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - for more information on status register indexes and status register values.) -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_INV_SR_INDX Indicates that -\family typewriter -status_register -\family default - is not a valid status register. 
- -\layout Description - -PTL_SEGV Indicates that -\family typewriter -status -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -status_register -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -An index for the status register to read. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -status -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the current value of the status - register. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -The only status register that must be defined is a drop count register ( -\family typewriter -PTL_SR_DROP_COUNT -\family default -). - Implementations may define additional status registers. - Identifiers for the indexes associated with these registers should start - with the prefix -\family typewriter -PTL_SR_ -\family default -. -\layout Subsection - -PtlNIDist -\layout LyX-Code - -int PtlNIDist( ptl_handle_ni_t interface, -\newline - ptl_process_id_t process, -\newline - unsigned long* distance ); -\layout Standard -\noindent -The -\emph on -PtlNIDist -\emph default - function returns the distance to another process using the specified interface. - Distances are only defined relative to an interface. - Distance comparisons between different interfaces on the same process may - be meaningless. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. 
- -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -process -\family default - is not a valid process identifier. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -distance -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -process -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -An identifier for the process whose distance is being requested. 
- -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -distance -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the distance to the remote - process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -This function should return a static measure of distance. - Examples include minimum latency, the inverse of available bandwidth, or - the number of switches between the two endpoints. -\layout Subsection - -PtlNIHandle -\layout LyX-Code - -int PtlNIHandle( ptl_handle_any_t handle, -\newline - ptl_handle_ni_t* interface ); -\layout Standard -\noindent -The -\emph on -PtlNIHandle -\emph default - function returns a handle for the network interface with which the object - identified by -\family typewriter -handle -\family default - is associated. - If the object identified by -\family typewriter -handle -\family default - is a network interface, this function returns the same value it is passed. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_HANDLE Indicates that -\family typewriter -handle -\family default - is not a valid handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -interface -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the object. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the network interface - associated with -\family typewriter -handle -\family default -. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Every handle should encode the network interface and the object id relative - to this handle. - Both are presumably encoded using integer values. -\layout Section - -User Identification -\begin_inset LatexCommand \label{sec:uid} - -\end_inset - - -\layout Standard - -Every process runs on behalf of a user. 
- -\layout Subsection - -PtlGetUid -\layout LyX-Code - -int PtlGetUid( ptl_handle_ni_t ni_handle, -\newline - ptl_uid_t* uid ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -uid -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ni_handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A network interface handle.
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -uid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the user id for the calling - process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Note that user identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, a process may have multiple - user identifiers. -\layout Section - -Process Identification -\begin_inset LatexCommand \label{sec:pid} - -\end_inset - - -\layout Standard - -Processes that use the Portals API can be identified using a node id and - process id. - Every node accessible through a network interface has a unique node identifier - and every process running on a node has a unique process identifier. - As such, any process in the computing system can be identified by its node - id and process id. - -\layout Standard - -The Portals API defines a type, -\family typewriter -ptl_process_id_t -\family default - for representing process ids and a function, -\emph on -PtlGetId -\emph default -, which can be used to obtain the id of the current process. -\layout Comment - -The Portals API does not include thread identifiers. - Messages are delivered to processes (address spaces) not threads (contexts - of execution). 
-\layout Subsection - -The Process Id Type -\begin_inset LatexCommand \label{sec:pid-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_nid_t nid; /* node id */ -\newline - ptl_pid_t pid; /* process id */ -\newline -} ptl_process_id_t; -\layout Standard -\noindent -The -\family typewriter -ptl_process_id_t -\family default - type uses two identifiers to represent a process id: a node id and a process - id. - -\layout Subsection - -PtlGetId -\begin_inset LatexCommand \label{sub:PtlGetId} - -\end_inset - - -\layout LyX-Code - -int PtlGetId( ptl_handle_ni_t ni_handle, -\newline - ptl_process_id_t* id ); -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -ni_handle -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -id -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A network interface handle. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -id -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the id for the calling process. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Comment - -Note that process identifiers are dependent on the network interface(s). - In particular, if a node has multiple interfaces, it may have multiple - node identifiers. -\layout Section - -Match List Entries and Match Lists -\begin_inset LatexCommand \label{sec:me} - -\end_inset - - -\layout Standard - -A match list is a chain of match list entries. - Each match list entry includes a memory descriptor and a set of match criteria. - The match criteria can be used to reject incoming requests based on process - id or the match bits provided in the request. - A match list is created using the -\emph on -PtlMEAttach -\emph default - or -\shape italic -PtlMEAttachAny -\shape default - functions, which create a match list consisting of a single match list - entry, attaches the match list to the specified Portal index, and returns - a handle for the match list entry. - Match entries can be dynamically inserted and removed from a match list - using the -\emph on -PtlMEInsert -\emph default - and -\emph on -PtlMEUnlink -\emph default - functions. 
-\layout Subsection - -PtlMEAttach -\begin_inset LatexCommand \label{sec:meattach} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_RETAIN, PTL_UNLINK } ptl_unlink_t; -\newline - -\layout LyX-Code - -typedef enum { PTL_INS_BEFORE, PTL_INS_AFTER } ptl_ins_pos_t; -\newline - -\layout LyX-Code - -int PtlMEAttach( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ins_pos_t -\family default - are used to control where a new item is inserted. - The value -\family typewriter -PTL_INS_BEFORE -\family default - is used to insert the new item before the current item or before the head - of the list. - The value -\family typewriter -PTL_INS_AFTER -\family default - is used to insert the new item after the current item or after the last - item in the list. - -\layout Standard - -The -\emph on -PtlMEAttach -\emph default - function creates a match list consisting of a single entry and attaches - this list to the Portal table for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PTINDEX Indicates that -\family typewriter -index -\family default - is not a valid Portal table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. 
- -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="7" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.75in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The Portal table index where the match list should be attached. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Specifies the match criteria for the process id of the requestor. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -match_bits, ignorebits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Specify the match criteria to apply to the match bits in the incoming request. - The -\family typewriter -ignorebits -\family default - are used to mask out insignificant bits in the incoming match bits. - The resulting bits are then compared to the match list entry's match - bits to determine if the incoming request meets the match criteria. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -unlink -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates the match list entry should be unlinked when the last memory descripto -r associated with this match list entry is unlinked. - (Note, the check for unlinking a match entry only occurs when a memory - descriptor is unlinked.) -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be prepended or appended to - the existing match list. - If there is no existing list, this argument is ignored and the new match - entry becomes the only entry in the list. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEAttachAny -\begin_inset LatexCommand \label{sec:attachany} - -\end_inset - - -\layout LyX-Code - -int PtlMEAttachAny( ptl_handle_ni_t interface, -\newline - ptl_pt_index_t *index, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_unlink_t unlink, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEAttachAny -\emph default - function creates a match list consisting of a single entry and attaches - this list to an unused Portal table entry for -\family typewriter -interface -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match list entry. 
- -\layout Description - -PTL_PT_FULL Indicates that there are no free entries in the Portal table. -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.75in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface to use. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the Portal index where the - match list has been attached. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid, match_bits, ignorebits, unlink -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\shape italic -PtlMEAttach -\shape default -. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - match list entry. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEInsert -\begin_inset LatexCommand \label{sec:meinsert} - -\end_inset - - -\layout LyX-Code - -int PtlMEInsert( ptl_handle_me_t current, -\newline - ptl_process_id_t matchid, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_match_bits_t ignorebits, -\newline - ptl_ins_pos_t position, -\newline - ptl_handle_me_t* handle ); -\layout Standard - -The -\emph on -PtlMEInsert -\emph default - function creates a new match list entry and inserts this entry into the - match list containing -\family typewriter -current -\family default -. 
-\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -current -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ML_TOOLONG Indicates that the resulting match list is too long. - The maximum length for a match list is defined by the interface. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - match entry. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="left" valignment="top" width="0.8in"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="left" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -current -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for a match entry. - The new match entry will be inserted immediately before or immediately - after this match entry. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -matchid -\family default -, -\family typewriter -match_bits -\family default -, -\family typewriter -ignorebits -\family default -, -\family typewriter -unlink -\family default - -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -position -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Indicates whether the new match entry should be inserted before or after - the -\family typewriter -current -\family default - entry. - Allowed constants: -\family typewriter -PTL_INS_BEFORE -\family default -, -\family typewriter -PTL_INS_AFTER -\family default -. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -See the discussion for -\emph on -PtlMEAttach -\emph default -. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMEUnlink -\begin_inset LatexCommand \label{sec:meunlink} - -\end_inset - - -\layout LyX-Code - -int PtlMEUnlink( ptl_handle_me_t entry ); -\layout Standard -\noindent -The -\emph on -PtlMEUnlink -\emph default - function can be used to unlink a match entry from a match list. - This operation also releases any resources associated with the match entry - (including the associated memory descriptor). - It is an error to use the match entry handle after calling -\emph on -PtlMEUnlink -\emph default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -entry -\family default - is not a valid match entry handle. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -entry -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the match entry to be unlinked. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Memory Descriptors -\begin_inset LatexCommand \label{sec:md} - -\end_inset - - -\layout Standard - -A memory descriptor contains information about a region of an application - process' memory and an event queue where information about the operations - performed on the memory descriptor is recorded. - The Portals API provides two operations to create memory descriptors: -\emph on -PtlMDAttach -\emph default -, and -\emph on -PtlMDBind -\emph default -; an operation to update a memory descriptor, -\emph on -PtlMDUpdate -\emph default -; and an operation to unlink and release the resources associated with a - memory descriptor, -\emph on -PtlMDUnlink -\emph default -. 
-\layout Subsection - -The Memory Descriptor Type -\begin_inset LatexCommand \label{sec:md-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - void* start; -\newline - ptl_size_t length; -\newline - int threshold; -\newline - unsigned int max_offset; -\newline - unsigned int options; -\newline - void* user_ptr; -\newline - ptl_handle_eq_t eventq; -\newline -} ptl_md_t; -\layout Standard -\noindent -The -\family typewriter -ptl_md_t -\family default - type defines the application view of a memory descriptor. - Values of this type are used to initialize and update the memory descriptors. -\layout Subsubsection - -Members -\layout Description - -start,\SpecialChar ~ -length Specify the memory region associated with the memory descriptor. - The -\family typewriter -start -\family default - member specifies the starting address for the memory region and the -\family typewriter -length -\family default - member specifies the length of the region. - The -\family typewriter -start member -\family default - can be NULL provided that the -\family typewriter -length -\family default - member is zero. - (Zero length buffers are useful to record events.) There are no alignment - restrictions on the starting address or the length of the region; although, - unaligned messages may be slower (i.e., lower bandwidth and/or longer latency) - on some implementations. - -\layout Description - -threshold Specifies the maximum number of operations that can be performed - on the memory descriptor. - An operation is any action that could possibly generate an event (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - for the different types of events). - In the usual case, the threshold value is decremented for each operation - on the memory descriptor. - When the threshold value is zero, the memory descriptor is -\emph on -inactive -\emph default -, and does not respond to operations. 
- A memory descriptor can have an initial threshold value of zero to allow - for manipulation of an inactive memory descriptor by the local process. - A threshold value of -\family typewriter -PTL_MD_THRESH_INF -\family default - indicates that there is no bound on the number of operations that may be - applied to a memory descriptor. - Note that local operations (e.g., -\emph on -PtlMDUpdate -\emph default -) are not applied to the threshold count. - -\layout Description - -max_offset Specifies the maximum local offset of a memory descriptor. - When the local offset of a memory descriptor exceeds this maximum, the - memory descriptor becomes -\shape italic -inactive -\shape default - and does not respond to further operations. -\layout Description - -options Specifies the behavior of the memory descriptor. - There are five options that can be selected: enable put operations (yes - or no), enable get operations (yes or no), offset management (local or - remote), message truncation (yes or no), and acknowledgement (yes or no). - Values for this argument can be constructed using a bitwise or of the following - values: -\begin_deeper -\begin_deeper -\layout Description - -PTL_MD_OP_PUT Specifies that the memory descriptor will respond to -\emph on -put -\emph default - operations. - By default, memory descriptors reject -\emph on -put -\emph default - operations. - -\layout Description - -PTL_MD_OP_GET Specifies that the memory descriptor will respond to -\emph on -get -\emph default - operations. - By default, memory descriptors reject -\emph on -get -\emph default - operations. - -\layout Description - -PTL_MD_MANAGE_REMOTE Specifies that the offset used in accessing the memory - region is provided by the incoming request. - By default, the offset is maintained locally. - When the offset is maintained locally, the offset is incremented by the - length of the request so that the next operation (put and/or get) will - access the next part of the memory region. 
-\layout Description - -PTL_MD_TRUNCATE Specifies that the length provided in the incoming request - can be reduced to match the memory available in the region. - (The memory available in a memory region is determined by subtracting the - offset from the length of the memory region.) By default, if the length - in the incoming operation is greater than the amount of memory available, - the operation is rejected. - -\layout Description - -PTL_MD_ACK_DISABLE Specifies that an acknowledgement should -\emph on -not -\emph default - be sent for incoming -\emph on -put -\emph default - operations, even if requested. - By default, acknowledgements are sent for -\emph on -put -\emph default - operations that request an acknowledgement. - Acknowledgements are never sent for -\emph on -get -\emph default - operations. - The value sent in the reply serves as an implicit acknowledgement. - -\end_deeper -\layout Standard - - -\series bold -Note -\series default -: It is not considered an error to have a memory descriptor that does not - respond to either -\emph on -put -\emph default - or -\emph on -get -\emph default - operations: Every memory descriptor responds to -\emph on -reply -\emph default - operations. - Nor is it considered an error to have a memory descriptor that responds - to both -\emph on -put -\emph default - and -\emph on -get -\emph default - operations. - -\end_deeper -\layout Description - -user_ptr A user-specified value that is associated with the memory descriptor. - The value does not need to be a pointer, but must fit in the space used - by a pointer. - This value (along with other values) is recorded in events associated with - operations on this memory descriptor. 
-\begin_inset Foot -collapsed true - -\layout Standard - -Tying the memory descriptor to a user-defined value can be useful when multiple - memory descriptors share the same event queue or when the memory descriptor - needs to be associated with a data structure maintained by the application. - For example, an MPI implementation can set the -\family typewriter -user_ptr -\family default - argument to the value of an MPI Request. - This direct association allows for processing of memory descriptors by - the MPI implementation without a table lookup or a search for the appropriate - MPI Request. -\end_inset - - -\layout Description - -eventq A handle for the event queue used to log the operations performed - on the memory region. - If this argument is -\family typewriter -PTL_EQ_NONE -\family default -, operations performed on this memory descriptor are not logged. - -\layout Subsection - -PtlMDAttach -\begin_inset LatexCommand \label{sec:mdattach} - -\end_inset - - -\layout LyX-Code - -int PtlMDAttach( ptl_handle_me_t match, -\newline - ptl_md_t mem_desc, -\newline - ptl_unlink_t unlink_op, -\newline - ptl_unlink_t unlink_nofit, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_unlink_t -\family default - are used to control whether an item is unlinked from a list. - The value -\family typewriter -PTL_UNLINK -\family default - enables unlinking. - The value -\family typewriter -PTL_RETAIN -\family default - disables unlinking. -\layout Standard - -The -\emph on -PtlMDAttach -\emph default - operation is used to create a memory descriptor and attach it to a match - list entry. - An error code is returned if this match list entry already has an associated - memory descriptor. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. 
- -\layout Description - -PTL_INUSE Indicates that -\family typewriter -match -\family default - already has a memory descriptor attached. - -\layout Description - -PTL_INV_ME Indicates that -\family typewriter -match -\family default - is not a valid match entry handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface associated with -\family typewriter -match -\family default -. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the match entry that the memory descriptor will be associated - with. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_op -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when it becomes - inactive, either because the operation threshold drops to zero or because - the maximum offset has been exceeded. - (Note, the check for unlinking a memory descriptor only occurs after - the completion of a successful operation. - If the threshold is set to zero during initialization or using -\emph on -PtlMDUpdate -\emph default -, the memory descriptor is -\series bold -not -\series default - unlinked.)
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -unlink_nofit -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A flag to indicate whether the memory descriptor is unlinked when the space - remaining in the memory descriptor is not sufficient for a matching operation. - If an incoming message arrives at a memory descriptor that does - not have sufficient space and the -\series bold -PTL_MD_TRUNCATE -\series default - operation is not specified, the memory descriptor will be unlinked. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument can be NULL, in which case the handle will not be returned.
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDBind -\begin_inset LatexCommand \label{sec:mdbind} - -\end_inset - - -\layout LyX-Code - -int PtlMDBind( ptl_handle_ni_t interface, -\newline - ptl_md_t mem_desc, -\newline - ptl_handle_md_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlMDBind -\emph default - operation is used to create a -\begin_inset Quotes eld -\end_inset - -free floating -\begin_inset Quotes erd -\end_inset - - memory descriptor, i.e., a memory descriptor that is not associated with - a match list entry. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_ILL_MD Indicates that -\family typewriter -mem_desc -\family default - is not a legal memory descriptor. - This may happen because the memory region defined in -\family typewriter -mem_desc -\family default - is invalid or because the network interface associated with the -\family typewriter -eventq -\family default - in -\family typewriter -mem_desc -\family default - is not the same as the network interface, -\family typewriter -interface -\family default -. - -\layout Description - -PTL_INV_EQ Indicates that the event queue associated with -\family typewriter -mem_desc -\family default - is not valid. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - memory descriptor. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address.
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the network interface with which the memory descriptor will - be associated. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Provides initial values for the application visible parts of a memory descriptor. - Other than its use for initialization, there is no linkage between this - structure and the memory descriptor maintained by the API. 
- -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - memory descriptor. - The -\family typewriter -handle -\family default - argument must be a valid address and cannot be NULL. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDUnlink -\begin_inset LatexCommand \label{sec:mdfree} - -\end_inset - - -\layout LyX-Code - -int PtlMDUnlink( ptl_handle_md_t mem_desc ); -\layout Standard -\noindent -The -\emph on -PtlMDUnlink -\emph default - function unlinks the memory descriptor from any match list entry it may - be linked to and releases the resources associated with a memory descriptor. - (This function does not free the memory region associated with the memory - descriptor.) This function also releases the resources associated with a - floating memory descriptor. - Only memory descriptors with no pending operations may be unlinked. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. -\layout Description - -PTL_MD_INUSE Indicates that -\family typewriter -mem_desc -\family default - has pending operations and cannot be unlinked. 
-\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to be released. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlMDUpdate -\begin_inset LatexCommand \label{sec:mdupdate} - -\end_inset - - -\layout LyX-Code - -int PtlMDUpdate( ptl_handle_md_t mem_desc, -\newline - ptl_md_t* old_md, -\newline - ptl_md_t* new_md, -\newline - ptl_handle_eq_t testq ); -\layout Standard -\noindent -The -\emph on -PtlMDUpdate -\emph default - function provides a conditional, atomic update operation for memory descriptors. - The memory descriptor identified by -\family typewriter -mem_desc -\family default - is only updated if the event queue identified by -\family typewriter -testq -\family default - is empty. - The intent is to only enable updates to the memory descriptor when no new - messages have arrived since the last time the queue was checked. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:exmpi} - -\end_inset - - for an example of how this function can be used. 
-\layout Standard - -If -\family typewriter -new -\family default - is not NULL the memory descriptor identified by handle will be updated - to reflect the values in the structure pointed to by -\family typewriter -new -\family default - if -\family typewriter -testq -\family default - has the value -\family typewriter -PTL_EQ_NONE -\family default - or if the event queue identified by -\family typewriter -testq -\family default - is empty. - If -\family typewriter -old -\family default - is not NULL, the current value of the memory descriptor identified by -\family typewriter -mem_desc -\family default - is recorded in the location identified by -\family typewriter -old -\family default -. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_NOUPDATE Indicates that the update was not performed because -\family typewriter -testq -\family default - was not empty. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor handle. - -\layout Description - -PTL_ILL_MD Indicates that the value pointed to by -\family typewriter -new -\family default - is not a legal memory descriptor (e.g., the memory region specified by the - memory descriptor may be invalid). - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -testq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -new -\family default - or -\family typewriter -old -\family default - is not a legal address. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="4" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor to update. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -old_md -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -old_md -\family default - is not the value -\family typewriter -NULL -\family default -, the current value of the memory descriptor will be stored in the location - identified by -\family typewriter -old -\family default -_md. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -new_md -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -If -\family typewriter -new_md -\family default - is not the value -\family typewriter -NULL -\family default -, this argument provides the new values for the memory descriptor, if the - update is performed. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -testq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for an event queue used to predicate the update. - If -\family typewriter -testq -\family default - is equal to -\family typewriter -PTL_EQ_NONE -\family default -, the update is performed unconditionally. - Otherwise, the update is performed if and only if -\family typewriter -testq -\family default - is empty. - If the update is not performed, the function returns the value -\family typewriter -PTL_NOUPDATE -\family default -. - (Note, the -\family typewriter -testq -\family default - argument does not need to be the same as the event queue associated with - the memory descriptor.) 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Standard - -The conditional update can be used to ensure that the memory descriptor - has not changed between the time it was examined and the time it is updated. - In particular, it is needed to support an MPI implementation where the - activity of searching an unexpected message queue and posting a receive - must be atomic. -\layout Section - -Events and Event Queues -\begin_inset LatexCommand \label{sec:eq} - -\end_inset - - -\layout Standard - -Event queues are used to log operations performed on memory descriptors. - They can also be used to hold acknowledgements for completed -\emph on -put -\emph default - operations and to note when the data specified in a -\emph on -put -\emph default - operation has been sent (i.e., when it is safe to reuse the buffer that holds - this data). - Multiple memory descriptors can share a single event queue. -\layout Standard - -In addition to the -\family typewriter -ptl_handle_eq_t -\family default - type, the Portals API defines two types associated with events: The -\family typewriter - -\newline -ptl_event_kind_t -\family default - type defines the kinds of events that can be stored in an event queue. - The -\family typewriter -ptl_event_t -\family default - type defines a structure that holds the information associated with an - event. -\layout Standard - -The Portals API also provides four functions for dealing with event queues: - The -\emph on -PtlEQAlloc -\emph default - function is used to allocate the API resources needed for an event queue, - the -\emph on -PtlEQFree -\emph default - function is used to release these resources, the -\emph on -PtlEQGet -\emph default - function can be used to get the next event from an event queue, and the - -\emph on -PtlEQWait -\emph default - function can be used to block a process (or thread) until an event queue - has at least one event. 
-\layout Subsection - -Kinds of Events -\begin_inset LatexCommand \label{sec:ek-type} - -\end_inset - - -\layout LyX-Code - -typedef enum { -\newline - PTL_EVENT_GET_START, PTL_EVENT_GET_END, PTL_EVENT_GET_FAIL, -\newline - PTL_EVENT_PUT_START, PTL_EVENT_PUT_END, PTL_EVENT_PUT_FAIL, -\newline - PTL_EVENT_REPLY_START, PTL_EVENT_REPLY_END, PTL_EVENT_REPLY_FAIL, -\newline - PTL_EVENT_SEND_START, PTL_EVENT_SEND_END, PTL_EVENT_SEND_FAIL, -\newline - PTL_EVENT_ACK, -\newline - PTL_EVENT_UNLINK -\newline -} ptl_event_kind_t; -\layout Standard -\noindent -The Portals API defines fourteen types of events that can be logged in an - event queue: -\layout Description - -PTL_EVENT_GET_START A remote -\emph on -get -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_GET_END A previously initiated -\emph on -get -\emph default - operation completed successfully. - This event is logged after the reply has been sent by the local node. - As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_GET_FAIL A previously initiated -\emph on -get -\emph default - operation completed unsuccessfully. - This event is logged after the reply has been sent by the local node. - As such, the process could free the memory descriptor once it sees this - event. - -\layout Description - -PTL_EVENT_PUT_START A remote -\emph on -put -\emph default - operation has been started on the memory descriptor. - The memory region associated with this descriptor should be considered - volatile until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_PUT_END A previously initiated -\emph on -put -\emph default - operation completed successfully.
- The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_PUT_FAIL A previously initiated -\emph on -put -\emph default - operation completed unsuccessfully. - The underlying layers will not alter the memory (on behalf of this operation) - once this event has been logged. - -\layout Description - -PTL_EVENT_REPLY_START A -\emph on -reply -\emph default - operation has been started on the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_END A previously initiated -\emph on -reply -\emph default - operation has completed successfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_REPLY_FAIL A previously initiated -\emph on -reply -\emph default - operation has completed unsuccessfully. - This event is logged after the data (if any) from the reply has been written - into the memory descriptor. - -\layout Description - -PTL_EVENT_ACK An -\emph on -acknowledgement -\emph default - was received. - This event is logged when the acknowledgement is received. -\layout Description - -PTL_EVENT_SEND_START An outgoing -\emph on -send -\emph default - operation has been started. - The memory region associated with this descriptor should not be altered - until the corresponding END or FAIL event is logged. -\layout Description - -PTL_EVENT_SEND_END A previously initiated -\emph on -send -\emph default - operation has completed successfully. - This event is logged after the entire buffer has been sent and it is safe - for the application to reuse the buffer. - -\layout Description - -PTL_EVENT_SEND_FAIL A previously initiated -\emph on -send -\emph default - operation has completed unsuccessfully. - The process can safely manipulate the memory or free the memory descriptor - once it sees this event.
-\layout Description - -PTL_EVENT_UNLINK A memory descriptor associated with this event queue has - been automatically unlinked. - This event is not generated when a memory descriptor is explicitly unlinked - by calling -\shape italic -PtlMDUnlink -\shape default -. - This event does not decrement the threshold count. -\layout Subsection - -Event Ordering -\layout Standard - -The Portals API guarantees that when a process initiates two operations - on a remote process, the operations will be initiated on the remote process - in the same order that they were initiated on the original process. - As an example, if process A initiates two -\emph on -put -\emph default - operations, -\emph on -x -\emph default - and -\emph on -y -\emph default -, on process B, the Portals API guarantees that process A will receive the - -\family typewriter -PTL_EVENT_SEND_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default - in the same order that process B receives the -\family typewriter -PTL_EVENT_PUT_START -\family default - events for -\emph on -x -\emph default - and -\emph on -y -\emph default -. - Notice that the API does not guarantee that the start events will be delivered - in the same order that process A initiated the -\emph on -x -\emph default - and -\emph on -y -\emph default - operations. - If process A needs to ensure the ordering of these operations, it should - include code to wait for the initiation of -\emph on -x -\emph default - before it initiates -\emph on -y -\emph default -. -\layout Subsection - -Failure Notification -\layout Standard - -Operations may fail to complete successfully; however, unless the node itself - fails, every operation that is started will eventually complete. - While an operation is in progress, the memory associated with the operation - should not be viewed (in the case of a put or a reply) or altered (in the - case of a send or get).
- Operation completion, whether successful or unsuccessful, is final. - That is, when an operation completes, the memory associated with the operation - will no longer be read or altered by the operation. - A network interface can use the -\family typewriter -ptl_ni_fail_t -\family default - to define more specific information regarding the failure of the operation - and record this information in the -\family typewriter -ni_fail_type -\family default - field of the event. -\layout Subsection - -The Event Type -\begin_inset LatexCommand \label{sec:event-type} - -\end_inset - - -\layout LyX-Code - -typedef struct { -\newline - ptl_event_kind_t type; -\newline - ptl_process_id_t initiator; -\newline - ptl_uid_t uid; -\layout LyX-Code - - ptl_pt_index_t portal; -\newline - ptl_match_bits_t match_bits; -\newline - ptl_size_t rlength; -\newline - ptl_size_t mlength; -\newline - ptl_size_t offset; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_hdr_data_t hdr_data; -\newline - ptl_seq_t link; -\newline - ptl_ni_fail_t ni_fail_type; -\newline - volatile ptl_seq_t sequence; -\newline -} ptl_event_t; -\layout Standard -\noindent -An event structure includes the following members: -\layout Description - -type Indicates the type of the event. - -\layout Description - -initiator The id of the initiator. - -\layout Description - -portal The Portal table index specified in the request. - -\layout Description - -match_bits A copy of the match bits specified in the request. - See section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - for more information on match bits. - -\layout Description - -rlength The length (in bytes) specified in the request. - -\layout Description - -mlength The length (in bytes) of the data that was manipulated by the operation. - For truncated operations, the manipulated length will be the number of - bytes specified by the memory descriptor (possibly with an offset) operation. 
- For all other operations, the manipulated length will be the length of - the requested operation. - -\layout Description - -offset Is the displacement (in bytes) into the memory region that the operation - used. - The offset can be determined by the operation (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - -) for a remote managed memory descriptor, or by the local memory descriptor - (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -). - -\layout Description - -md_handle Is the handle to the memory descriptor associated with the event. -\layout Description - -mem_desc Is the state of the memory descriptor immediately after the event - has been processed. - -\layout Description - -hdr_data 64 bits of out-of-band user data (see Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -). - -\layout Description - -link The -\emph on -link -\emph default - member is used to link -\family typewriter -START -\family default - events with the -\family typewriter -END -\family default - or -\family typewriter -FAIL -\family default - event that signifies completion of the operation. - The -\emph on -link -\emph default - member will be the same for the two events associated with an operation. - The link member is also used to link an -\family typewriter -UNLINK -\family default - event with the event that caused the memory descriptor to be unlinked. -\layout Description - -sequence The sequence number for this event. - Sequence numbers are unique to each event. -\layout Comment - -The -\emph on -sequence -\emph default - member is the last member and is volatile to support SMP implementations. - When an event structure is filled in, the -\emph on -sequence -\emph default - member should be written after all other members have been updated. 
- Moreover, a memory barrier should be inserted between the updating of other - members and the updating of the -\emph on -sequence -\emph default - member. -\layout Subsection - -PtlEQAlloc -\begin_inset LatexCommand \label{sec:eqalloc} - -\end_inset - - -\layout LyX-Code - -int PtlEQAlloc( ptl_handle_ni_t interface, -\newline - ptl_size_t count, -\newline - ptl_handle_eq_t* handle ); -\layout Standard -\noindent -The -\emph on -PtlEQAlloc -\emph default - function is used to build an event queue. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_NOSPACE Indicates that there is insufficient memory to allocate the - event queue. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -handle -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="3" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the interface with which the event queue will be associated. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -count -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The number of events that can be stored in the event queue. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -handle -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold a handle for the newly created - event queue. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQFree -\begin_inset LatexCommand \label{sec:eqfree} - -\end_inset - - -\layout LyX-Code - -int PtlEQFree( ptl_handle_eq_t eventq ); -\layout Standard -\noindent -The -\emph on -PtlEQFree -\emph default - function releases the resources associated with an event queue. - It is up to the user to ensure that no memory descriptors are associated - with the event queue once it is freed. - -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
- -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="1" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -A handle for the event queue to be released. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQGet -\begin_inset LatexCommand \label{sec:eqget} - -\end_inset - - -\layout LyX-Code - -int PtlEQGet( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQGet -\emph default - function is a nonblocking function that can be used to get the next event - in an event queue. - The event is removed from the queue. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized.
- -\layout Description - -PTL_EQ_EMPTY Indicates that -\family typewriter -eventq -\family default - is empty or another thread is waiting on -\emph on -PtlEQWait -\emph default -. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address. - -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.5in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. 
-\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlEQWait -\begin_inset LatexCommand \label{sec:eqwait} - -\end_inset - - -\layout LyX-Code - -int PtlEQWait( ptl_handle_eq_t eventq, -\newline - ptl_event_t* event ); -\layout Standard -\noindent -The -\emph on -PtlEQWait -\emph default - function can be used to block the calling process (thread) until there - is an event in an event queue. - This function also returns the next event in the event queue and removes - this event from the queue. - This is the only blocking operation in the Portals 3.2 API. - In the event that multiple threads are waiting on the same event queue, - PtlEQWait is guaranteed to wake exactly one thread, but the order in which - they are awakened is not specified. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_EQ_DROPPED Indicates success (i.e., an event is returned) and that at - least one event between this event and the last event obtained (using -\emph on -PtlEQGet -\emph default - or -\emph on -PtlEQWait -\emph default -) from this event queue has been dropped due to limited space in the event - queue. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_EQ Indicates that -\family typewriter -eventq -\family default - is not a valid event queue handle. - -\layout Description - -PTL_SEGV Indicates that -\family typewriter -event -\family default - is not a legal address.
- -\layout Subsubsection - -Arguments -\layout Standard -\noindent - -\begin_inset Tabular -<lyxtabular version="3" rows="2" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -eventq -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the event queue to wait on. - The calling process (thread) will be blocked until -\family typewriter -eventq -\family default - is not empty. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -event -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -output -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -On successful return, this location will hold the values associated with - the next event in the event queue. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -The Access Control Table -\begin_inset LatexCommand \label{sec:ac} - -\end_inset - - -\layout Standard - -Processes can use the access control table to control which processes are - allowed to perform operations on Portal table entries. 
- Each communication interface has a Portal table and an access control table. - The access control table for the default interface contains an entry at - index zero that allows all processes with the same user id to communicate. - Entries in the access control table can be manipulated using the -\emph on -PtlACEntry -\emph default - function. -\layout Subsection - -PtlACEntry -\begin_inset LatexCommand \label{sec:acentry} - -\end_inset - - -\layout LyX-Code - -int PtlACEntry( ptl_handle_ni_t interface, -\newline - ptl_ac_index_t index, -\newline - ptl_process_id_t matchid, -\newline - ptl_uid_t user_id, -\newline - ptl_pt_index_t portal ); -\layout Standard -\noindent -The -\emph on -PtlACEntry -\emph default - function can be used to update an entry in the access control table for - an interface. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_NI Indicates that -\family typewriter -interface -\family default - is not a valid network interface handle. - -\layout Description - -PTL_AC_INV_INDEX Indicates that -\family typewriter -index -\family default - is not a valid access control table index. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -matchid -\family default - is not a valid process identifier. - -\layout Description - -PTL_PT_INV_INDEX Indicates that -\family typewriter -portal -\family default - is not a valid Portal table index. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="5" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -interface -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the interface to use. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -index -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index of the entry in the access control table to update. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -matchid -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the process(es) that are allowed to perform operations. - The constants -\family typewriter -PTL_PID_ANY -\family default - and -\family typewriter -PTL_NID_ANY -\family default - can be used to wildcard either of the ids in the -\family typewriter -ptl_process_id_t -\family default - structure. - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -user_id -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the user that is allowed to perform operations. - The value -\family typewriter -PTL_UID_ANY -\family default - can be used to wildcard the user. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Identifies the Portal index(es) that can be used. - The value -\family typewriter -PTL_PT_INDEX_ANY -\family default - can be used to wildcard the Portal index. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Data Movement Operations -\begin_inset LatexCommand \label{sec:datamovement} - -\end_inset - - -\layout Standard - -The Portals API provides two data movement operations: -\emph on -PtlPut -\emph default - and -\emph on -PtlGet -\emph default -. -\layout Subsection - -PtlPut -\begin_inset LatexCommand \label{sec:put} - -\end_inset - - -\layout LyX-Code - -typedef enum { PTL_ACK_REQ, PTL_NOACK_REQ } ptl_ack_req_t; -\newline - -\newline -int PtlPut( ptl_handle_md_t mem_desc, -\newline - ptl_ack_req_t ack_req, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset, -\newline - ptl_hdr_data_t hdr_data ); -\layout Standard -\noindent -Values of the type -\family typewriter -ptl_ack_req_t -\family default - are used to control whether an acknowledgement should be sent when the - operation completes (i.e., when the data has been written to a memory descriptor - of the -\family typewriter -target -\family default - process). 
- The value -\family typewriter -PTL_ACK_REQ -\family default - requests an acknowledgement, the value -\family typewriter -PTL_NOACK_REQ -\family default - requests that no acknowledgement should be generated. -\layout Standard - -The -\emph on -PtlPut -\emph default - function initiates an asynchronous put operation. - There are several events associated with a put operation: initiation of - the send on the local node ( -\family typewriter -PTL_EVENT_SEND_START -\family default -), completion of the send on the local node ( -\family typewriter -PTL_EVENT_SEND_END -\family default - or -\family typewriter -PTL_EVENT_SEND_FAIL -\family default -), and, when the send completes successfully, the receipt of an acknowledgement - ( -\family typewriter -PTL_EVENT_ACK -\family default -) indicating that the operation was accepted by the target. - These events will be logged in the event queue associated with the memory - descriptor ( -\family typewriter -mem_desc -\family default -) used in the put operation. - Using a memory descriptor that does not have an associated event queue - results in these events being discarded. - In this case, the application must have another mechanism (e.g., a higher - level protocol) for determining when it is safe to modify the memory region - associated with the memory descriptor. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id. 
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="8" columns="3"> -<features> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory to be sent. - If the memory descriptor has an event queue associated with it, it will - be used to record events when the message has been sent (PTL_EVENT_SEND_START, - PTL_EVENT_SEND_END). - -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ack_req -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -Controls whether an acknowledgement event is requested. - Acknowledgements are only sent when they are requested by the initiating - process -\series bold -and -\series default - the memory descriptor has an event queue -\series bold -and -\series default - the target memory descriptor enables them. 
- Allowed constants: -\family typewriter -PTL_ACK_REQ -\family default -, -\family typewriter -PTL_NOACK_REQ -\family default -. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -hdr_data -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -64 bits of user data that can be included in message header. 
- This data is written to an event queue entry at the target if an event - queue is present on the matching memory descriptor. -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Subsection - -PtlGet -\begin_inset LatexCommand \label{sec:get} - -\end_inset - - -\layout LyX-Code - -int PtlGet( ptl_handle_md_t mem_desc, -\newline - ptl_process_id_t target, -\newline - ptl_pt_index_t portal, -\newline - ptl_ac_index_t cookie, -\newline - ptl_match_bits_t match_bits, -\newline - ptl_size_t offset ); -\layout Standard -\noindent -The -\emph on -PtlGet -\emph default - function initiates a remote read operation. - There are two event pairs associated with a get operation: when the data - is sent from the remote node, a -\family typewriter -PTL_EVENT_GET_{START|END} -\family default - event pair is registered on the remote node; and when the data is returned - from the remote node, a -\family typewriter -PTL_EVENT_REPLY_{START|END} -\family default - event pair is registered on the local node. -\layout Subsubsection - -Return Codes -\layout Description - -PTL_OK Indicates success. - -\layout Description - -PTL_NOINIT Indicates that the Portals API has not been successfully initialized. - -\layout Description - -PTL_INV_MD Indicates that -\family typewriter -mem_desc -\family default - is not a valid memory descriptor. - -\layout Description - -PTL_INV_PROC Indicates that -\family typewriter -target -\family default - is not a valid process id.
- -\layout Subsubsection - -Arguments -\layout Standard - - -\begin_inset Tabular -<lyxtabular version="3" rows="6" columns="3"> -<features> -<column alignment="right" valignment="top" width="0pt"> -<column alignment="center" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="4.7in"> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="left" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A handle for the memory descriptor that describes the memory into which - the requested data will be received. - The memory descriptor can have an event queue associated with it to record - events, such as when the message receive has started ( -\family typewriter -PTL_EVENT_REPLY -\family default -_ -\family typewriter -START -\family default -). -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -A process id for the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index in the remote Portal table. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The index into the access control table of the target process. -\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The match bits to use for message selection at the target process. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -input -\end_inset -</cell> -<cell alignment="center" valignment="top" topline="true" leftline="true" rightline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -The offset into the target memory descriptor (only used when the target - memory descriptor has the -\family typewriter -PTL_MD_MANAGE_REMOTE -\family default - option set). -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\layout Section - -Summary -\layout Standard - - -\begin_inset LatexCommand \label{sec:summary} - -\end_inset - - We conclude this section by summarizing the names introduced by the Portals - 3.2 API. - We start by summarizing the names of the types introduced by the API. - This is followed by a summary of the functions introduced by the API. - That, in turn, is followed by a summary of the function return codes. - Finally, we conclude with a summary of the other constant values introduced - by the API. -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:types} - -\end_inset - - presents a summary of the types defined by the Portals API. - The first column in this table gives the type name, the second column gives - a brief description of the type, the third column identifies the section - where the type is defined, and the fourth column lists the functions that - have arguments of this type.
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Types Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:types} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\noindent - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="25" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2in"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2.2in"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Sect -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold - Functions -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for an access control table -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" 
bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlACEntry, PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -acknowledgement request types -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlPut -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -kinds of events -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -information about events -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_seq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -event sequence number -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:event-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_any_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for any object -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard
-\noindent -handles for event queues -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, PtlEQFree, PtlEQGet, PtlEQWait, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for memory descriptors -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAlloc, PtlMDUnlink, PtlMDUpdate, PtlMEAttach, PtlMEAttachAny, PtlMEInsert, - PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_me_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for match entries -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlMEUnlink -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset 
Text - -\layout Standard - - -\family typewriter -ptl_handle_ni_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -handles for network interfaces -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIFini, PtlNIStatus, PtlNIDist, PtlEQAlloc, PtlACEntry, PtlPut, - PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_nid_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -node identifiers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlGetId,PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -process identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlGetId, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -user identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlGetUid, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -insertion position (before or after) -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -identifiers for network interfaces -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none">
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -match (and ignore) bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mb-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlMEInsert, PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -memory descriptors -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ni_fail_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -network interface-specific failures -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -process identifiers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlGetId, PtlNIDist, PtlMEAttach, PtlMEAttachAny, PtlACEntry, PtlPut, PtlGet - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for Portal tables -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:index-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -sizes -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:size-t} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlEQAlloc, 
PtlPut, PtlGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -indexes for status registers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_value_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -values in status registers -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -unlink options -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMEAttach, PtlMEAttachAny, 
PtlMEInsert, PtlMDAttach -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:func} - -\end_inset - - presents a summary of the functions defined by the Portals API. - The first column in this table gives the name for the function, the second - column gives a brief description of the operation implemented by the function, - and the third column identifies the section where the function is defined. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Functions Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:func} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="24" columns="3"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Operation -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - Section -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlACEntry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - update an entry in an access control table -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ac} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQAlloc -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQGet -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the next event from an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQFree -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - release the resources for an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlEQWait -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - wait for a new event in an event queue -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:eq} - 
-\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlFini -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - shutdown the Portals API -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlGet -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - perform a get operation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlGetId -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the id for the current process -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:pid} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlInit -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - initialize the Portals API -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:init} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - PtlMDAttach -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a memory descriptor and attach it to a match entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDBind -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a free-floating memory descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdbind} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDUnlink -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - remove a memory descriptor from a list and release its resources -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMDUpdate -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - update a memory descriptor -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - 
-\layout Standard - - PtlMEAttach -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a Portal table -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -create a match entry and attach it to a free Portal table entry -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:attachany} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMEInsert -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - create a match entry and insert it in a list -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlMEUnlink -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - remove a match entry from a list and release its resources -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:me} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - PtlNIDist -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the distance to another process -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIFini -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - shutdown a network interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIHandle -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - get the network interface handle for an object -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIInit -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - initialize a network interface -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlNIStatus -\end_inset -</cell> -<cell 
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - read a network interface status register -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - PtlPut -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - perform a put operation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:datamovement} - -\end_inset - - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:retcodes} - -\end_inset - - summarizes the return codes used by functions defined by the Portals API. - All of these constants are integer values. - The first column of this table gives the symbolic name for the constant, - the second column gives a brief description of the value, and the third - column identifies the functions that can return this value. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Function Return Codes for the Portals 3.2 API -\begin_inset LatexCommand \label{tab:retcodes} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="27" columns="3"> -<features> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="2.6in"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Functions -\series default - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_AC_INV_INDEX -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid access control table index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlACEntry -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_DROPPED -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> 
-\begin_inset Text - -\layout Standard - -at least one event has been dropped -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_EMPTY -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -no events available in an event queue -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -error during initialization or cleanup -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlInit, PtlFini -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ILL_MD -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -illegal memory descriptor values -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach, PtlMDBind, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_DUP -\end_inset -</cell> -<cell
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -duplicate initialization of an interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INIT_INV -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -initialization of an invalid interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INUSE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -the ME already has an MD -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ASIZE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid access control table size -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_EQ -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - 
-invalid event queue handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDUpdate, PtlEQFree, PtlEQGet -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_HANDLE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIHandle -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_MD -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid memory descriptor handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDUnlink, PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_ME -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid match entry handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlMDAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_NI -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid network interface handle -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset 
Text - -\layout Standard -\noindent -PtlNIDist, PtlNIFini, PtlMDBind, PtlEQAlloc -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PROC -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid process identifier -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIDist, PtlMEAttach, PtlMEInsert, PtlACEntry, PtlPut, PtlGet - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_PTINDEX -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid Portal table index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_REG -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid status register -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INV_SR_INDX -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -invalid status register index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - 
PtlNIStatus -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ML_TOOLONG -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match list too long -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMEAttach, PtlMEInsert -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_INUSE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -MD has pending operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMDUnlink -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOINIT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -uninitialized API -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\emph default -, except PtlInit -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOSPACE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insufficient memory -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlMDAttach, PtlMDBind, PtlEQAlloc, PtlMEAttach, PtlMEInsert - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOUPDATE -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - no update was performed -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - PtlMDUpdate -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_FULL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -Portal table is full -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -PtlMEAttachAny -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_OK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - success -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent - -\emph on -all -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_SEGV -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -addressing violation -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard -\noindent -PtlNIInit, PtlNIStatus, PtlNIDist, PtlNIHandle, PtlMDBind, PtlMDUpdate, - PtlEQAlloc, PtlEQGet, PtlEQWait -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand 
\ref{tab:oconsts} - -\end_inset - - summarizes the remaining constant values introduced by the Portals API. - The first column in this table presents the symbolic name for the constant, - the second column gives a brief description of the value, the third column - identifies the type for the value, the fourth column identifies the section in which the value is introduced, and the fifth column identifies the - sections in which the value is referenced. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Other Constants Defined by the Portals 3.2 API -\begin_inset LatexCommand \label{tab:oconsts} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="36" columns="5"> -<features> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Name -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Meaning -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Base type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Intr. -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Ref. 
-\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_ACK_REQ -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -request an acknowledgement -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EQ_NONE -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -a NULL event queue handle -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_eq_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:handle-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:mdupdate} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout 
Standard - - -\family typewriter -PTL_EVENT_GET_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_GET_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -get event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter 
-ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset 
LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_PUT_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -put event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -reply event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout 
Standard - -reply event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_REPLY_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -reply event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:get} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - 
-\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_ACK_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -acknowledgement event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_START -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event start -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_END -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event end -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_SEND_FAIL -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -send event fail -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_EVENT_UNLINK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -unlink event -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_event_kind_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ek-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for process id fields -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for node id fields -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_nid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UID_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for user id -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:id-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meattach} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> 
-</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_IFACE_DEFAULT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -default interface -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_interface_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:ni-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_AFTER -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insert after -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_INS_BEFORE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -insert before -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ins_pos_t -\family 
default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:meinsert} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_ACK_DISABLE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to disable acknowledgements -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_MANAGE_REMOTE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable the use of remote offsets -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - -, -\begin_inset LatexCommand \ref{sec:get} - 
-\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_GET -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable get operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_OP_PUT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable put operations -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_THRESH_INF -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -infinite threshold for a memory descriptor -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_MD_TRUNCATE -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -a flag to enable truncation of a request -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:md-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_NOACK_REQ -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -request no acknowledgement -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ack_req_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:put} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_PT_INDEX_ANY -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -wildcard for Portal indexes -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:acentry} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_RETAIN -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -disable unlinking -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_SR_DROP_COUNT -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -index for the dropped count register -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_sr_index_t -\end_inset -</cell> -<cell 
alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:stat-type} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - - -\end_inset -</cell> -</row> -<row> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -PTL_UNLINK -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -enable unlinking -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_unlink_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\begin_inset LatexCommand \ref{sec:mdattach} - -\end_inset - - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Chapter - -The Semantics of Message Transmission -\begin_inset LatexCommand \label{sec:semantics} - -\end_inset - - -\layout Standard - -The portals API uses four types of messages: put requests, acknowledgements, - get requests, and replies. - In this section, we describe the information passed on the wire for each - type of message. - We also describe how this information is used to process incoming messages. -\layout Section - -Sending Messages -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:put-wire} - -\end_inset - - summarizes the information that is transmitted for a put request. 
- The first column provides a descriptive name for the information, the second - column provides the type for this information, the third column identifies - the source of the information, and the fourth column provides additional - notes. - Most information that is transmitted is obtained directly from the -\emph on -PtlPut -\emph default - operation. - Notice that the handle for the memory descriptor used in the -\emph on -PtlPut -\emph default - operation is transmitted even though this value cannot be interpreted by - the target. - A value of anything other than -\family typewriter -PTL_MD_NONE -\family default - is interpreted as a request for an acknowledgement. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Put Request -\begin_inset LatexCommand \label{tab:put-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="12" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlPut -\emph default - arg -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -indicates a put request -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -user -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell 
alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - 
-match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -no ack if -\family typewriter -PTL_MD_NONE -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -length -\family default - member -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -data -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family roman -\emph on -bytes -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -start -\family default - and -\family typewriter -length -\family default - members -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:ack-wire} - -\end_inset - - summarizes the information transmitted in an acknowledgement. - Most of the information is simply echoed from the put request. - Notice that the initiator and target are obtained directly from the put - request, but are swapped in generating the acknowledgement. - The only new piece of information in the acknowledgement is the manipulated - length which is determined as the put request is satisfied. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in an Acknowledgement -\begin_inset LatexCommand \label{tab:ack-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="10" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Put Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - 
indicates an acknowledgement -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - match bits -\end_inset 
-</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - requested length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - length -\end_inset -</cell> -<cell 
alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - manipulated length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter - ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - obtained from the operation -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:get-wire} - -\end_inset - - summarizes the information that is transmitted for a get request. - Like the information transmitted in a put request, most of the information - transmitted in a get request is obtained directly from the -\emph on -PtlGet -\emph default - operation. - Unlike put requests, get requests do not include the event queue handle. - In this case, the reply is generated whenever the operation succeeds and - the memory descriptor must not be unlinked until the reply is received. - As such, there is no advantage to explicitly sending the event queue handle. 
-\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Get Request -\begin_inset LatexCommand \label{tab:get-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="11" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -\emph on -PtlGet -\emph default - argument -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - 
-\layout Standard - -indicates a get operation -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -user -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_uid_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -local information -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" 
valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -portal -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_ac_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -cookie -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -match_bits -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset 
-</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\family default - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -mem_desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -length -\family default - member -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Standard - -Table\SpecialChar ~ - -\begin_inset LatexCommand \ref{tab:reply-wire} - -\end_inset - - summarizes the information transmitted in a reply. - Like an acknowledgement, most of the information is simply echoed from - the get request. - The initiator and target are obtained directly from the get request, but - are swapped in generating the reply. 
- The only new pieces of information in the reply are the manipulated length - and the data, which are determined as the get request is satisfied. -\layout Standard - - -\begin_inset Float table -placement htbp -wide false -collapsed false - -\layout Caption - -Information Passed in a Reply -\begin_inset LatexCommand \label{tab:reply-wire} - -\end_inset - - -\layout Standard - - -\begin_inset ERT -status Collapsed - -\layout Standard - -\backslash -medskip -\end_inset - - -\layout Standard -\align center - -\size small - -\begin_inset Tabular -<lyxtabular version="3" rows="11" columns="4"> -<features firstHeadEmpty="true"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<column alignment="left" valignment="top" width="0pt"> -<row bottomline="true"> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Information -\series default - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Type -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Get Information -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\series bold -Notes -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -operation -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -int -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - 
-\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -indicates a reply -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -target -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_process_id_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -initiator -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_pt_index_t -\end_inset -</cell> -<cell alignment="left" valignment="top" bottomline="true" usebox="none"> -\begin_inset Text - -\layout Standard - -portal index -\end_inset -</cell> -<cell alignment="right" valignment="top" bottomline="true" 
usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_match_bits_t -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -match bits -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -offset -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_handle_md_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -memory desc -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -requested length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> 
-\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -echo -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -manipulated length -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\family typewriter -ptl_size_t -\family default - -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset -</cell> -</row> -<row> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -data -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - - -\emph on -bytes -\end_inset -</cell> -<cell alignment="left" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -\end_inset -</cell> -<cell alignment="right" valignment="top" usebox="none"> -\begin_inset Text - -\layout Standard - -obtained from the operation -\end_inset -</cell> -</row> -</lyxtabular> - -\end_inset - - -\end_inset - - -\layout Section - -Receiving Messages -\begin_inset LatexCommand \label{sec:receiving} - -\end_inset - - -\layout Standard - -When an incoming message arrives on a network interface, the communication - system first checks that the target process identified in the request is - a valid process that has initialized the network interface (i.e., that the - target process has a valid Portal table). 
- If this test fails, the communication system discards the message and increment -s the dropped message count for the interface. - The remainder of the processing depends on the type of the incoming message. - Put and get messages are subject to access control checks and translation - (searching a match list), while acknowledgement and reply messages bypass - the access control checks and the translation step. -\layout Standard - -Acknowledgement messages include a handle for the memory descriptor used - in the original -\emph on -PtlPut -\emph default - operation. - This memory descriptor will identify the event queue where the event should - be recorded. - Upon receipt of an acknowledgement, the runtime system only needs to confirm - that the memory descriptor and event queue still exist and that there is - space for another event. - Should any of these conditions fail, the message is simply discarded - and the dropped message count for the interface is incremented. - Otherwise, the system builds an acknowledgement event from the information - in the acknowledgement message and adds it to the event queue. -\layout Standard - -Reception of reply messages is also relatively straightforward. - Each reply message includes a handle for a memory descriptor. - If this descriptor exists, it is used to receive the message. - A reply message will be dropped if the memory descriptor identified in - the request doesn't exist. - In this case, the dropped message count for the interface is - incremented. - This is the only reason for dropping reply messages. - Every memory descriptor accepts and truncates incoming reply messages, - eliminating the other potential reasons for rejecting a reply message. -\layout Standard - -The critical step in processing an incoming put or get request involves - mapping the request to a memory descriptor. - This step starts by using the Portal index in the incoming request to identify - a list of match entries. 
- This list of match entries is searched in order until a match entry is - found whose match criteria match the match bits in the incoming request - and whose memory descriptor accepts the request. -\layout Standard - -Because acknowledgement and reply messages are generated in response to requests - made by the process receiving these messages, the checks performed by the - runtime system for acknowledgements and replies are minimal. - In contrast, put and get messages are generated by remote processes and - the checks performed for these messages are more extensive. - Incoming put or get messages may be rejected because: -\layout Itemize - -the Portal index supplied in the request is not valid; -\layout Itemize - -the cookie supplied in the request is not a valid access control entry; - -\layout Itemize - -the access control entry identified by the cookie does not match the identifier - of the requesting process; -\layout Itemize - -the access control entry identified by the cookie does not - match the Portal index supplied in the request; or -\layout Itemize - -the match bits supplied in the request do not match any of the match entries - with a memory descriptor that accepts the request. - -\layout Standard - -In all cases, if the message is rejected, the incoming message is discarded - and the dropped message count for the interface is incremented. -\layout Standard - -A memory descriptor may reject an incoming request for any of the following - reasons: -\layout Itemize - -the -\family typewriter -PTL_MD_PUT -\family default - or -\family typewriter -PTL_MD_GET -\family default - option has not been enabled and the operation is put or get, respectively; - -\layout Itemize - -the length specified in the request is too long for the memory descriptor - and the -\family typewriter -PTL_MD_TRUNCATE -\family default - option has not been enabled. 
-\layout Chapter - -Examples -\begin_inset LatexCommand \label{sec:examples} - -\end_inset - - -\layout Comment - -The examples presented in this chapter have not been updated to reflect - the current API. -\layout Standard - -In this section we present several examples to illustrate expected usage - patterns for the Portals 3.2 API. - The first example describes how to implement parallel servers using the - features of the Portals 3.2 API. - This example covers the access control list and the use of remote managed - offsets. - The second example presents an approach to dealing with dropped requests. - This example covers aspects of match lists and memory descriptors. - The final example covers message reception in MPI. - This example illustrates more sophisticated uses of matching and a procedure - to update a memory descriptor. -\layout Section - -Parallel File Servers -\begin_inset LatexCommand \label{sec:expfs} - -\end_inset - - -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:file} - -\end_inset - - illustrates the logical structure of a parallel file server. - In this case, the parallel server consists of four servers that stripe - application data across four disks. - We would like to present applications with the illusion that the file server - is a single entity. - We will assume that all of the processes that constitute the parallel server - have the same user id. 
-\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename file.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 196pt - lyxheight 147pt -\end_inset - - -\layout Caption - -Parallel File Server -\begin_inset LatexCommand \label{fig:file} - -\end_inset - - -\end_inset - - -\layout Standard - -When an application establishes a connection to the parallel file server, - it will allocate a Portal and access control list entry for communicating - with the server. - The access control list entry will include the Portal and match any process - in the parallel file server, so all of the file server processes will - have access to the portal. - The Portal information and access control entry will be sent to the file - server at this time. - If the application and server need to have multiple, concurrent I/O operations, - they can use additional portals or match entries to keep the operations - from interfering with one another. -\layout Standard - -When an application initiates an I/O operation, it first builds a memory - descriptor that describes the memory region involved in the operation. - This memory descriptor will enable the appropriate operation (put for read - operations and get for write operations) and enable the use of remote offsets - (this lets the servers decide where their data should be placed in the - memory region). - After creating the memory descriptor and linking it into the appropriate - Portal entry, the application sends a read or write request (using -\emph on -PtlPut -\emph default -) to one of the file server processes. - The file server processes can then use put or get operations with the appropria -te offsets to fill or retrieve the contents of the application's buffer. 
- To know when the operation has completed, the application can add an event - queue to the memory descriptor and add up the lengths of the remote operations - until the sum is the size of the requested I/O operation. -\layout Section - -Dealing with Dropped Requests -\begin_inset LatexCommand \label{sec:exdrop} - -\end_inset - - -\layout Standard - -If a process does not anticipate unexpected requests, they will be discarded. - Applications using the Portals API can query the dropped count for the - interface to determine the number of requests that have been dropped (see - Section\SpecialChar ~ - -\begin_inset LatexCommand \ref{sec:nistatus} - -\end_inset - -). - While this approach minimizes resource consumption, it does not provide - information that might be critical in debugging the implementation of a - higher level protocol. -\layout Standard - -To keep track of more information about dropped requests, we use a memory - descriptor that truncates each incoming request to zero bytes and logs - the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - operations in an event queue. - Note that the operations are not dropped in the Portals sense, because - the operation succeeds. -\layout Standard - -The following code fragment illustrates an implementation of this approach. - In this case, we assume that a thread is launched to execute the function - -\family typewriter -watch_drop -\family default -. - This code starts by building an event queue to log truncated operations - and a memory descriptor to truncate the incoming requests. - This example only captures -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests for a single portal. - In a more realistic situation, the memory descriptor would be appended - to the match list for every portal. 
- We also assume that the thread is capable of keeping up with the -\begin_inset Quotes eld -\end_inset - -dropped -\begin_inset Quotes erd -\end_inset - - requests. - If this is not the case, we could use a finite threshold on the memory - descriptor to capture the first few dropped requests. -\layout LyX-Code - - -\size small -#include <stdio.h> -\newline -#include <stdlib.h> -\newline -#include <portals.h> -\newline - -\newline -#define DROP_SIZE 32 /* number of dropped requests to track */ -\newline - -\newline -int watch_drop( ptl_handle_ni_t ni, ptl_pt_index_t index ) { -\newline - ptl_handle_eq_t drop_events; -\newline - ptl_event_t event; -\newline - ptl_handle_md_t drop_em; -\newline - ptl_md_t drop_desc; -\newline - ptl_process_id_t any_proc; -\newline - ptl_handle_me_t match_any; -\newline - -\newline - /* create the event queue */ -\newline - if( PtlEQAlloc(ni, DROP_SIZE, &drop_events) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the event queue -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* build a match entry */ -\newline - any_proc.nid = PTL_ID_ANY; -\newline - any_proc.pid = PTL_ID_ANY; -\newline - PtlMEAttach( index, any_proc, 0, ~(ptl_match_bits_t)0, PTL_RETAIN, -\newline - &match_any ); -\newline - -\newline - /* create the memory descriptor */ -\newline - drop_desc.start = NULL; -\newline - drop_desc.length = 0; -\newline - drop_desc.threshold = PTL_MD_THRESH_INF; -\newline - drop_desc.options = PTL_MD_OP_PUT | PTL_MD_OP_GET | PTL_MD_TRUNCATE; -\newline - drop_desc.user_ptr = NULL; -\newline - drop_desc.eventq = drop_events; -\newline - if( PtlMDAttach(match_any, drop_desc, &drop_em) != PTL_OK ) { -\newline - fprintf( stderr, "Couldn't create the memory descriptor -\backslash -n" ); -\newline - exit( 1 ); -\newline - } -\newline - -\newline - /* watch for "dropped" requests */ -\newline - while( 1 ) { -\newline - if( PtlEQWait( drop_events, &event ) != PTL_OK ) break; -\newline - fprintf( 
stderr, "Dropped request from gid = event.initiator.gid, - event.initiator.rid ); -\newline - } -\newline -} -\layout Section - -Message Transmission in MPI -\begin_inset LatexCommand \label{sec:exmpi} - -\end_inset - - -\layout Standard - -We conclude this section with a fairly extensive example that describes - an approach to implementing message transmission for MPI. - Like many MPI implementations, we distinguish two message transmission - protocols: a short message protocol and a long message protocol. - We use the constant -\family typewriter -MPI_LONG_LENGTH -\family default - to determine the size of a long message. -\layout Standard - -For small messages, the sender simply sends the message and presumes that - the message will be received (i.e., the receiver has allocated a memory region - to receive the message body). - For large messages, the sender also sends the message, but does not presume - that the message body will be saved. - Instead, the sender builds a memory descriptor for the message and enables - get operations on this descriptor. - If the target does not save the body of the message, it will record an - event for the put operation. - When the process later issues a matching MPI receive, it will perform a - get operation to retrieve the body of the message. -\layout Standard - -To facilitate receive side matching based on the protocol, we use the most - significant bit in the match bits to indicate the protocol: 1 for long - messages and 0 for short messages. -\layout Standard - -The following code presents a function that implements the send side of - the protocol. - The global variable -\family typewriter -EndGet -\family default - is the last match entry attached to the Portal index used for posting long - messages. - This entry does not match any incoming requests (i.e., the memory descriptor - rejects all get operations) and is built during initialization of the MPI - library. 
- The other global variable, -\family typewriter -MPI_NI -\family default -, is a handle for the network interface used by the MPI implementation. -\layout LyX-Code - - -\size small -extern ptl_handle_me_t EndGet; -\newline -extern ptl_handle_ni_t MPI_NI; -\newline - -\newline -void MPIsend( void *buf, ptl_size_t len, void *data, ptl_handle_eq_t eventq, -\newline - ptl_process_id target, ptl_match_bits_t match ) -\newline -{ -\newline - ptl_handle_md_t send_handle; -\newline - ptl_md_t mem_desc; -\newline - ptl_ack_req_t want_ack; -\newline - -\newline - mem_desc.start = buf; -\newline - mem_desc.length = len; -\newline - mem_desc.threshold = 1; -\newline - mem_desc.options = PTL_MD_GET_OP; -\newline - mem_desc.user_ptr = data; -\newline - mem_desc.eventq = eventq; -\newline - -\newline - if( len >= MPI_LONG_LENGTH ) { -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - /* add a match entry to the end of the get list */ -\newline - PtlMEInsert( target, match, 0, PTL_UNLINK, PTL_INS_BEFORE, EndGet, - &me_handle ); -\newline - PtlMDAttach( me_handle, mem_desc, PTL_UNLINK, NULL ); -\newline - -\newline - /* we want an ack for long messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a long message - */ -\newline - match |= 1<<63; -\newline - } else { -\newline - /* we don't want an ack for short messages */ -\newline - want_ack = PTL_ACK_REQ; -\newline - -\newline - /* set the protocol bit to indicate that this is a short message - */ -\newline - match &= ~(1<<63); -\newline - } -\newline - -\newline - /* create a memory descriptor and send it */ -\newline - PtlMDBind( MPI_NI, mem_desc, &send_handle ); -\newline - PtlPut( send_handle, want_ack, target, MPI_SEND_PINDEX, MPI_AINDEX, match, - 0 ); -\newline -} -\layout Standard - -The -\emph on -MPISend -\emph default - function returns as soon as the message has been scheduled for transmission. 
- The event queue argument, -\family typewriter -eventq -\family default -, can be used to determine the disposition of the message. - Assuming that -\family typewriter -eventq -\family default - is not -\family typewriter -PTL_EQ_NONE -\family default -, a -\family typewriter -PTL_EVENT_SENT -\family default - event will be recorded for each message as the message is transmitted. - For small messages, this is the only event that will be recorded in -\family typewriter -eventq -\family default -. - In contrast, long messages include an explicit request for an acknowledgement. - If the -\family typewriter -target -\family default - process has posted a matching receive, the acknowledgement will be sent - as the message is received. - If a matching receive has not been posted, the message will be discarded - and no acknowledgement will be sent. - When the -\family typewriter -target -\family default - process later issues a matching receive, the receive will be translated - into a get operation and a -\family typewriter -PTL_EVENT_GET -\family default - event will be recorded in -\family typewriter -eventq -\family default -. -\layout Standard - -Figure\SpecialChar ~ - -\begin_inset LatexCommand \ref{fig:mpi} - -\end_inset - - illustrates the organization of the match list used for receiving MPI messages. - The initial entries (not shown in this figure) would be used to match the - MPI receives that have been preposted by the application. - The preposted receives are followed by a match entry, -\emph on -RcvMark -\emph default -, that marks the boundary between preposted receives and the memory descriptors - used for -\begin_inset Quotes eld -\end_inset - -unexpected -\begin_inset Quotes erd -\end_inset - - messages. 
- The -\emph on -RcvMark -\emph default - entry is followed by a small collection of match entries that match unexpected - -\begin_inset Quotes eld -\end_inset - -short -\begin_inset Quotes erd -\end_inset - - messages, i.e., messages that have a 0 in the most significant bit of their - match bits. - The memory descriptors associated with these match entries will append - the incoming message to the associated memory descriptor and record an - event in an event queue for unexpected messages. - The unexpected short message matching entries are followed by a match entry - that will match messages that were not matched by the preceding match entries, - i.e., the unexpected long messages. - The memory descriptor associated with this match entry truncates the message - body and records an event in the event queue for unexpected messages. - Note that all of the memory descriptors used for unexpected messages share - a common event queue. - This makes it possible to process the unexpected messages in the order - in which they arrived, regardless of the protocol (short or long) used to send them. -\layout Standard - - -\begin_inset Float figure -placement htbp -wide false -collapsed false - -\layout Standard -\align center - -\begin_inset Graphics FormatVersion 1 - filename mpi.eps - display color - size_type 0 - rotateOrigin center - lyxsize_type 1 - lyxwidth 389pt - lyxheight 284pt -\end_inset - - -\layout Caption - -Message Reception in MPI -\begin_inset LatexCommand \label{fig:mpi} - -\end_inset - - -\end_inset - - -\layout Standard - -When the local MPI process posts an MPI receive, we must first search the - events in the unexpected message queue to see if a matching message has already - arrived. - If no matching message is found, a match entry for the receive is inserted - before the -\emph on -RcvMark -\emph default - entry--after the match entries for all of the previously posted receives - and before the match entries for the unexpected messages. 
- This ensures that preposted receives are matched in the order that they - were posted (a requirement of MPI). - -\layout Standard - -While this strategy respects the temporal semantics of MPI, it introduces - a race condition: a matching message might arrive after the events in the - unexpected message queue have been searched, but before the match entry - for the receive has been inserted in the match list. - -\layout Standard - -To avoid this race condition we start by setting the -\family typewriter -threshold -\family default - of the memory descriptor to 0, making the descriptor inactive. - We then insert the match entry into the match list and proceed to search - the events in the unexpected message queue. - A matching message that arrives as we are searching the unexpected message - queue will not be accepted by the memory descriptor and, if not matched - by an earlier match list element, will add an event to the unexpected message - queue. - After searching the events in the unexpected message queue, we update the - memory descriptor, setting the threshold to 1 to activate the memory descriptor. - This update is predicated by the condition that the unexpected message - queue is empty. - We repeat the process of searching the unexpected message queue until the - update succeeds. -\layout Standard - -The following code fragment illustrates this approach. - Because events must be removed from the unexpected message queue to be - examined, this code fragment assumes the existence of a user managed event - list, -\family typewriter -Rcvd -\family default -, for the events that have already been removed from the unexpected message - queue. - In an effort to keep the example focused on the basic protocol, we have - omitted the code that would be needed to manage the memory descriptors - used for unexpected short messages. - In particular, we simply leave messages in these descriptors until they - are received by the application. 
- In a robust implementation, we would introduce code to ensure that short - unexpected messages are removed from these memory descriptors so that they - can be re-used. -\layout LyX-Code - - -\size small -extern ptl_handle_eq_t UnexpQueue; -\newline -extern ptl_handle_me_t RcvMark; -\newline -extern ptl_handle_me_t ShortMatch; -\newline - -\newline -typedef struct event_list_tag { -\newline - ptl_event_t event; -\newline - struct event_list_tag* next; -\newline -} event_list; -\newline - -\newline -extern event_list Rcvd; -\newline - -\newline -void AppendRcvd( ptl_event_t event ) -\newline -{ -\newline - /* append an event onto the Rcvd list */ -\newline -} -\newline - -\newline -int SearchRcvd( void *buf, ptl_size_t len, ptl_process_id_t sender, ptl_match_bi -ts_t match, -\newline - ptl_match_bits_t ignore, ptl_event_t *event ) -\newline -{ -\newline - /* Search the Rcvd event queue, looking for a message that matches the - requested message. -\newline - * If one is found, remove the event from the Rcvd list and return it. 
- */ -\newline -} -\newline - -\newline -typedef enum { RECEIVED, POSTED } receive_state; -\newline - -\newline -receive_state CopyMsg( void *buf, ptl_size_t &length, ptl_event_t event, - ptl_md_t md_buf ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_me_t me_handle; -\newline - -\newline - if( event.rlength >= MPI_LONG_LENGTH ) { -\newline - PtlMDBind( MPI_NI, md_buf, &md_handle ); -\newline - PtlGet( event.initiator, MPI_GET_PINDEX, 0, event.match_bits, MPI_AINDEX, - md_handle ); -\newline - return POSTED; -\newline - } else { -\newline - /* copy the message */ -\newline - if( event.mlength < *length ) *length = event.mlength; -\newline - memcpy( buf, (char*)event.md_desc.start+event.offset, *length ); -\newline - return RECEIVED; -\newline - } -\newline -} -\newline - -\newline -receive_state MPIreceive( void *buf, ptl_size_t &len, void *MPI_data, ptl_handle -_eq_t eventq, -\newline - ptl_process_id_t sender, ptl_match_bits_t match, - ptl_match_bits_t ignore ) -\newline -{ -\newline - ptl_md_t md_buf; -\newline - ptl_handle_md_t md_handle; -\newline - ptl_handle_me_t me_handle; -\newline - ptl_event_t event; -\newline - -\newline - /* build a memory descriptor for the receive */ -\newline - md_buf.start = buf; -\newline - md_buf.length = *len; -\newline - md_buf.threshold = 0; /* temporarily disabled */ -\newline - md_buf.options = PTL_MD_PUT_OP; -\newline - md_buf.user_ptr = MPI_data; -\newline - md_buf.eventq = eventq; -\newline - -\newline - /* see if we have already received the message */ -\newline - if( SearchRcvd(buf, len, sender, match, ignore, &event) ) -\newline - return CopyMsg( buf, len, event, md_buf ); -\newline - -\newline - /* create the match entry and attach the memory descriptor */ -\newline - PtlMEInsert(sender, match, ignore, PTL_UNLINK, PTL_INS_BEFORE, RcvMark, - &me_handle); -\newline - PtlMDAttach( me_handle, md_buf, PTL_UNLINK, &md_handle ); -\newline - -\newline - md_buf.threshold = 1; -\newline - do -\newline - if( 
PtlEQGet( UnexpQueue, &event ) != PTL_EQ_EMPTY ) { -\newline - if( MPIMatch(event, match, ignore, sender) ) { -\newline - return CopyMsg( buf, len, (char*)event.md_desc.start+event.offset, - md_buf ); -\newline - } else { -\newline - AppendRcvd( event ); -\newline - } -\newline - } -\newline - while( PtlMDUpdate(md_handle, NULL, &md_buf, unexp_queue) == PTL_NOUPDATE - ); -\newline - return POSTED; -\newline -} -\layout Chapter* - -Acknowledgments -\layout Standard - -Several people have contributed to the philosophy, design, and implementation - of the Portals message passing architecture as it has evolved. - We acknowledge the following people for their contributions: Al Audette, - Lee Ann Fisk, David Greenberg, Tramm Hudson, Gabi Istrail, Chu Jong, Mike - Levenhagen, Jim Otto, Mark Sears, Lance Shuler, Mack Stallcup, Jeff VanDyke, - Dave van Dresser, Lee Ward, and Stephen Wheat. - -\layout Standard - - -\begin_inset LatexCommand \BibTeX[ieee]{portals3} - -\end_inset - - -\the_end diff --git a/lustre/portals/doc/put.fig b/lustre/portals/doc/put.fig deleted file mode 100644 index 5235b6d..0000000 --- a/lustre/portals/doc/put.fig +++ /dev/null @@ -1,32 +0,0 @@ -#FIG 3.2 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -6 1350 900 2175 1200 -4 0 0 100 0 0 10 0.0000 0 105 825 1350 1200 Transmission\001 -4 0 0 100 0 0 10 0.0000 0 105 285 1620 1050 Data\001 --6 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 2700 1275 2700 1725 -2 1 0 1 0 7 100 0 -1 4.000 0 0 -1 1 0 2 - 0 0 1.00 60.00 120.00 - 900 525 2700 1200 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 0 300 1200 300 1200 2250 0 2250 0 300 -2 2 0 1 0 7 100 0 -1 3.000 0 0 7 0 0 5 - 2400 300 3600 300 3600 2250 2400 2250 2400 300 -2 1 1 1 0 7 100 0 -1 4.000 0 0 7 1 0 2 - 0 0 1.00 60.00 120.00 - 2699 1788 899 1938 -4 0 0 100 0 0 10 0.0000 0 105 720 2775 1650 Translation\001 -4 1 0 100 0 0 10 0.0000 0 135 555 1800 2025 Optional\001 -4 1 0 100 0 0 10 0.0000 0 135 1170 1800 2175 
Acknowledgement\001 -4 0 0 100 0 0 10 0.0000 0 105 405 2850 1500 Portal\001 -4 1 0 100 0 0 10 0.0000 0 135 405 3000 525 Target\001 -4 1 0 100 0 0 10 0.0000 0 105 540 600 525 Initiator\001 diff --git a/lustre/portals/include/.cvsignore b/lustre/portals/include/.cvsignore deleted file mode 100644 index d45f796..0000000 --- a/lustre/portals/include/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -config.h -stamp-h -stamp-h1 -stamp-h.in diff --git a/lustre/portals/include/Makefile.am b/lustre/portals/include/Makefile.am deleted file mode 100644 index 2cf7f99..0000000 --- a/lustre/portals/include/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -SUBDIRS = portals linux -EXTRA_DIST = config.h.in -include $(top_srcdir)/Rules diff --git a/lustre/portals/include/config.h.in b/lustre/portals/include/config.h.in deleted file mode 100644 index 3aa6909..0000000 --- a/lustre/portals/include/config.h.in +++ /dev/null @@ -1,58 +0,0 @@ -/* portals/include/config.h.in. Generated from configure.in by autoheader. */ - -/* Define to 1 if you have the <inttypes.h> header file. */ -#undef HAVE_INTTYPES_H - -/* Define to 1 if you have the <memory.h> header file. */ -#undef HAVE_MEMORY_H - -/* Define to 1 if you have the <stdint.h> header file. */ -#undef HAVE_STDINT_H - -/* Define to 1 if you have the <stdlib.h> header file. */ -#undef HAVE_STDLIB_H - -/* Define to 1 if you have the <strings.h> header file. */ -#undef HAVE_STRINGS_H - -/* Define to 1 if you have the <string.h> header file. */ -#undef HAVE_STRING_H - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#undef HAVE_SYS_STAT_H - -/* Define to 1 if you have the <sys/types.h> header file. */ -#undef HAVE_SYS_TYPES_H - -/* Define to 1 if you have the <unistd.h> header file. 
*/ -#undef HAVE_UNISTD_H - -/* IOCTL Buffer Size */ -#undef OBD_MAX_IOCTL_BUFFER - -/* Name of package */ -#undef PACKAGE - -/* Define to the address where bug reports for this package should be sent. */ -#undef PACKAGE_BUGREPORT - -/* Define to the full name of this package. */ -#undef PACKAGE_NAME - -/* Define to the full name and version of this package. */ -#undef PACKAGE_STRING - -/* Define to the one symbol short name of this package. */ -#undef PACKAGE_TARNAME - -/* Define to the version of this package. */ -#undef PACKAGE_VERSION - -/* The size of a `unsigned long long', as computed by sizeof. */ -#undef SIZEOF_UNSIGNED_LONG_LONG - -/* Define to 1 if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Version number of package */ -#undef VERSION diff --git a/lustre/portals/include/linux/Makefile.am b/lustre/portals/include/linux/Makefile.am deleted file mode 100644 index 6a65cb5..0000000 --- a/lustre/portals/include/linux/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include $(top_srcdir)/Rules - -linuxincludedir = $(includedir)/linux - -linuxinclude_HEADERS=kp30.h portals_lib.h diff --git a/lustre/portals/include/linux/kp30.h b/lustre/portals/include/linux/kp30.h deleted file mode 100644 index ee3b9fc..0000000 --- a/lustre/portals/include/linux/kp30.h +++ /dev/null @@ -1,941 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _KP30_INCLUDED -#define _KP30_INCLUDED - - -#define PORTAL_DEBUG - -#ifndef offsetof -# define offsetof(typ,memb) ((int)((char *)&(((typ *)0)->memb))) -#endif - -#define LOWEST_BIT_SET(x) ((x) & ~((x) - 1)) - -#ifndef CONFIG_SMP -# define smp_processor_id() 0 -#endif - -/* - * Debugging - */ -extern unsigned int portal_subsystem_debug; -extern unsigned int portal_stack; -extern unsigned int portal_debug; -extern unsigned int portal_printk; -/* Debugging subsystems (8 bit ID) - * - * If you add debug subsystem #32, you need to send email to phil, because - * you're going to break kernel subsystem debug filtering. */ -#define S_UNDEFINED (0 << 24) -#define S_MDC (1 << 24) -#define S_MDS (2 << 24) -#define S_OSC (3 << 24) -#define S_OST (4 << 24) -#define S_CLASS (5 << 24) -#define S_OBDFS (6 << 24) /* obsolete */ -#define S_LLITE (7 << 24) -#define S_RPC (8 << 24) -#define S_EXT2OBD (9 << 24) /* obsolete */ -#define S_PORTALS (10 << 24) -#define S_SOCKNAL (11 << 24) -#define S_QSWNAL (12 << 24) -#define S_PINGER (13 << 24) -#define S_FILTER (14 << 24) -#define S_TRACE (15 << 24) /* obsolete */ -#define S_ECHO (16 << 24) -#define S_LDLM (17 << 24) -#define S_LOV (18 << 24) -#define S_GMNAL (19 << 24) -#define S_PTLROUTER (20 << 24) -#define S_COBD (21 << 24) -#define S_PTLBD (22 << 24) -#define S_LOG (23 << 24) - -/* If you change these values, please keep portals/linux/utils/debug.c - * up to date! 
*/ - -/* Debugging masks (24 bits, non-overlapping) */ -#define D_TRACE (1 << 0) /* ENTRY/EXIT markers */ -#define D_INODE (1 << 1) -#define D_SUPER (1 << 2) -#define D_EXT2 (1 << 3) /* anything from ext2_debug */ -#define D_MALLOC (1 << 4) /* print malloc, free information */ -#define D_CACHE (1 << 5) /* cache-related items */ -#define D_INFO (1 << 6) /* general information */ -#define D_IOCTL (1 << 7) /* ioctl related information */ -#define D_BLOCKS (1 << 8) /* ext2 block allocation */ -#define D_NET (1 << 9) /* network communications */ -#define D_WARNING (1 << 10) -#define D_BUFFS (1 << 11) -#define D_OTHER (1 << 12) -#define D_DENTRY (1 << 13) -#define D_PORTALS (1 << 14) /* ENTRY/EXIT markers */ -#define D_PAGE (1 << 15) /* bulk page handling */ -#define D_DLMTRACE (1 << 16) -#define D_ERROR (1 << 17) /* CERROR(...) == CDEBUG (D_ERROR, ...) */ -#define D_EMERG (1 << 18) /* CEMERG(...) == CDEBUG (D_EMERG, ...) */ -#define D_HA (1 << 19) /* recovery and failover */ -#define D_RPCTRACE (1 << 20) /* for distributed debugging */ -#define D_VFSTRACE (1 << 21) - -#ifndef __KERNEL__ -#define THREAD_SIZE 8192 -#endif -#ifdef __ia64__ -#define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_dwarf_cfa() & \ - (THREAD_SIZE - 1))) -#else -#define CDEBUG_STACK() (THREAD_SIZE - \ - ((unsigned long)__builtin_frame_address(0) & \ - (THREAD_SIZE - 1))) -#endif - -#ifdef __KERNEL__ -#define CHECK_STACK(stack) \ - do { \ - if ((stack) > 3*THREAD_SIZE/4 && (stack) > portal_stack) { \ - portals_debug_msg(DEBUG_SUBSYSTEM, D_ERROR, \ - __FILE__, __FUNCTION__, __LINE__, \ - (stack), \ - "maximum lustre stack %u\n", \ - portal_stack = (stack)); \ - /*panic("LBUG");*/ \ - } \ - } while (0) -#else -#define CHECK_STACK(stack) do { } while(0) -#endif - -#if 1 -#define CDEBUG(mask, format, a...) 
\ -do { \ - CHECK_STACK(CDEBUG_STACK()); \ - if (!(mask) || ((mask) & (D_ERROR | D_EMERG)) || \ - (portal_debug & (mask) && \ - portal_subsystem_debug & (1 << (DEBUG_SUBSYSTEM >> 24)))) \ - portals_debug_msg(DEBUG_SUBSYSTEM, mask, \ - __FILE__, __FUNCTION__, __LINE__, \ - CDEBUG_STACK(), format , ## a); \ -} while (0) - -#define CWARN(format, a...) CDEBUG(D_WARNING, format, ## a) -#define CERROR(format, a...) CDEBUG(D_ERROR, format, ## a) -#define CEMERG(format, a...) CDEBUG(D_EMERG, format, ## a) - -#define GOTO(label, rc) \ -do { \ - long GOTO__ret = (long)(rc); \ - CDEBUG(D_TRACE,"Process leaving via %s (rc=%lu : %ld : %lx)\n", \ - #label, (unsigned long)GOTO__ret, (signed long)GOTO__ret,\ - (signed long)GOTO__ret); \ - goto label; \ -} while (0) - -#define RETURN(rc) \ -do { \ - typeof(rc) RETURN__ret = (rc); \ - CDEBUG(D_TRACE, "Process leaving (rc=%lu : %ld : %lx)\n", \ - (long)RETURN__ret, (long)RETURN__ret, (long)RETURN__ret);\ - return RETURN__ret; \ -} while (0) - -#define ENTRY \ -do { \ - CDEBUG(D_TRACE, "Process entered\n"); \ -} while (0) - -#define EXIT \ -do { \ - CDEBUG(D_TRACE, "Process leaving\n"); \ -} while(0) -#else -#define CDEBUG(mask, format, a...) do { } while (0) -#define CWARN(format, a...) do { } while (0) -#define CERROR(format, a...) printk("<3>" format, ## a) -#define CEMERG(format, a...) 
printk("<0>" format, ## a) -#define GOTO(label, rc) do { (void)(rc); goto label; } while (0) -#define RETURN(rc) return (rc) -#define ENTRY do { } while (0) -#define EXIT do { } while (0) -#endif - - -#ifdef __KERNEL__ -# include <linux/vmalloc.h> -# include <linux/time.h> -# include <linux/slab.h> -# include <linux/interrupt.h> -# include <linux/highmem.h> -# include <linux/module.h> -# include <linux/version.h> -# include <portals/lib-nal.h> -# include <linux/smp_lock.h> -# include <asm/atomic.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define schedule_work schedule_task -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_TQUEUE((wq), 0, 0); \ - PREPARE_TQUEUE((wq), (cb), (cbdata)); \ -} while (0) - -#define ll_invalidate_inode_pages invalidate_inode_pages -#define PageUptodate Page_Uptodate -#define our_recalc_sigpending(current) recalc_sigpending(current) -#define num_online_cpus() smp_num_cpus -static inline void our_cond_resched(void) -{ - if (current->need_resched) - schedule (); -} - -#else - -#define prepare_work(wq,cb,cbdata) \ -do { \ - INIT_WORK((wq), (void *)(cb), (void *)(cbdata)); \ -} while (0) -#define ll_invalidate_inode_pages(inode) invalidate_inode_pages((inode)->i_mapping) -#define wait_on_page wait_on_page_locked -#define our_recalc_sigpending(current) recalc_sigpending() -#define strtok(a,b) strpbrk(a, b) -static inline void our_cond_resched(void) -{ - cond_resched(); -} -#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) */ - -#ifdef PORTAL_DEBUG -extern void kportal_assertion_failed(char *expr,char *file,char *func,int line); -#define LASSERT(e) ((e) ? 
0 : kportal_assertion_failed( #e , __FILE__, \ - __FUNCTION__, __LINE__)) -#else -#define LASSERT(e) -#endif - -#ifdef __arch_um__ -#define LBUG_WITH_LOC(file, func, line) \ -do { \ - CEMERG("LBUG - trying to dump log to /tmp/lustre-log\n"); \ - portals_debug_dumplog(); \ - portals_run_lbug_upcall(file, func, line); \ - panic("LBUG"); \ -} while (0) -#else -#define LBUG_WITH_LOC(file, func, line) \ -do { \ - CEMERG("LBUG\n"); \ - portals_debug_dumplog(); \ - portals_run_lbug_upcall(file, func, line); \ - set_task_state(current, TASK_UNINTERRUPTIBLE); \ - schedule(); \ -} while (0) -#endif /* __arch_um__ */ - -#define LBUG() LBUG_WITH_LOC(__FILE__, __FUNCTION__, __LINE__) - -/* - * Memory - */ -#ifdef PORTAL_DEBUG -extern atomic_t portal_kmemory; - -# define portal_kmem_inc(ptr, size) \ -do { \ - atomic_add(size, &portal_kmemory); \ -} while (0) - -# define portal_kmem_dec(ptr, size) do { \ - atomic_sub(size, &portal_kmemory); \ -} while (0) - -#else -# define portal_kmem_inc(ptr, size) do {} while (0) -# define portal_kmem_dec(ptr, size) do {} while (0) -#endif /* PORTAL_DEBUG */ - -#define PORTAL_VMALLOC_SIZE 16384 - -#define PORTAL_ALLOC(ptr, size) \ -do { \ - LASSERT (!in_interrupt()); \ - if ((size) > PORTAL_VMALLOC_SIZE) \ - (ptr) = vmalloc(size); \ - else \ - (ptr) = kmalloc((size), GFP_NOFS); \ - if ((ptr) == NULL) \ - CERROR("PORTALS: out of memory at %s:%d (tried to alloc '"\ - #ptr "' = %d)\n", __FILE__, __LINE__, (int)(size));\ - else { \ - portal_kmem_inc((ptr), (size)); \ - memset((ptr), 0, (size)); \ - } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %d at %p (tot %d).\n", \ - (int)(size), (ptr), atomic_read (&portal_kmemory)); \ -} while (0) - -#define PORTAL_FREE(ptr, size) \ -do { \ - int s = (size); \ - if ((ptr) == NULL) { \ - CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ - "%s:%d\n", s, __FILE__, __LINE__); \ - break; \ - } \ - if (s > PORTAL_VMALLOC_SIZE) \ - vfree(ptr); \ - else \ - kfree(ptr); \ - portal_kmem_dec((ptr), s); \ - 
CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - s, (ptr), atomic_read(&portal_kmemory)); \ -} while (0) - -#define PORTAL_SLAB_ALLOC(ptr, slab, size) \ -do { \ - LASSERT(!in_interrupt()); \ - (ptr) = kmem_cache_alloc((slab), SLAB_KERNEL); \ - if ((ptr) == NULL) { \ - CERROR("PORTALS: out of memory at %s:%d (tried to alloc" \ - " '" #ptr "' from slab '" #slab "')\n", __FILE__, \ - __LINE__); \ - } else { \ - portal_kmem_inc((ptr), (size)); \ - memset((ptr), 0, (size)); \ - } \ - CDEBUG(D_MALLOC, "kmalloced '" #ptr "': %ld at %p (tot %d).\n", \ - (int)(size), (ptr), atomic_read(&portal_kmemory)); \ -} while (0) - -#define PORTAL_SLAB_FREE(ptr, slab, size) \ -do { \ - int s = (size); \ - if ((ptr) == NULL) { \ - CERROR("PORTALS: free NULL '" #ptr "' (%d bytes) at " \ - "%s:%d\n", s, __FILE__, __LINE__); \ - break; \ - } \ - memset((ptr), 0x5a, s); \ - kmem_cache_free((slab), ptr); \ - portal_kmem_dec((ptr), s); \ - CDEBUG(D_MALLOC, "kfreed '" #ptr "': %d at %p (tot %d).\n", \ - s, (ptr), atomic_read (&portal_kmemory)); \ -} while (0) - -/* ------------------------------------------------------------------- */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - -#define PORTAL_SYMBOL_REGISTER(x) inter_module_register(#x, THIS_MODULE, &x) -#define PORTAL_SYMBOL_UNREGISTER(x) inter_module_unregister(#x) - -#define PORTAL_SYMBOL_GET(x) ((typeof(&x))inter_module_get(#x)) -#define PORTAL_SYMBOL_PUT(x) inter_module_put(#x) - -#define PORTAL_MODULE_USE MOD_INC_USE_COUNT -#define PORTAL_MODULE_UNUSE MOD_DEC_USE_COUNT -#else - -#define PORTAL_SYMBOL_REGISTER(x) -#define PORTAL_SYMBOL_UNREGISTER(x) - -#define PORTAL_SYMBOL_GET(x) symbol_get(x) -#define PORTAL_SYMBOL_PUT(x) symbol_put(x) - -#define PORTAL_MODULE_USE try_module_get(THIS_MODULE) -#define PORTAL_MODULE_UNUSE module_put(THIS_MODULE) - -#endif - -/******************************************************************************/ -/* Kernel Portals Router interface */ - -typedef void 
(*kpr_fwd_callback_t)(void *arg, int error); // completion callback - -/* space for routing targets to stash "stuff" in a forwarded packet */ -typedef union { - long long _alignment; - void *_space[16]; /* scale with CPU arch */ -} kprfd_scratch_t; - -/* Kernel Portals Routing Forwarded message Descriptor */ -typedef struct { - struct list_head kprfd_list; /* stash in queues (routing target can use) */ - ptl_nid_t kprfd_target_nid; /* final destination NID */ - ptl_nid_t kprfd_gateway_nid; /* gateway NID */ - int kprfd_nob; /* # message bytes (including header) */ - int kprfd_niov; /* # message frags (including header) */ - struct iovec *kprfd_iov; /* message fragments */ - void *kprfd_router_arg; // originating NAL's router arg - kpr_fwd_callback_t kprfd_callback; /* completion callback */ - void *kprfd_callback_arg; /* completion callback arg */ - kprfd_scratch_t kprfd_scratch; // scratchpad for routing targets -} kpr_fwd_desc_t; - -typedef void (*kpr_fwd_t)(void *arg, kpr_fwd_desc_t *fwd); - -/* NAL's routing interface (Kernel Portals Routing Nal Interface) */ -typedef const struct { - int kprni_nalid; /* NAL's id */ - void *kprni_arg; /* Arg to pass when calling into NAL */ - kpr_fwd_t kprni_fwd; /* NAL's forwarding entrypoint */ -} kpr_nal_interface_t; - -/* Router's routing interface (Kernel Portals Routing Router Interface) */ -typedef const struct { - /* register the calling NAL with the router and get back the handle for - * subsequent calls */ - int (*kprri_register) (kpr_nal_interface_t *nal_interface, - void **router_arg); - - /* ask the router to find a gateway that forwards to 'nid' and is a peer - * of the calling NAL */ - int (*kprri_lookup) (void *router_arg, ptl_nid_t nid, - ptl_nid_t *gateway_nid); - - /* hand a packet over to the router for forwarding */ - kpr_fwd_t kprri_fwd_start; - - /* hand a packet back to the router for completion */ - void (*kprri_fwd_done) (void *router_arg, kpr_fwd_desc_t *fwd, - int error); - - /* the calling NAL is 
shutting down */ - void (*kprri_shutdown) (void *router_arg); - - /* deregister the calling NAL with the router */ - void (*kprri_deregister) (void *router_arg); - -} kpr_router_interface_t; - -/* Convenient struct for NAL to stash router interface/args */ -typedef struct { - kpr_router_interface_t *kpr_interface; - void *kpr_arg; -} kpr_router_t; - -/* Router's control interface (Kernel Portals Routing Control Interface) */ -typedef const struct { - int (*kprci_add_route)(int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); - int (*kprci_del_route)(ptl_nid_t nid); - int (*kprci_get_route)(int index, int *gateway_nal, - ptl_nid_t *gateway, ptl_nid_t *lo_nid, - ptl_nid_t *hi_nid); -} kpr_control_interface_t; - -extern kpr_control_interface_t kpr_control_interface; -extern kpr_router_interface_t kpr_router_interface; - -static inline int -kpr_register (kpr_router_t *router, kpr_nal_interface_t *nalif) -{ - int rc; - - router->kpr_interface = PORTAL_SYMBOL_GET (kpr_router_interface); - if (router->kpr_interface == NULL) - return (-ENOENT); - - rc = (router->kpr_interface)->kprri_register (nalif, &router->kpr_arg); - if (rc != 0) - router->kpr_interface = NULL; - - PORTAL_SYMBOL_PUT (kpr_router_interface); - return (rc); -} - -static inline int -kpr_routing (kpr_router_t *router) -{ - return (router->kpr_interface != NULL); -} - -static inline int -kpr_lookup (kpr_router_t *router, ptl_nid_t nid, ptl_nid_t *gateway_nid) -{ - if (!kpr_routing (router)) - return (-EHOSTUNREACH); - - return (router->kpr_interface->kprri_lookup(router->kpr_arg, nid, - gateway_nid)); -} - -static inline void -kpr_fwd_init (kpr_fwd_desc_t *fwd, ptl_nid_t nid, - int nob, int niov, struct iovec *iov, - kpr_fwd_callback_t callback, void *callback_arg) -{ - fwd->kprfd_target_nid = nid; - fwd->kprfd_gateway_nid = nid; - fwd->kprfd_nob = nob; - fwd->kprfd_niov = niov; - fwd->kprfd_iov = iov; - fwd->kprfd_callback = callback; - fwd->kprfd_callback_arg = callback_arg; -} - 
-static inline void -kpr_fwd_start (kpr_router_t *router, kpr_fwd_desc_t *fwd) -{ - if (!kpr_routing (router)) - fwd->kprfd_callback (fwd->kprfd_callback_arg, -EHOSTUNREACH); - else - router->kpr_interface->kprri_fwd_start (router->kpr_arg, fwd); -} - -static inline void -kpr_fwd_done (kpr_router_t *router, kpr_fwd_desc_t *fwd, int error) -{ - LASSERT (kpr_routing (router)); - router->kpr_interface->kprri_fwd_done (router->kpr_arg, fwd, error); -} - -static inline void -kpr_shutdown (kpr_router_t *router) -{ - if (kpr_routing (router)) - router->kpr_interface->kprri_shutdown (router->kpr_arg); -} - -static inline void -kpr_deregister (kpr_router_t *router) -{ - if (!kpr_routing (router)) - return; - router->kpr_interface->kprri_deregister (router->kpr_arg); - router->kpr_interface = NULL; -} - -/******************************************************************************/ - -#ifdef PORTALS_PROFILING -#define prof_enum(FOO) PROF__##FOO -enum { - prof_enum(our_recvmsg), - prof_enum(our_sendmsg), - prof_enum(socknal_recv), - prof_enum(lib_parse), - prof_enum(conn_list_walk), - prof_enum(memcpy), - prof_enum(lib_finalize), - prof_enum(pingcli_time), - prof_enum(gmnal_send), - prof_enum(gmnal_recv), - MAX_PROFS -}; - -struct prof_ent { - char *str; - /* hrmph. wrap-tastic. 
*/ - u32 starts; - u32 finishes; - cycles_t total_cycles; - cycles_t start; - cycles_t end; -}; - -extern struct prof_ent prof_ents[MAX_PROFS]; - -#define PROF_START(FOO) \ - do { \ - struct prof_ent *pe = &prof_ents[PROF__##FOO]; \ - pe->starts++; \ - pe->start = get_cycles(); \ - } while (0) - -#define PROF_FINISH(FOO) \ - do { \ - struct prof_ent *pe = &prof_ents[PROF__##FOO]; \ - pe->finishes++; \ - pe->end = get_cycles(); \ - pe->total_cycles += (pe->end - pe->start); \ - } while (0) -#else /* !PORTALS_PROFILING */ -#define PROF_START(FOO) do {} while(0) -#define PROF_FINISH(FOO) do {} while(0) -#endif /* PORTALS_PROFILING */ - -/* debug.c */ -void portals_run_lbug_upcall(char * file, char *fn, int line); -void portals_debug_dumplog(void); -int portals_debug_init(unsigned long bufsize); -int portals_debug_cleanup(void); -int portals_debug_clear_buffer(void); -int portals_debug_mark_buffer(char *text); -int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *file, unsigned int size); -__s32 portals_debug_copy_to_user(char *buf, unsigned long len); -#if (__GNUC__) -/* Use the special GNU C __attribute__ hack to have the compiler check the - * printf style argument string against the actual argument count and - * types. - */ -#ifdef printf -# warning printf has been defined as a macro... -# undef printf -#endif -void portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) - __attribute__ ((format (printf, 7, 8))); -#else -void portals_debug_msg (int subsys, int mask, char *file, char *fn, - int line, unsigned long stack, - const char *format, ...); -#endif /* __GNUC__ */ -void portals_debug_set_level(unsigned int debug_level); - -# define fprintf(a, format, b...) CDEBUG(D_OTHER, format , ## b) -# define printf(format, b...) 
CDEBUG(D_OTHER, format , ## b) -# define time(a) CURRENT_TIME - -extern void kportal_daemonize (char *name); -extern void kportal_blockallsigs (void); - -#else /* !__KERNEL__ */ -# include <stdio.h> -# include <stdlib.h> -#ifndef __CYGWIN__ -# include <stdint.h> -#endif -# include <unistd.h> -# include <time.h> -# include <asm/types.h> -# ifndef DEBUG_SUBSYSTEM -# define DEBUG_SUBSYSTEM S_UNDEFINED -# endif -# ifdef PORTAL_DEBUG -# undef NDEBUG -# include <assert.h> -# define LASSERT(e) assert(e) -# else -# define LASSERT(e) -# endif -# define printk(format, args...) printf (format, ## args) -# define PORTAL_ALLOC(ptr, size) do { (ptr) = malloc(size); } while (0); -# define PORTAL_FREE(a, b) do { free(a); } while (0); -# define portals_debug_msg(subsys, mask, file, fn, line, stack, format, a...) \ - printf ("%02x:%06x (@%lu %s:%s,l. %d %d %lu): " format, \ - (subsys) >> 24, (mask), (long)time(0), file, fn, line, \ - getpid() , stack, ## a); -#endif - -#ifndef CURRENT_TIME -# define CURRENT_TIME time(0) -#endif - -#include <linux/portals_lib.h> - -/* - * USER LEVEL STUFF BELOW - */ - -#define PORTAL_IOCTL_VERSION 0x00010007 -#define PING_SYNC 0 -#define PING_ASYNC 1 - -struct portal_ioctl_data { - __u32 ioc_len; - __u32 ioc_version; - __u64 ioc_nid; - __u64 ioc_nid2; - __u64 ioc_nid3; - __u32 ioc_count; - __u32 ioc_nal; - __u32 ioc_nal_cmd; - __u32 ioc_fd; - __u32 ioc_id; - - __u32 ioc_flags; - __u32 ioc_size; - - __u32 ioc_wait; - __u32 ioc_timeout; - __u32 ioc_misc; - - __u32 ioc_inllen1; - char *ioc_inlbuf1; - __u32 ioc_inllen2; - char *ioc_inlbuf2; - - __u32 ioc_plen1; /* buffers in userspace */ - char *ioc_pbuf1; - __u32 ioc_plen2; /* buffers in userspace */ - char *ioc_pbuf2; - - char ioc_bulk[0]; -}; - -struct portal_ioctl_hdr { - __u32 ioc_len; - __u32 ioc_version; -}; - -struct portals_debug_ioctl_data -{ - struct portal_ioctl_hdr hdr; - unsigned int subs; - unsigned int debug; -}; - -#define PORTAL_IOC_INIT(data) \ -do { \ - memset(&data, 0, sizeof(data)); 
\ - data.ioc_version = PORTAL_IOCTL_VERSION; \ - data.ioc_len = sizeof(data); \ -} while (0) - -/* FIXME check conflict with lustre_lib.h */ -#define PTL_IOC_DEBUG_MASK _IOWR('f', 250, long) - -static inline int portal_ioctl_packlen(struct portal_ioctl_data *data) -{ - int len = sizeof(*data); - len += size_round(data->ioc_inllen1); - len += size_round(data->ioc_inllen2); - return len; -} - -static inline int portal_ioctl_is_invalid(struct portal_ioctl_data *data) -{ - if (data->ioc_len > (1<<30)) { - CERROR ("PORTALS ioctl: ioc_len larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen1 > (1<<30)) { - CERROR ("PORTALS ioctl: ioc_inllen1 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inllen2 > (1<<30)) { - CERROR ("PORTALS ioctl: ioc_inllen2 larger than 1<<30\n"); - return 1; - } - if (data->ioc_inlbuf1 && !data->ioc_inllen1) { - CERROR ("PORTALS ioctl: inlbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_inlbuf2 && !data->ioc_inllen2) { - CERROR ("PORTALS ioctl: inlbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf1 && !data->ioc_plen1) { - CERROR ("PORTALS ioctl: pbuf1 pointer but 0 length\n"); - return 1; - } - if (data->ioc_pbuf2 && !data->ioc_plen2) { - CERROR ("PORTALS ioctl: pbuf2 pointer but 0 length\n"); - return 1; - } - if (data->ioc_plen1 && !data->ioc_pbuf1) { - CERROR ("PORTALS ioctl: plen1 nonzero but no pbuf1 pointer\n"); - return 1; - } - if (data->ioc_plen2 && !data->ioc_pbuf2) { - CERROR ("PORTALS ioctl: plen2 nonzero but no pbuf2 pointer\n"); - return 1; - } - if (portal_ioctl_packlen(data) != data->ioc_len ) { - CERROR ("PORTALS ioctl: packlen != ioc_len\n"); - return 1; - } - if (data->ioc_inllen1 && - data->ioc_bulk[data->ioc_inllen1 - 1] != '\0') { - CERROR ("PORTALS ioctl: inlbuf1 not 0 terminated\n"); - return 1; - } - if (data->ioc_inllen2 && - data->ioc_bulk[size_round(data->ioc_inllen1) + - data->ioc_inllen2 - 1] != '\0') { - CERROR ("PORTALS ioctl: inlbuf2 not 0 terminated\n"); - return 1; - 
} - return 0; -} - -#ifndef __KERNEL__ -static inline int portal_ioctl_pack(struct portal_ioctl_data *data, char **pbuf, - int max) -{ - char *ptr; - struct portal_ioctl_data *overlay; - data->ioc_len = portal_ioctl_packlen(data); - data->ioc_version = PORTAL_IOCTL_VERSION; - - if (*pbuf && portal_ioctl_packlen(data) > max) - return 1; - if (*pbuf == NULL) { - *pbuf = malloc(data->ioc_len); - } - if (!*pbuf) - return 1; - overlay = (struct portal_ioctl_data *)*pbuf; - memcpy(*pbuf, data, sizeof(*data)); - - ptr = overlay->ioc_bulk; - if (data->ioc_inlbuf1) - LOGL(data->ioc_inlbuf1, data->ioc_inllen1, ptr); - if (data->ioc_inlbuf2) - LOGL(data->ioc_inlbuf2, data->ioc_inllen2, ptr); - if (portal_ioctl_is_invalid(overlay)) - return 1; - - return 0; -} -#else -#include <asm/uaccess.h> - -/* buffer MUST be at least the size of portal_ioctl_hdr */ -static inline int portal_ioctl_getdata(char *buf, char *end, void *arg) -{ - struct portal_ioctl_hdr *hdr; - struct portal_ioctl_data *data; - int err; - ENTRY; - - hdr = (struct portal_ioctl_hdr *)buf; - data = (struct portal_ioctl_data *)buf; - - err = copy_from_user(buf, (void *)arg, sizeof(*hdr)); - if ( err ) { - EXIT; - return err; - } - - if (hdr->ioc_version != PORTAL_IOCTL_VERSION) { - CERROR ("PORTALS: version mismatch kernel vs application\n"); - return -EINVAL; - } - - if (hdr->ioc_len + buf >= end) { - CERROR ("PORTALS: user buffer exceeds kernel buffer\n"); - return -EINVAL; - } - - - if (hdr->ioc_len < sizeof(struct portal_ioctl_data)) { - CERROR ("PORTALS: user buffer too small for ioctl\n"); - return -EINVAL; - } - - err = copy_from_user(buf, (void *)arg, hdr->ioc_len); - if ( err ) { - EXIT; - return err; - } - - if (portal_ioctl_is_invalid(data)) { - CERROR ("PORTALS: ioctl not correctly formatted\n"); - return -EINVAL; - } - - if (data->ioc_inllen1) { - data->ioc_inlbuf1 = &data->ioc_bulk[0]; - } - - if (data->ioc_inllen2) { - data->ioc_inlbuf2 = &data->ioc_bulk[0] + - size_round(data->ioc_inllen1); - } - - 
EXIT; - return 0; -} -#endif - -/* ioctls for manipulating snapshots 30- */ -#define IOC_PORTAL_TYPE 'e' -#define IOC_PORTAL_MIN_NR 30 - -#define IOC_PORTAL_PING _IOWR('e', 30, long) -#define IOC_PORTAL_GET_DEBUG _IOWR('e', 31, long) -#define IOC_PORTAL_CLEAR_DEBUG _IOWR('e', 32, long) -#define IOC_PORTAL_MARK_DEBUG _IOWR('e', 33, long) -#define IOC_PORTAL_PANIC _IOWR('e', 34, long) -#define IOC_PORTAL_ADD_ROUTE _IOWR('e', 35, long) -#define IOC_PORTAL_DEL_ROUTE _IOWR('e', 36, long) -#define IOC_PORTAL_GET_ROUTE _IOWR('e', 37, long) -#define IOC_PORTAL_NAL_CMD _IOWR('e', 38, long) -#define IOC_PORTAL_GET_NID _IOWR('e', 39, long) -#define IOC_PORTAL_FAIL_NID _IOWR('e', 40, long) -#define IOC_PORTAL_SET_DAEMON _IOWR('e', 41, long) - -#define IOC_PORTAL_MAX_NR 41 - -enum { - QSWNAL = 1, - SOCKNAL, - GMNAL, - TOENAL, - TCPNAL, - SCIMACNAL, - NAL_ENUM_END_MARKER -}; - -#ifdef __KERNEL__ -extern ptl_handle_ni_t kqswnal_ni; -extern ptl_handle_ni_t ksocknal_ni; -extern ptl_handle_ni_t ktoenal_ni; -extern ptl_handle_ni_t kgmnal_ni; -extern ptl_handle_ni_t kscimacnal_ni; -#endif - -#define NAL_MAX_NR (NAL_ENUM_END_MARKER - 1) - -#define NAL_CMD_REGISTER_PEER_FD 100 -#define NAL_CMD_CLOSE_CONNECTION 101 -#define NAL_CMD_REGISTER_MYNID 102 -#define NAL_CMD_PUSH_CONNECTION 103 - -enum { - DEBUG_DAEMON_START = 1, - DEBUG_DAEMON_STOP = 2, - DEBUG_DAEMON_PAUSE = 3, - DEBUG_DAEMON_CONTINUE = 4, -}; - -/* XXX remove to lustre ASAP */ -struct lustre_peer { - ptl_nid_t peer_nid; - ptl_handle_ni_t peer_ni; -}; - -/* module.c */ -typedef int (*nal_cmd_handler_t)(struct portal_ioctl_data *, void * private); -int kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private); -int kportal_nal_unregister(int nal); - -ptl_handle_ni_t *kportal_get_ni (int nal); -void kportal_put_ni (int nal); - -#ifdef __CYGWIN__ -#ifndef BITS_PER_LONG -#if (~0UL) == 0xffffffffUL -#define BITS_PER_LONG 32 -#else -#define BITS_PER_LONG 64 -#endif -#endif -#endif - -#if (BITS_PER_LONG == 32 || 
__WORDSIZE == 32) -# define LPU64 "%Lu" -# define LPD64 "%Ld" -# define LPX64 "%#Lx" -# define LPSZ "%u" -# define LPSSZ "%d" -#endif -#if (BITS_PER_LONG == 64 || __WORDSIZE == 64) -# define LPU64 "%lu" -# define LPD64 "%ld" -# define LPX64 "%#lx" -# define LPSZ "%lu" -# define LPSSZ "%ld" -#endif -#ifndef LPU64 -# error "No word size defined" -#endif - -#endif diff --git a/lustre/portals/include/linux/portals_compat25.h b/lustre/portals/include/linux/portals_compat25.h deleted file mode 100644 index e28fbac..0000000 --- a/lustre/portals/include/linux/portals_compat25.h +++ /dev/null @@ -1,13 +0,0 @@ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) || defined(CONFIG_RH_2_4_20) -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sighand->siglock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sighand->siglock, flags) -# define RECALC_SIGPENDING recalc_sigpending() -#else -# define SIGNAL_MASK_LOCK(task, flags) \ - spin_lock_irqsave(&task->sigmask_lock, flags) -# define SIGNAL_MASK_UNLOCK(task, flags) \ - spin_unlock_irqrestore(&task->sigmask_lock, flags) -# define RECALC_SIGPENDING recalc_sigpending(current) -#endif diff --git a/lustre/portals/include/linux/portals_lib.h b/lustre/portals/include/linux/portals_lib.h deleted file mode 100644 index a528a80..0000000 --- a/lustre/portals/include/linux/portals_lib.h +++ /dev/null @@ -1,188 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. - * - */ - -#ifndef _PORTALS_LIB_H -#define _PORTALS_LIB_H - -#ifndef __KERNEL__ -# include <string.h> -#else -# include <asm/types.h> -#endif - -#undef MIN -#define MIN(a,b) (((a)<(b)) ? (a): (b)) -#undef MAX -#define MAX(a,b) (((a)>(b)) ? (a): (b)) -#define MKSTR(ptr) ((ptr))? (ptr) : "" - -static inline int size_round (int val) -{ - return (val + 7) & (~0x7); -} - -static inline int size_round0(int val) -{ - if (!val) - return 0; - return (val + 1 + 7) & (~0x7); -} - -static inline size_t round_strlen(char *fset) -{ - return size_round(strlen(fset) + 1); -} - -#ifdef __KERNEL__ -static inline char *strdup(const char *str) -{ - int len = strlen(str) + 1; - char *tmp = kmalloc(len, GFP_KERNEL); - if (tmp) - memcpy(tmp, str, len); - - return tmp; -} -#endif - -#ifdef __KERNEL__ -# define NTOH__u32(var) le32_to_cpu(var) -# define NTOH__u64(var) le64_to_cpu(var) -# define HTON__u32(var) cpu_to_le32(var) -# define HTON__u64(var) cpu_to_le64(var) -#else -# define expansion_u64(var) \ - ({ __u64 ret; \ - switch (sizeof(var)) { \ - case 8: (ret) = (var); break; \ - case 4: (ret) = (__u32)(var); break; \ - case 2: (ret) = (__u16)(var); break; \ - case 1: (ret) = (__u8)(var); break; \ - }; \ - (ret); \ - }) -# define NTOH__u32(var) (var) -# define NTOH__u64(var) (expansion_u64(var)) -# define HTON__u32(var) (var) -# define HTON__u64(var) (expansion_u64(var)) -#endif - -/* - * copy sizeof(type) bytes from pointer to var and move ptr forward. 
- * return EFAULT if pointer goes beyond end - */ -#define UNLOGV(var,type,ptr,end) \ -do { \ - var = *(type *)ptr; \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -/* the following two macros convert to little endian */ -/* type MUST be __u32 or __u64 */ -#define LUNLOGV(var,type,ptr,end) \ -do { \ - var = NTOH##type(*(type *)ptr); \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -/* now log values */ -#define LOGV(var,type,ptr) \ -do { \ - *((type *)ptr) = var; \ - ptr += sizeof(type); \ -} while (0) - -/* and in network order */ -#define LLOGV(var,type,ptr) \ -do { \ - *((type *)ptr) = HTON##type(var); \ - ptr += sizeof(type); \ -} while (0) - - -/* - * set var to point at (type *)ptr, move ptr forward with sizeof(type) - * return from function with EFAULT if ptr goes beyond end - */ -#define UNLOGP(var,type,ptr,end) \ -do { \ - var = (type *)ptr; \ - ptr += sizeof(type); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -#define LOGP(var,type,ptr) \ -do { \ - memcpy(ptr, var, sizeof(type)); \ - ptr += sizeof(type); \ -} while (0) - -/* - * set var to point at (char *)ptr, move ptr forward by size_round(len); - * return from function with EFAULT if ptr goes beyond end - */ -#define UNLOGL(var,type,len,ptr,end) \ -do { \ - var = (type *)ptr; \ - ptr += size_round(len * sizeof(type)); \ - if (ptr > end ) \ - return -EFAULT; \ -} while (0) - -#define UNLOGL0(var,type,len,ptr,end) \ -do { \ - UNLOGL(var,type,len,ptr,end); \ - if ( *((char *)ptr - size_round(len) + len - 1) != '\0') \ - return -EFAULT; \ -} while (0) - -#define LOGL(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)ptr, (const char *)var, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGU(var,len,ptr) \ -do { \ - if (var) \ - memcpy((char *)var, (const char *)ptr, len); \ - ptr += size_round(len); \ -} while (0) - -#define LOGL0(var,len,ptr) \ -do { \ - if (!len) \ - break; \ - memcpy((char *)ptr, (const char 
*)var, len); \ - *((char *)(ptr) + len) = 0; \ - ptr += size_round(len + 1); \ -} while (0) - -#endif /* _PORTALS_LIB_H */ diff --git a/lustre/portals/include/portals/Makefile.am b/lustre/portals/include/portals/Makefile.am deleted file mode 100644 index c61b084..0000000 --- a/lustre/portals/include/portals/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -SUBDIRS = base -include $(top_srcdir)/Rules - -pkginclude_HEADERS=api-support.h api.h arg-blocks.h defines.h errno.h internal.h lib-dispatch.h lib-nal.h lib-p30.h lib-types.h myrnal.h nal.h p30.h ppid.h ptlctl.h stringtab.h types.h nalids.h list.h bridge.h ipmap.h procbridge.h lltrace.h - diff --git a/lustre/portals/include/portals/api-support.h b/lustre/portals/include/portals/api-support.h deleted file mode 100644 index af4a2dc..0000000 --- a/lustre/portals/include/portals/api-support.h +++ /dev/null @@ -1,27 +0,0 @@ -# define DEBUG_SUBSYSTEM S_PORTALS -# define PORTAL_DEBUG - -#ifndef __KERNEL__ -# include <stdio.h> -# include <stdlib.h> -# include <unistd.h> -# include <time.h> - -/* Lots of POSIX dependencies to support PtlEQWait_timeout */ -# include <signal.h> -# include <setjmp.h> -# include <time.h> -#endif - -#include <portals/types.h> -#include <linux/kp30.h> -#include <portals/p30.h> - -#include <portals/internal.h> -#include <portals/nal.h> -#include <portals/arg-blocks.h> - -/* Hack for 2.4.18 macro name collision */ -#ifdef yield -#undef yield -#endif diff --git a/lustre/portals/include/portals/api.h b/lustre/portals/include/portals/api.h deleted file mode 100644 index a83749b..0000000 --- a/lustre/portals/include/portals/api.h +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef P30_API_H -#define P30_API_H - -#include <portals/types.h> - -#ifndef PTL_NO_WRAP -int PtlInit(void); -int PtlInitialized(void); -void PtlFini(void); - -int PtlNIInit(ptl_interface_t 
interface, ptl_pt_index_t ptl_size_in, - ptl_ac_index_t acl_size_in, ptl_pid_t requested_pid, - ptl_handle_ni_t * interface_out); - -int PtlNIInitialized(ptl_interface_t); - -int PtlNIFini(ptl_handle_ni_t interface_in); - -#endif - -int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id); - - -/* - * Network interfaces - */ - -#ifndef PTL_NO_WRAP -int PtlNIBarrier(ptl_handle_ni_t interface_in); -#endif - -int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t * status_out); - -int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out); - -#ifndef PTL_NO_WRAP -int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * interface_out); -#endif - - -/* - * PtlNIDebug: - * - * This is not an official Portals 3 API call. It is provided - * by the reference implementation to allow the maintainers an - * easy way to turn on and off debugging information in the - * library. Do not use it in code that is not intended for use - * with any version other than the portable reference library. - */ -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in); - -/* - * PtlNIFailNid - * - * Not an official Portals 3 API call. It provides a way of simulating - * communications failures to all (nid == PTL_NID_ANY), or specific peers - * (via multiple calls), either until further notice (threshold == -1), or - * for a specific number of messages. Passing a threshold of zero, "heals" - * the given peer. 
- */ -int PtlFailNid (ptl_handle_ni_t ni, ptl_nid_t nid, unsigned int threshold); - - -/* - * Match entries - */ - -int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, - ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, - ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, - ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out); - -int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, - ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, - ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, - ptl_handle_me_t * handle_out); - -int PtlMEUnlink(ptl_handle_me_t current_in); - -int PtlMEUnlinkList(ptl_handle_me_t current_in); - -int PtlTblDump(ptl_handle_ni_t ni, int index_in); -int PtlMEDump(ptl_handle_me_t current_in); - - - -/* - * Memory descriptors - */ - -#ifndef PTL_NO_WRAP -int PtlMDAttach(ptl_handle_me_t current_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out); - -int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_handle_md_t * handle_out); - -int PtlMDUnlink(ptl_handle_md_t md_in); - -int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t * old_inout, - ptl_md_t * new_inout, ptl_handle_eq_t testq_in); - -#endif - -/* These should not be called by users */ -int PtlMDUpdate_internal(ptl_handle_md_t md_in, ptl_md_t * old_inout, - ptl_md_t * new_inout, ptl_handle_eq_t testq_in, - ptl_seq_t sequence_in); - - - - -/* - * Event queues - */ -#ifndef PTL_NO_WRAP - -/* These should be called by users */ -int PtlEQAlloc(ptl_handle_ni_t ni_in, ptl_size_t count_in, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out); -int PtlEQFree(ptl_handle_eq_t eventq_in); - -int PtlEQCount(ptl_handle_eq_t eventq_in, ptl_size_t * count_out); - -int PtlEQGet(ptl_handle_eq_t eventq_in, ptl_event_t * event_out); - - -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t * event_out); - -int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout); 
-#endif - -/* - * Access Control Table - */ -int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, - ptl_process_id_t match_id_in, ptl_pt_index_t portal_in); - - -/* - * Data movement - */ - -int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, - ptl_process_id_t target_in, ptl_pt_index_t portal_in, - ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in, - ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in); - -int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, - ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in, - ptl_match_bits_t match_bits_in, ptl_size_t offset_in); - - - -#endif diff --git a/lustre/portals/include/portals/arg-blocks.h b/lustre/portals/include/portals/arg-blocks.h deleted file mode 100644 index 3c3b154..0000000 --- a/lustre/portals/include/portals/arg-blocks.h +++ /dev/null @@ -1,265 +0,0 @@ -#ifndef PTL_BLOCKS_H -#define PTL_BLOCKS_H - -/* - * blocks.h - * - * Argument block types for the Portals 3.0 library - * Generated by idl - * - */ - -#include <portals/types.h> - -/* put LIB_MAX_DISPATCH last here -- these must match the - assignements to the dispatch table in lib-p30/dispatch.c */ -#define PTL_GETID 1 -#define PTL_NISTATUS 2 -#define PTL_NIDIST 3 -#define PTL_NIDEBUG 4 -#define PTL_MEATTACH 5 -#define PTL_MEINSERT 6 -// #define PTL_MEPREPEND 7 -#define PTL_MEUNLINK 8 -#define PTL_TBLDUMP 9 -#define PTL_MEDUMP 10 -#define PTL_MDATTACH 11 -// #define PTL_MDINSERT 12 -#define PTL_MDBIND 13 -#define PTL_MDUPDATE 14 -#define PTL_MDUNLINK 15 -#define PTL_EQALLOC 16 -#define PTL_EQFREE 17 -#define PTL_ACENTRY 18 -#define PTL_PUT 19 -#define PTL_GET 20 -#define PTL_FAILNID 21 -#define LIB_MAX_DISPATCH 21 - -typedef struct PtlFailNid_in { - ptl_handle_ni_t interface; - ptl_nid_t nid; - unsigned int threshold; -} PtlFailNid_in; - -typedef struct PtlFailNid_out { - int rc; -} PtlFailNid_out; - -typedef struct PtlGetId_in { - ptl_handle_ni_t handle_in; -} PtlGetId_in; - -typedef struct PtlGetId_out { - int rc; - 
ptl_process_id_t id_out; -} PtlGetId_out; - -typedef struct PtlNIStatus_in { - ptl_handle_ni_t interface_in; - ptl_sr_index_t register_in; -} PtlNIStatus_in; - -typedef struct PtlNIStatus_out { - int rc; - ptl_sr_value_t status_out; -} PtlNIStatus_out; - - -typedef struct PtlNIDist_in { - ptl_handle_ni_t interface_in; - ptl_process_id_t process_in; -} PtlNIDist_in; - -typedef struct PtlNIDist_out { - int rc; - unsigned long distance_out; -} PtlNIDist_out; - - -typedef struct PtlNIDebug_in { - unsigned int mask_in; -} PtlNIDebug_in; - -typedef struct PtlNIDebug_out { - unsigned int rc; -} PtlNIDebug_out; - - -typedef struct PtlMEAttach_in { - ptl_handle_ni_t interface_in; - ptl_pt_index_t index_in; - ptl_ins_pos_t position_in; - ptl_process_id_t match_id_in; - ptl_match_bits_t match_bits_in; - ptl_match_bits_t ignore_bits_in; - ptl_unlink_t unlink_in; -} PtlMEAttach_in; - -typedef struct PtlMEAttach_out { - int rc; - ptl_handle_me_t handle_out; -} PtlMEAttach_out; - - -typedef struct PtlMEInsert_in { - ptl_handle_me_t current_in; - ptl_process_id_t match_id_in; - ptl_match_bits_t match_bits_in; - ptl_match_bits_t ignore_bits_in; - ptl_unlink_t unlink_in; - ptl_ins_pos_t position_in; -} PtlMEInsert_in; - -typedef struct PtlMEInsert_out { - int rc; - ptl_handle_me_t handle_out; -} PtlMEInsert_out; - -typedef struct PtlMEUnlink_in { - ptl_handle_me_t current_in; - ptl_unlink_t unlink_in; -} PtlMEUnlink_in; - -typedef struct PtlMEUnlink_out { - int rc; -} PtlMEUnlink_out; - - -typedef struct PtlTblDump_in { - int index_in; -} PtlTblDump_in; - -typedef struct PtlTblDump_out { - int rc; -} PtlTblDump_out; - - -typedef struct PtlMEDump_in { - ptl_handle_me_t current_in; -} PtlMEDump_in; - -typedef struct PtlMEDump_out { - int rc; -} PtlMEDump_out; - - -typedef struct PtlMDAttach_in { - ptl_handle_me_t me_in; - ptl_handle_eq_t eq_in; - ptl_md_t md_in; - ptl_unlink_t unlink_in; -} PtlMDAttach_in; - -typedef struct PtlMDAttach_out { - int rc; - ptl_handle_md_t handle_out; -} 
PtlMDAttach_out; - - -typedef struct PtlMDBind_in { - ptl_handle_ni_t ni_in; - ptl_handle_eq_t eq_in; - ptl_md_t md_in; -} PtlMDBind_in; - -typedef struct PtlMDBind_out { - int rc; - ptl_handle_md_t handle_out; -} PtlMDBind_out; - - -typedef struct PtlMDUpdate_internal_in { - ptl_handle_md_t md_in; - ptl_handle_eq_t testq_in; - ptl_seq_t sequence_in; - - ptl_md_t old_inout; - int old_inout_valid; - ptl_md_t new_inout; - int new_inout_valid; -} PtlMDUpdate_internal_in; - -typedef struct PtlMDUpdate_internal_out { - int rc; - ptl_md_t old_inout; - ptl_md_t new_inout; -} PtlMDUpdate_internal_out; - - -typedef struct PtlMDUnlink_in { - ptl_handle_md_t md_in; -} PtlMDUnlink_in; - -typedef struct PtlMDUnlink_out { - int rc; - ptl_md_t status_out; -} PtlMDUnlink_out; - - -typedef struct PtlEQAlloc_in { - ptl_handle_ni_t ni_in; - ptl_size_t count_in; - void *base_in; - int len_in; - int (*callback_in) (ptl_event_t * event); -} PtlEQAlloc_in; - -typedef struct PtlEQAlloc_out { - int rc; - ptl_handle_eq_t handle_out; -} PtlEQAlloc_out; - - -typedef struct PtlEQFree_in { - ptl_handle_eq_t eventq_in; -} PtlEQFree_in; - -typedef struct PtlEQFree_out { - int rc; -} PtlEQFree_out; - - -typedef struct PtlACEntry_in { - ptl_handle_ni_t ni_in; - ptl_ac_index_t index_in; - ptl_process_id_t match_id_in; - ptl_pt_index_t portal_in; -} PtlACEntry_in; - -typedef struct PtlACEntry_out { - int rc; -} PtlACEntry_out; - - -typedef struct PtlPut_in { - ptl_handle_md_t md_in; - ptl_ack_req_t ack_req_in; - ptl_process_id_t target_in; - ptl_pt_index_t portal_in; - ptl_ac_index_t cookie_in; - ptl_match_bits_t match_bits_in; - ptl_size_t offset_in; - ptl_hdr_data_t hdr_data_in; -} PtlPut_in; - -typedef struct PtlPut_out { - int rc; -} PtlPut_out; - - -typedef struct PtlGet_in { - ptl_handle_md_t md_in; - ptl_process_id_t target_in; - ptl_pt_index_t portal_in; - ptl_ac_index_t cookie_in; - ptl_match_bits_t match_bits_in; - ptl_size_t offset_in; -} PtlGet_in; - -typedef struct PtlGet_out { - int rc; 
-} PtlGet_out; - - -#endif diff --git a/lustre/portals/include/portals/defines.h b/lustre/portals/include/portals/defines.h deleted file mode 100644 index 785ce73..0000000 --- a/lustre/portals/include/portals/defines.h +++ /dev/null @@ -1,116 +0,0 @@ -/* -** -** This files contains definitions that are used throughout the cplant code. -*/ - -#ifndef CPLANT_H -#define CPLANT_H - -#define TITLE(fname,zmig) - - -/* -** TRUE and FALSE -*/ -#undef TRUE -#define TRUE (1) -#undef FALSE -#define FALSE (0) - - -/* -** Return codes from functions -*/ -#undef OK -#define OK (0) -#undef ERROR -#define ERROR (-1) - - - -/* -** The GCC macro for a safe max() that works on all types arithmetic types. -*/ -#ifndef MAX -#define MAX(a, b) (a) > (b) ? (a) : (b) -#endif /* MAX */ - -#ifndef MIN -#define MIN(a, b) (a) < (b) ? (a) : (b) -#endif /* MIN */ - -/* -** The rest is from the old qkdefs.h -*/ - -#ifndef __linux__ -#define __inline__ -#endif - -#ifndef NULL -#define NULL ((void *)0) -#endif - -#ifndef __osf__ -#define PRIVATE static -#define PUBLIC -#endif - -#ifndef __osf__ -typedef unsigned char uchar; -#endif - -typedef char CHAR; -typedef unsigned char UCHAR; -typedef char INT8; -typedef unsigned char UINT8; -typedef short int INT16; -typedef unsigned short int UINT16; -typedef int INT32; -typedef unsigned int UINT32; -typedef long LONG32; -typedef unsigned long ULONG32; - -/* long may be 32 or 64, so we can't really append the size to the definition */ -typedef long LONG; -typedef unsigned long ULONG; - -#ifdef __alpha__ -typedef long int_t; -#ifndef __osf__ -typedef unsigned long uint_t; -#endif -#endif - -#ifdef __i386__ -typedef int int_t; -typedef unsigned int uint_t; -#endif - -typedef float FLOAT32; -typedef double FLOAT64; -typedef void VOID; -typedef INT32 BOOLEAN; -typedef void (*FCN_PTR)(void); - -#ifndef off64_t - -#if defined (__alpha__) || defined (__ia64__) -typedef long off64_t; -#else -typedef long long off64_t; -#endif - -#endif - -/* -** Process related 
typedefs -*/ -typedef UINT16 PID_TYPE; /* Type of Local process ID */ -typedef UINT16 NID_TYPE; /* Type of Physical node ID */ -typedef UINT16 GID_TYPE; /* Type of Group ID */ -typedef UINT16 RANK_TYPE; /* Type of Logical rank/process within a group */ - - - -#endif /* CPLANT_H */ diff --git a/lustre/portals/include/portals/errno.h b/lustre/portals/include/portals/errno.h deleted file mode 100644 index 817936a..0000000 --- a/lustre/portals/include/portals/errno.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _P30_ERRNO_H_ -#define _P30_ERRNO_H_ - -/* - * include/portals/errno.h - * - * Shared error number lists - */ - -/* If you change these, you must update the string table in api-errno.c */ -typedef enum { - PTL_OK = 0, - PTL_SEGV = 1, - - PTL_NOSPACE = 2, - PTL_INUSE = 3, - PTL_VAL_FAILED = 4, - - PTL_NAL_FAILED = 5, - PTL_NOINIT = 6, - PTL_INIT_DUP = 7, - PTL_INIT_INV = 8, - PTL_AC_INV_INDEX = 9, - - PTL_INV_ASIZE = 10, - PTL_INV_HANDLE = 11, - PTL_INV_MD = 12, - PTL_INV_ME = 13, - PTL_INV_NI = 14, -/* If you change these, you must update the string table in api-errno.c */ - PTL_ILL_MD = 15, - PTL_INV_PROC = 16, - PTL_INV_PSIZE = 17, - PTL_INV_PTINDEX = 18, - PTL_INV_REG = 19, - - PTL_INV_SR_INDX = 20, - PTL_ML_TOOLONG = 21, - PTL_ADDR_UNKNOWN = 22, - PTL_INV_EQ = 23, - PTL_EQ_DROPPED = 24, - - PTL_EQ_EMPTY = 25, - PTL_NOUPDATE = 26, - PTL_FAIL = 27, - PTL_NOT_IMPLEMENTED = 28, - PTL_NO_ACK = 29, - - PTL_IOV_TOO_MANY = 30, - PTL_IOV_TOO_SMALL = 31, - - PTL_EQ_INUSE = 32, - PTL_MD_INUSE = 33, - - PTL_MAX_ERRNO = 33 -} ptl_err_t; -/* If you change these, you must update the string table in api-errno.c */ - -extern const char *ptl_err_str[]; - -#endif diff --git a/lustre/portals/include/portals/internal.h b/lustre/portals/include/portals/internal.h deleted file mode 100644 index e69de29..0000000 diff --git a/lustre/portals/include/portals/lib-dispatch.h b/lustre/portals/include/portals/lib-dispatch.h deleted file mode 100644 index f87ff83..0000000 --- 
a/lustre/portals/include/portals/lib-dispatch.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef PTL_DISPATCH_H -#define PTL_DISPATCH_H - -/* - * include/dispatch.h - * - * Dispatch table header and externs for remote side - * operations - * - * Generated by idl - * - */ - -#include <portals/lib-p30.h> -#include <portals/arg-blocks.h> - -extern int do_PtlGetId(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIStatus(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIDist(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlNIDebug(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMEAttach(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMEInsert(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMEPrepend(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlTblDump(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMEDump(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlMDAttach(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMDBind(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *args, - void *ret); -extern int do_PtlPut(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlGet(nal_cb_t * nal, void *private, void *args, void *ret); -extern int do_PtlFailNid (nal_cb_t *nal, void *private, void *args, void *ret); - -extern char *dispatch_name(int index); -#endif diff --git 
a/lustre/portals/include/portals/lib-nal.h b/lustre/portals/include/portals/lib-nal.h deleted file mode 100644 index 4052c0c..0000000 --- a/lustre/portals/include/portals/lib-nal.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef _LIB_NAL_H_ -#define _LIB_NAL_H_ - -/* - * nal.h - * - * Library side headers that define the abstraction layer's - * responsibilities and interfaces - */ - -#include <portals/lib-types.h> - -struct nal_cb_t { - /* - * Per interface portal table, access control table - * and NAL private data field; - */ - lib_ni_t ni; - void *nal_data; - /* - * send: Sends a preformatted header and user data to a - * specified remote process. - * Can overwrite iov. - */ - int (*cb_send) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, struct iovec *iov, size_t mlen); - - /* as send, but with a set of page fragments (NULL if not supported) */ - int (*cb_send_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - ptl_hdr_t * hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int niov, ptl_kiov_t *iov, size_t mlen); - /* - * recv: Receives an incoming message from a remote process - * Type of iov depends on options. Can overwrite iov. 
- */ - int (*cb_recv) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - unsigned int niov, struct iovec *iov, size_t mlen, - size_t rlen); - - /* as recv, but with a set of page fragments (NULL if not supported) */ - int (*cb_recv_pages) (nal_cb_t * nal, void *private, lib_msg_t * cookie, - unsigned int niov, ptl_kiov_t *iov, size_t mlen, - size_t rlen); - /* - * read: Reads a block of data from a specified user address - */ - int (*cb_read) (nal_cb_t * nal, void *private, void *dst_addr, - user_ptr src_addr, size_t len); - - /* - * write: Writes a block of data into a specified user address - */ - int (*cb_write) (nal_cb_t * nal, void *private, user_ptr dsr_addr, - void *src_addr, size_t len); - - /* - * callback: Calls an event callback - */ - int (*cb_callback) (nal_cb_t * nal, void *private, lib_eq_t *eq, - ptl_event_t *ev); - - /* - * malloc: Acquire a block of memory in a system independent - * fashion. - */ - void *(*cb_malloc) (nal_cb_t * nal, size_t len); - - void (*cb_free) (nal_cb_t * nal, void *buf, size_t len); - - /* - * (un)map: Tell the NAL about some memory it will access. - * *addrkey passed to cb_unmap() is what cb_map() set it to. - * type of *iov depends on options. - * Set to NULL if not required. 
- */ - int (*cb_map) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, - void **addrkey); - void (*cb_unmap) (nal_cb_t * nal, unsigned int niov, struct iovec *iov, - void **addrkey); - - /* as (un)map, but with a set of page fragments */ - int (*cb_map_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - void (*cb_unmap_pages) (nal_cb_t * nal, unsigned int niov, ptl_kiov_t *iov, - void **addrkey); - - void (*cb_printf) (nal_cb_t * nal, const char *fmt, ...); - - /* Turn interrupts off (begin of protected area) */ - void (*cb_cli) (nal_cb_t * nal, unsigned long *flags); - - /* Turn interrupts on (end of protected area) */ - void (*cb_sti) (nal_cb_t * nal, unsigned long *flags); - - /* - * Calculate a network "distance" to given node - */ - int (*cb_dist) (nal_cb_t * nal, ptl_nid_t nid, unsigned long *dist); -}; - -#endif diff --git a/lustre/portals/include/portals/lib-p30.h b/lustre/portals/include/portals/lib-p30.h deleted file mode 100644 index b623b93..0000000 --- a/lustre/portals/include/portals/lib-p30.h +++ /dev/null @@ -1,385 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib-p30.h - * - * Top level include for library side routines - */ - -#ifndef _LIB_P30_H_ -#define _LIB_P30_H_ - -#ifdef __KERNEL__ -# include <asm/page.h> -# include <linux/string.h> -#else -# include <portals/list.h> -# include <string.h> -#endif -#include <portals/types.h> -#include <linux/kp30.h> -#include <portals/p30.h> -#include <portals/errno.h> -#include <portals/lib-types.h> -#include <portals/lib-nal.h> -#include <portals/lib-dispatch.h> - -static inline int ptl_is_wire_handle_none (ptl_handle_wire_t *wh) -{ - return (wh->wh_interface_cookie == PTL_WIRE_HANDLE_NONE.wh_interface_cookie && - wh->wh_object_cookie == PTL_WIRE_HANDLE_NONE.wh_object_cookie); -} - -#ifdef __KERNEL__ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - 
nal->cb_cli(nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - nal->cb_sti(nal, flagsp); \ -} -#else -/* not needed in user space until we thread there */ -#define state_lock(nal,flagsp) \ -do { \ - CDEBUG(D_PORTALS, "taking state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} while (0) - -#define state_unlock(nal,flagsp) \ -{ \ - CDEBUG(D_PORTALS, "releasing state lock\n"); \ - CDEBUG(D_PORTALS, "%p:%p\n", nal, flagsp); \ -} -#endif /* __KERNEL__ */ - -#ifndef PTL_USE_SLAB_CACHE - -#define MAX_MES 2048 -#define MAX_MDS 2048 -#define MAX_MSGS 2048 /* Outstanding messages */ -#define MAX_EQS 512 - -extern int lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int nobj, int objsize); -extern void lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl); - -static inline void * -lib_freelist_alloc (lib_freelist_t *fl) -{ - /* ALWAYS called with statelock held */ - lib_freeobj_t *o; - - if (list_empty (&fl->fl_list)) - return (NULL); - - o = list_entry (fl->fl_list.next, lib_freeobj_t, fo_list); - list_del (&o->fo_list); - return ((void *)&o->fo_contents); -} - -static inline void -lib_freelist_free (lib_freelist_t *fl, void *obj) -{ - /* ALWAYS called with statelock held */ - lib_freeobj_t *o = list_entry (obj, lib_freeobj_t, fo_contents); - - list_add (&o->fo_list, &fl->fl_list); -} - - -static inline lib_eq_t * -lib_eq_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_eq_t *eq; - - state_lock (nal, &flags); - eq = (lib_eq_t *)lib_freelist_alloc (&nal->ni.ni_free_eqs); - state_unlock (nal, &flags); - - return (eq); -} - -static inline void -lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_eqs, eq); -} - -static inline lib_md_t * -lib_md_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_md_t *md; - - state_lock (nal, 
&flags); - md = (lib_md_t *)lib_freelist_alloc (&nal->ni.ni_free_mds); - state_unlock (nal, &flags); - - return (md); -} - -static inline void -lib_md_free (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mds, md); -} - -static inline lib_me_t * -lib_me_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - unsigned long flags; - lib_me_t *me; - - state_lock (nal, &flags); - me = (lib_me_t *)lib_freelist_alloc (&nal->ni.ni_free_mes); - state_unlock (nal, &flags); - - return (me); -} - -static inline void -lib_me_free (nal_cb_t *nal, lib_me_t *me) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_mes, me); -} - -static inline lib_msg_t * -lib_msg_alloc (nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - return ((lib_msg_t *)lib_freelist_alloc (&nal->ni.ni_free_msgs)); -} - -static inline void -lib_msg_free (nal_cb_t *nal, lib_msg_t *msg) -{ - /* ALWAYS called with statelock held */ - lib_freelist_free (&nal->ni.ni_free_msgs, msg); -} - -#else - -extern kmem_cache_t *ptl_md_slab; -extern kmem_cache_t *ptl_msg_slab; -extern kmem_cache_t *ptl_me_slab; -extern kmem_cache_t *ptl_eq_slab; -extern atomic_t md_in_use_count; -extern atomic_t msg_in_use_count; -extern atomic_t me_in_use_count; -extern atomic_t eq_in_use_count; - -static inline lib_eq_t * -lib_eq_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_eq_t *eq = kmem_cache_alloc(ptl_eq_slab, GFP_NOFS); - - if (eq == NULL) - return (NULL); - - atomic_inc (&eq_in_use_count); - return (eq); -} - -static inline void -lib_eq_free (nal_cb_t *nal, lib_eq_t *eq) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&eq_in_use_count); - kmem_cache_free(ptl_eq_slab, eq); -} - -static inline lib_md_t * -lib_md_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_md_t *md = kmem_cache_alloc(ptl_md_slab, GFP_NOFS); - - if (md == NULL) - return (NULL); - - 
atomic_inc (&md_in_use_count); - return (md); -} - -static inline void -lib_md_free (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&md_in_use_count); - kmem_cache_free(ptl_md_slab, md); -} - -static inline lib_me_t * -lib_me_alloc (nal_cb_t *nal) -{ - /* NEVER called with statelock held */ - lib_me_t *me = kmem_cache_alloc(ptl_me_slab, GFP_NOFS); - - if (me == NULL) - return (NULL); - - atomic_inc (&me_in_use_count); - return (me); -} - -static inline void -lib_me_free(nal_cb_t *nal, lib_me_t *me) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&me_in_use_count); - kmem_cache_free(ptl_me_slab, me); -} - -static inline lib_msg_t * -lib_msg_alloc(nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_msg_t *msg = kmem_cache_alloc(ptl_msg_slab, GFP_ATOMIC); - - if (msg == NULL) - return (NULL); - - atomic_inc (&msg_in_use_count); - return (msg); -} - -static inline void -lib_msg_free(nal_cb_t *nal, lib_msg_t *msg) -{ - /* ALWAYS called with statelock held */ - atomic_dec (&msg_in_use_count); - kmem_cache_free(ptl_msg_slab, msg); -} -#endif - -extern lib_handle_t *lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type); -extern void lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type); -extern void lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh); - -static inline void -ptl_eq2handle (ptl_handle_eq_t *handle, lib_eq_t *eq) -{ - handle->cookie = eq->eq_lh.lh_cookie; -} - -static inline lib_eq_t * -ptl_handle2eq (ptl_handle_eq_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_EQ); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_eq_t, eq_lh)); -} - -static inline void -ptl_md2handle (ptl_handle_md_t *handle, lib_md_t *md) -{ - handle->cookie = md->md_lh.lh_cookie; -} - -static inline lib_md_t * -ptl_handle2md (ptl_handle_md_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called 
with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_md_t, md_lh)); -} - -static inline lib_md_t * -ptl_wire_handle2md (ptl_handle_wire_t *wh, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh; - - if (wh->wh_interface_cookie != nal->ni.ni_interface_cookie) - return (NULL); - - lh = lib_lookup_cookie (nal, wh->wh_object_cookie, - PTL_COOKIE_TYPE_MD); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_md_t, md_lh)); -} - -static inline void -ptl_me2handle (ptl_handle_me_t *handle, lib_me_t *me) -{ - handle->cookie = me->me_lh.lh_cookie; -} - -static inline lib_me_t * -ptl_handle2me (ptl_handle_me_t *handle, nal_cb_t *nal) -{ - /* ALWAYS called with statelock held */ - lib_handle_t *lh = lib_lookup_cookie (nal, handle->cookie, - PTL_COOKIE_TYPE_ME); - if (lh == NULL) - return (NULL); - - return (lh_entry (lh, lib_me_t, me_lh)); -} - -extern int lib_init(nal_cb_t * cb, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t tbl_size, ptl_ac_index_t ac_size); -extern int lib_fini(nal_cb_t * cb); -extern void lib_dispatch(nal_cb_t * cb, void *private, int index, - void *arg_block, void *ret_block); -extern char *dispatch_name(int index); - -/* - * When the NAL detects an incoming message, it should call - * lib_parse() decode it. The NAL callbacks will be handed - * the private cookie as a way for the NAL to maintain state - * about which transaction is being processed. An extra parameter, - * lib_cookie will contain the necessary information for - * finalizing the message. - * - * After it has finished the handling the message, it should - * call lib_finalize() with the lib_cookie parameter. - * Call backs will be made to write events, send acks or - * replies and so on. 
- */ -extern int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private); -extern int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t * msg); -extern void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr); - -extern ptl_size_t lib_iov_nob (int niov, struct iovec *iov); -extern void lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len); -extern void lib_copy_buf2iov (int niov, struct iovec *iov, char *dest, ptl_size_t len); - -extern ptl_size_t lib_kiov_nob (int niov, ptl_kiov_t *iov); -extern void lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *iov, ptl_size_t len); -extern void lib_copy_buf2kiov (int niov, ptl_kiov_t *iov, char *src, ptl_size_t len); -extern void lib_assert_wire_constants (void); - -extern void lib_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, lib_md_t *md, - ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen); -extern int lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len); - -extern void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md_in, - ptl_md_t * md_out); -extern void lib_md_unlink(nal_cb_t * nal, lib_md_t * md_in); -extern void lib_me_unlink(nal_cb_t * nal, lib_me_t * me_in); -#endif diff --git a/lustre/portals/include/portals/lib-types.h b/lustre/portals/include/portals/lib-types.h deleted file mode 100644 index 47c0dd2..0000000 --- a/lustre/portals/include/portals/lib-types.h +++ /dev/null @@ -1,282 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * p30/lib-types.h - * - * Types used by the library side routines that do not need to be - * exposed to the user application - */ - -#ifndef _LIB_TYPES_H_ -#define _LIB_TYPES_H_ - -#include <portals/types.h> -#ifdef __KERNEL__ -# define PTL_USE_SLAB_CACHE -# include <linux/uio.h> -# include <linux/smp_lock.h> -# include <linux/types.h> -#else -# include <sys/types.h> -#endif - -/* 
struct nal_cb_t is defined in lib-nal.h */ -typedef struct nal_cb_t nal_cb_t; - -typedef char *user_ptr; -typedef struct lib_msg_t lib_msg_t; -typedef struct lib_ptl_t lib_ptl_t; -typedef struct lib_ac_t lib_ac_t; -typedef struct lib_me_t lib_me_t; -typedef struct lib_md_t lib_md_t; -typedef struct lib_eq_t lib_eq_t; - -#define WIRE_ATTR __attribute__((packed)) - -/* The wire handle's interface cookie only matches one network interface in - * one epoch (i.e. new cookie when the interface restarts or the node - * reboots). The object cookie only matches one object on that interface - * during that object's lifetime (i.e. no cookie re-use). */ -typedef struct { - __u64 wh_interface_cookie; - __u64 wh_object_cookie; -} WIRE_ATTR ptl_handle_wire_t; - -/* byte-flip insensitive! */ -#define PTL_WIRE_HANDLE_NONE \ -((const ptl_handle_wire_t) {.wh_interface_cookie = -1, .wh_object_cookie = -1}) - -typedef enum { - PTL_MSG_ACK = 0, - PTL_MSG_PUT, - PTL_MSG_GET, - PTL_MSG_REPLY, - PTL_MSG_HELLO, -} ptl_msg_type_t; - -/* Each of these structs should start with an odd number of - * __u32, or the compiler could add its own padding and confuse - * everyone. - * - * Also, "length" needs to be at offset 28 of each struct. 
- */ -typedef struct ptl_ack { - ptl_size_t mlength; - ptl_handle_wire_t dst_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for acks) moving out RSN */ -} WIRE_ATTR ptl_ack_t; - -typedef struct ptl_put { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t ack_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length moving out RSN */ - ptl_size_t offset; - ptl_hdr_data_t hdr_data; -} WIRE_ATTR ptl_put_t; - -typedef struct ptl_get { - ptl_pt_index_t ptl_index; - ptl_handle_wire_t return_wmd; - ptl_match_bits_t match_bits; - ptl_size_t length; /* common length (0 for gets) moving out RSN */ - ptl_size_t src_offset; - ptl_size_t return_offset; /* unused: going RSN */ - ptl_size_t sink_length; -} WIRE_ATTR ptl_get_t; - -typedef struct ptl_reply { - __u32 unused1; /* unused fields going RSN */ - ptl_handle_wire_t dst_wmd; - ptl_size_t dst_offset; /* unused: going RSN */ - __u32 unused2; - ptl_size_t length; /* common length moving out RSN */ -} WIRE_ATTR ptl_reply_t; - -typedef struct { - ptl_nid_t dest_nid; - ptl_nid_t src_nid; - ptl_pid_t dest_pid; - ptl_pid_t src_pid; - __u32 type; /* ptl_msg_type_t */ - union { - ptl_ack_t ack; - ptl_put_t put; - ptl_get_t get; - ptl_reply_t reply; - } msg; -} WIRE_ATTR ptl_hdr_t; - -/* All length fields in individual unions at same offset */ -/* LASSERT for same in lib-move.c */ -#define PTL_HDR_LENGTH(h) ((h)->msg.ack.length) - -/* A HELLO message contains the portals magic number and protocol version - * code in the header's dest_nid, the peer's NID in the src_nid, and - * PTL_MSG_HELLO in the type field. All other fields are zero (including - * PTL_HDR_LENGTH; i.e. no payload). - * This is for use by byte-stream NALs (e.g. TCP/IP) to check the peer is - * running the same protocol and to find out its NID, so that hosts with - * multiple IP interfaces can have a single NID. These NALs should exchange - * HELLO messages when a connection is first established. 
*/ -typedef struct { - __u32 magic; /* PORTALS_PROTO_MAGIC */ - __u16 version_major; /* increment on incompatible change */ - __u16 version_minor; /* increment on compatible change */ -} WIRE_ATTR ptl_magicversion_t; - -#define PORTALS_PROTO_MAGIC 0xeebc0ded - -#define PORTALS_PROTO_VERSION_MAJOR 0 -#define PORTALS_PROTO_VERSION_MINOR 1 - -typedef struct { - long recv_count, recv_length, send_count, send_length, drop_count, - drop_length, msgs_alloc, msgs_max; -} lib_counters_t; - -/* temporary expedient: limit number of entries in discontiguous MDs */ -#if PTL_LARGE_MTU -# define PTL_MD_MAX_IOV 64 -#else -# define PTL_MD_MAX_IOV 16 -#endif - -struct lib_msg_t { - struct list_head msg_list; - int send_ack; - lib_md_t *md; - ptl_nid_t nid; - ptl_pid_t pid; - ptl_event_t ev; - ptl_handle_wire_t ack_wmd; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } msg_iov; -}; - -struct lib_ptl_t { - ptl_pt_index_t size; - struct list_head *tbl; -}; - -struct lib_ac_t { - int next_free; -}; - -typedef struct { - struct list_head lh_hash_chain; - __u64 lh_cookie; -} lib_handle_t; - -#define lh_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -struct lib_eq_t { - struct list_head eq_list; - lib_handle_t eq_lh; - ptl_seq_t sequence; - ptl_size_t size; - ptl_event_t *base; - int eq_refcount; - int (*event_callback) (ptl_event_t * event); - void *eq_addrkey; -}; - -struct lib_me_t { - struct list_head me_list; - lib_handle_t me_lh; - ptl_process_id_t match_id; - ptl_match_bits_t match_bits, ignore_bits; - ptl_unlink_t unlink; - lib_md_t *md; -}; - -struct lib_md_t { - struct list_head md_list; - lib_handle_t md_lh; - lib_me_t *me; - user_ptr start; - ptl_size_t offset; - ptl_size_t length; - ptl_size_t max_size; - int threshold; - int pending; - ptl_unlink_t unlink; - unsigned int options; - unsigned int md_flags; - void *user_ptr; - lib_eq_t *eq; - void *md_addrkey; - unsigned int md_niov; /* # frags */ 
- union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } md_iov; -}; - -#define PTL_MD_FLAG_UNLINK (1 << 0) -#define PTL_MD_FLAG_AUTO_UNLINKED (1 << 1) - -#ifndef PTL_USE_SLAB_CACHE -typedef struct -{ - void *fl_objs; /* single contiguous array of objects */ - int fl_nobjs; /* the number of them */ - int fl_objsize; /* the size (including overhead) of each of them */ - struct list_head fl_list; /* where they are enqueued */ -} lib_freelist_t; - -typedef struct -{ - struct list_head fo_list; /* enqueue on fl_list */ - void *fo_contents; /* aligned contents */ -} lib_freeobj_t; -#endif - -typedef struct { - /* info about peers we are trying to fail */ - struct list_head tp_list; /* stash in ni.ni_test_peers */ - ptl_nid_t tp_nid; /* matching nid */ - unsigned int tp_threshold; /* # failures to simulate */ -} lib_test_peer_t; - -#define PTL_COOKIE_TYPE_MD 1 -#define PTL_COOKIE_TYPE_ME 2 -#define PTL_COOKIE_TYPE_EQ 3 -#define PTL_COOKIE_TYPES 4 -/* PTL_COOKIE_TYPES must be a power of 2, so the cookie type can be - * extracted by masking with (PTL_COOKIE_TYPES - 1) */ - -typedef struct { - int up; - int refcnt; - ptl_nid_t nid; - ptl_pid_t pid; - int num_nodes; - unsigned int debug; - lib_ptl_t tbl; - lib_ac_t ac; - lib_counters_t counters; - - int ni_lh_hash_size; /* size of lib handle hash table */ - struct list_head *ni_lh_hash_table; /* all extant lib handles, this interface */ - __u64 ni_next_object_cookie; /* cookie generator */ - __u64 ni_interface_cookie; /* uniquely identifies this ni in this epoch */ - - struct list_head ni_test_peers; - -#ifndef PTL_USE_SLAB_CACHE - lib_freelist_t ni_free_mes; - lib_freelist_t ni_free_msgs; - lib_freelist_t ni_free_mds; - lib_freelist_t ni_free_eqs; -#endif - struct list_head ni_active_msgs; - struct list_head ni_active_mds; - struct list_head ni_active_eqs; -} lib_ni_t; - -#endif diff --git a/lustre/portals/include/portals/list.h b/lustre/portals/include/portals/list.h deleted file mode 100644 
index 2b63312..0000000 --- a/lustre/portals/include/portals/list.h +++ /dev/null @@ -1,245 +0,0 @@ -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H - - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -#define prefetch(a) ((void)a) - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -#define INIT_LIST_HEAD(ptr) do { \ - (ptr)->next = (ptr); (ptr)->prev = (ptr); \ -} while (0) - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head * new, - struct list_head * prev, - struct list_head * next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. - */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. 
- * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. - */ -static inline int list_empty(struct list_head *head) -{ - return head->next == head; -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. 
- */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - */ -#define list_entry(ptr, type, member) \ - ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next, prefetch(pos->next); pos != (head); \ - pos = pos->next, prefetch(pos->next)) - -/** - * list_for_each_prev - iterate over a list in reverse order - * @pos: the &struct list_head to use as a loop counter. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev, prefetch(pos->prev); pos != (head); \ - pos = pos->prev, prefetch(pos->prev)) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop counter. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -#endif - -#ifndef list_for_each_entry -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop counter. 
- * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - prefetch(pos->member.next); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member), \ - prefetch(pos->member.next)) -#endif - -#ifndef list_for_each_entry_safe -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop counter. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) -#endif diff --git a/lustre/portals/include/portals/lltrace.h b/lustre/portals/include/portals/lltrace.h deleted file mode 100644 index 7d1b304..0000000 --- a/lustre/portals/include/portals/lltrace.h +++ /dev/null @@ -1,175 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Compile with: - * cc -I../../portals/include -o fio fio.c -L../../portals/linux/utils -lptlctl - */ -#ifndef __LTRACE_H_ -#define __LTRACE_H_ - -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <sys/time.h> -#include <portals/types.h> -#include <portals/ptlctl.h> -#include <linux/kp30.h> -#include <linux/limits.h> -#include <asm/page.h> -#include <linux/version.h> - -static inline int ltrace_write_file(char* fname) -{ - char* argv[3]; - - argv[0] = "debug_kernel"; - argv[1] = fname; - argv[2] = "1"; - - 
fprintf(stderr, "[ptlctl] %s %s %s\n", argv[0], argv[1], argv[2]); - - return jt_dbg_debug_kernel(3, argv); -} - -static inline int ltrace_clear() -{ - char* argv[1]; - - argv[0] = "clear"; - - fprintf(stderr, "[ptlctl] %s\n", argv[0]); - - return jt_dbg_clear_debug_buf(1, argv); -} - -static inline int ltrace_mark(int indent_level, char* text) -{ - char* argv[2]; - char mark_buf[PATH_MAX]; - - snprintf(mark_buf, PATH_MAX, "====%d=%s", indent_level, text); - - argv[0] = "mark"; - argv[1] = mark_buf; - return jt_dbg_mark_debug_buf(2, argv); -} - -static inline int ltrace_applymasks() -{ - char* argv[2]; - argv[0] = "list"; - argv[1] = "applymasks"; - - fprintf(stderr, "[ptlctl] %s %s\n", argv[0], argv[1]); - - return jt_dbg_list(2, argv); -} - - -static inline int ltrace_filter(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "filter"; - argv[1] = subsys_or_mask; - return jt_dbg_filter(2, argv); -} - -static inline int ltrace_show(char* subsys_or_mask) -{ - char* argv[2]; - argv[0] = "show"; - argv[1] = subsys_or_mask; - return jt_dbg_show(2, argv); -} - -static inline int ltrace_start() -{ - int rc = 0; - dbg_initialize(0, NULL); -#ifdef PORTALS_DEV_ID - rc = register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); -#endif - ltrace_filter("class"); - ltrace_filter("socknal"); - ltrace_filter("qswnal"); - ltrace_filter("gmnal"); - ltrace_filter("portals"); - - ltrace_show("all_types"); - ltrace_filter("trace"); - ltrace_filter("malloc"); - ltrace_filter("net"); - ltrace_filter("page"); - ltrace_filter("other"); - ltrace_filter("info"); - ltrace_applymasks(); - - return rc; -} - - -static inline void ltrace_stop() -{ -#ifdef PORTALS_DEV_ID - unregister_ioc_dev(PORTALS_DEV_ID); -#endif -} - -static inline int not_uml() -{ - /* Return Values: - * 0 when run under UML - * 1 when run on host - * <0 when lookup failed - */ - struct stat buf; - int rc = stat("/dev/ubd", &buf); - rc = ((rc<0) && (errno == ENOENT)) ? 
1 : rc; - if (rc<0) { - fprintf(stderr, "Cannot stat /dev/ubd: %s\n", strerror(errno)); - rc = 1; /* Assume host */ - } - return rc; -} - -#define LTRACE_MAX_NOB 256 -static inline void ltrace_add_processnames(char* fname) -{ - char cmdbuf[LTRACE_MAX_NOB]; - struct timeval tv; - struct timezone tz; - int nob; - int underuml = !not_uml(); - - gettimeofday(&tv, &tz); - - nob = snprintf(cmdbuf, LTRACE_MAX_NOB, "ps --no-headers -eo \""); - - /* Careful - these format strings need to match the CDEBUG - * formats in portals/linux/debug.c EXACTLY - */ - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, "%02x:%06x:%d:%lu.%06lu ", - S_RPC >> 24, D_VFSTRACE, 0, tv.tv_sec, tv.tv_usec); - - if (underuml && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))) { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d | %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0, 0L); - } - else { - nob += snprintf (cmdbuf+nob, LTRACE_MAX_NOB, - "(%s:%d:%s() %d+%lu): ", - "lltrace.h", __LINE__, __FUNCTION__, 0, 0L); - } - - nob += snprintf(cmdbuf+nob, LTRACE_MAX_NOB, " %%p %%c\" >> %s", fname); - system(cmdbuf); -} - -#endif diff --git a/lustre/portals/include/portals/myrnal.h b/lustre/portals/include/portals/myrnal.h deleted file mode 100644 index 12b1925..0000000 --- a/lustre/portals/include/portals/myrnal.h +++ /dev/null @@ -1,26 +0,0 @@ -/* -*/ - -#ifndef MYRNAL_H -#define MYRNAL_H - -#define MAX_ARGS_LEN (256) -#define MAX_RET_LEN (128) -#define MYRNAL_MAX_ACL_SIZE (64) -#define MYRNAL_MAX_PTL_SIZE (64) - -#define P3CMD (100) -#define P3SYSCALL (200) -#define P3REGISTER (300) - -enum { PTL_MLOCKALL }; - -typedef struct { - void *args; - size_t args_len; - void *ret; - size_t ret_len; - int p3cmd; -} myrnal_forward_t; - -#endif /* MYRNAL_H */ diff --git a/lustre/portals/include/portals/nal.h b/lustre/portals/include/portals/nal.h deleted file mode 100644 index 88be63c..0000000 --- a/lustre/portals/include/portals/nal.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*/ -#ifndef _NAL_H_ -#define 
_NAL_H_ - -/* - * p30/nal.h - * - * The API side NAL declarations - */ - -#include <portals/types.h> - -#ifdef yield -#undef yield -#endif - -typedef struct nal_t nal_t; - -struct nal_t { - ptl_ni_t ni; - int refct; - void *nal_data; - int *timeout; /* for libp30api users */ - int (*forward) (nal_t * nal, int index, /* Function ID */ - void *args, size_t arg_len, void *ret, size_t ret_len); - - int (*shutdown) (nal_t * nal, int interface); - - int (*validate) (nal_t * nal, void *base, size_t extent); - - void (*yield) (nal_t * nal); - - void (*lock) (nal_t * nal, unsigned long *flags); - - void (*unlock) (nal_t * nal, unsigned long *flags); -}; - -typedef nal_t *(ptl_interface_t) (int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_IP(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); -extern nal_t *PTL_IFACE_MYR(int, ptl_pt_index_t, ptl_ac_index_t, ptl_pid_t requested_pid); - -extern nal_t *ptl_hndl2nal(ptl_handle_any_t * any); - -#ifndef PTL_IFACE_DEFAULT -#define PTL_IFACE_DEFAULT (PTL_IFACE_IP) -#endif - -#endif diff --git a/lustre/portals/include/portals/nalids.h b/lustre/portals/include/portals/nalids.h deleted file mode 100644 index 1b837b4..0000000 --- a/lustre/portals/include/portals/nalids.h +++ /dev/null @@ -1,4 +0,0 @@ -#define PTL_IFACE_TCP 1 -#define PTL_IFACE_ER 2 -#define PTL_IFACE_SS 3 -#define PTL_IFACE_MAX 4 diff --git a/lustre/portals/include/portals/p30.h b/lustre/portals/include/portals/p30.h deleted file mode 100644 index a4ea39b..0000000 --- a/lustre/portals/include/portals/p30.h +++ /dev/null @@ -1,72 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _P30_H_ -#define _P30_H_ - -/* - * p30.h - * - * User application interface file - */ - -#if defined (__KERNEL__) -#include <linux/uio.h> -#include <linux/types.h> -#else -#include <sys/types.h> -#include <sys/uio.h> -#endif - -#include <portals/types.h> -#include 
<portals/nal.h> -#include <portals/api.h> -#include <portals/errno.h> -#include <portals/nalids.h> - -extern int __p30_initialized; /* for libraries & test codes */ -extern int __p30_myr_initialized; /* that don't know if p30 */ -extern int __p30_ip_initialized; /* had been initialized yet */ -extern ptl_handle_ni_t __myr_ni_handle, __ip_ni_handle; - -extern int __p30_myr_timeout; /* in seconds, for PtlNIBarrier, */ -extern int __p30_ip_timeout; /* PtlReduce_all, & PtlBroadcast_all */ - -/* - * Debugging flags reserved for the Portals reference library. - * These are not part of the API as described in the SAND report - * but are for the use of the maintainers of the reference implementation. - * - * It is not expected that the real implementations will export - * this functionality. - */ -#define PTL_DEBUG_NONE 0ul -#define PTL_DEBUG_ALL (0x0FFFul) /* Only the Portals flags */ - -#define __bit(x) ((unsigned long) 1<<(x)) -#define PTL_DEBUG_PUT __bit(0) -#define PTL_DEBUG_GET __bit(1) -#define PTL_DEBUG_REPLY __bit(2) -#define PTL_DEBUG_ACK __bit(3) -#define PTL_DEBUG_DROP __bit(4) -#define PTL_DEBUG_REQUEST __bit(5) -#define PTL_DEBUG_DELIVERY __bit(6) -#define PTL_DEBUG_UNLINK __bit(7) -#define PTL_DEBUG_THRESHOLD __bit(8) -#define PTL_DEBUG_API __bit(9) - -/* - * These eight are reserved for the NAL to define - * It should probably give them better names... 
- */ -#define PTL_DEBUG_NI_ALL (0xF000ul) /* Only the NAL flags */ -#define PTL_DEBUG_NI0 __bit(24) -#define PTL_DEBUG_NI1 __bit(25) -#define PTL_DEBUG_NI2 __bit(26) -#define PTL_DEBUG_NI3 __bit(27) -#define PTL_DEBUG_NI4 __bit(28) -#define PTL_DEBUG_NI5 __bit(29) -#define PTL_DEBUG_NI6 __bit(30) -#define PTL_DEBUG_NI7 __bit(31) - -#endif diff --git a/lustre/portals/include/portals/ppid.h b/lustre/portals/include/portals/ppid.h deleted file mode 100644 index 4727599..0000000 --- a/lustre/portals/include/portals/ppid.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - */ - -#ifndef _INCppidh_ -#define _INCppidh_ - -#include "defines.h" -// #include "idtypes.h" - - -#define MAX_PPID 1000 /* this needs to fit into 16 bits so the - maximum value is 65535. having it "large" - can help w/ debugging process accounting - but there are reasons for making it - somewhat smaller than the maximum -- - requiring storage for arrays that index - on the ppid, eg... */ - -#define MAX_GID 1000 /* this needs to fit into 16 bits... */ - -#define MAX_FIXED_PPID 100 -#define MAX_FIXED_GID 100 -#define PPID_FLOATING MAX_FIXED_PPID+1 /* Floating area starts here */ -#define GID_FLOATING MAX_FIXED_GID+1 /* Floating area starts here */ -#define NUM_PTL_TASKS MAX_FIXED_PPID+80 /* Maximum no. 
portals tasks */ - -#define PPID_AUTO 0 - -/* Minimum PPID is 1 */ -#define PPID_BEBOPD 1 /* bebopd */ -#define GID_BEBOPD 1 /* bebopd */ - -#define PPID_PCT 2 /* pct */ -#define GID_PCT 2 /* pct */ - -#define PPID_FYOD 3 /* fyod */ -#define GID_FYOD 3 /* fyod */ - -#define PPID_GDBWRAP 11 /* portals proxy for gdb */ -#define GID_GDBWRAP 11 /* portals proxy for gdb */ - -#define PPID_TEST 15 /* for portals tests */ -#define GID_TEST 15 - -#define GID_YOD 5 /* yod */ -#define GID_PINGD 6 /* pingd */ -#define GID_BT 7 /* bt */ -#define GID_PTLTEST 8 /* ptltest */ -#define GID_CGDB 9 /* cgdb */ -#define GID_TVDSVR 10 /* start-tvdsvr */ - -#endif /* _INCppidh_ */ diff --git a/lustre/portals/include/portals/ptlctl.h b/lustre/portals/include/portals/ptlctl.h deleted file mode 100644 index dc02780..0000000 --- a/lustre/portals/include/portals/ptlctl.h +++ /dev/null @@ -1,75 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - * header for libptlctl.a - */ -#ifndef _PTLCTL_H_ -#define _PTLCTL_H_ - -#define PORTALS_DEV_ID 0 -#define PORTALS_DEV_PATH "/dev/portals" -#define OBD_DEV_ID 1 -#define OBD_DEV_PATH "/dev/obd" - -int ptl_name2nal(char *str); -int ptl_parse_nid (ptl_nid_t *nidp, char *str); -char * ptl_nid2str (char *buffer, ptl_nid_t nid); - -int ptl_initialize(int argc, char **argv); -int jt_ptl_network(int argc, char **argv); -int jt_ptl_connect(int argc, char **argv); -int jt_ptl_disconnect(int argc, char **argv); -int jt_ptl_push_connection(int argc, char **argv); -int jt_ptl_ping(int argc, char **argv); -int jt_ptl_shownid(int argc, char **argv); -int jt_ptl_mynid(int argc, char **argv); -int jt_ptl_add_uuid(int argc, char **argv); -int jt_ptl_add_uuid_old(int argc, char **argv); /* backwards compatibility */ -int jt_ptl_close_uuid(int argc, char **argv); -int jt_ptl_del_uuid(int argc, char **argv); -int jt_ptl_rxmem (int argc, char **argv); -int jt_ptl_txmem (int argc, char **argv); -int jt_ptl_nagle (int argc, char **argv); -int jt_ptl_add_route (int argc, char **argv); -int jt_ptl_del_route (int argc, char **argv); -int jt_ptl_print_routes (int argc, char **argv); -int jt_ptl_fail_nid (int argc, char **argv); - -int dbg_initialize(int argc, char **argv); -int jt_dbg_filter(int argc, char **argv); -int jt_dbg_show(int argc, char **argv); -int jt_dbg_list(int argc, char **argv); -int jt_dbg_debug_kernel(int argc, char **argv); -int jt_dbg_debug_daemon(int argc, char **argv); -int jt_dbg_debug_file(int argc, char **argv); -int jt_dbg_clear_debug_buf(int argc, char **argv); -int jt_dbg_mark_debug_buf(int argc, char **argv); -int jt_dbg_modules(int argc, char **argv); -int jt_dbg_panic(int argc, char **argv); - -/* l_ioctl.c */ -int register_ioc_dev(int dev_id, const char * dev_name); -void unregister_ioc_dev(int dev_id); -int set_ioctl_dump(char * file); -int l_ioctl(int dev_id, int opc, void *buf); -int parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, 
void *)); -int jt_ioc_dump(int argc, char **argv); - -#endif diff --git a/lustre/portals/include/portals/stringtab.h b/lustre/portals/include/portals/stringtab.h deleted file mode 100644 index c9683f7..0000000 --- a/lustre/portals/include/portals/stringtab.h +++ /dev/null @@ -1,5 +0,0 @@ -/* -*/ -/* - * stringtab.h - */ diff --git a/lustre/portals/include/portals/types.h b/lustre/portals/include/portals/types.h deleted file mode 100644 index d4038b6..0000000 --- a/lustre/portals/include/portals/types.h +++ /dev/null @@ -1,157 +0,0 @@ -#ifndef _P30_TYPES_H_ -#define _P30_TYPES_H_ - -#ifdef __linux__ -#include <asm/types.h> -#include <asm/timex.h> -#else -#include <sys/types.h> -typedef u_int32_t __u32; -typedef u_int64_t __u64; -typedef unsigned long long cycles_t; -static inline cycles_t get_cycles(void) { return 0; } -#endif - -typedef __u64 ptl_nid_t; -typedef __u32 ptl_pid_t; -typedef __u32 ptl_pt_index_t; -typedef __u32 ptl_ac_index_t; -typedef __u64 ptl_match_bits_t; -typedef __u64 ptl_hdr_data_t; -typedef __u32 ptl_size_t; - -typedef struct { - unsigned long nal_idx; /* which network interface */ - __u64 cookie; /* which thing on that interface */ -} ptl_handle_any_t; - -typedef ptl_handle_any_t ptl_handle_ni_t; -typedef ptl_handle_any_t ptl_handle_eq_t; -typedef ptl_handle_any_t ptl_handle_md_t; -typedef ptl_handle_any_t ptl_handle_me_t; - -#define PTL_HANDLE_NONE \ -((const ptl_handle_any_t){.nal_idx = -1, .cookie = -1}) -#define PTL_EQ_NONE PTL_HANDLE_NONE - -static inline int PtlHandleEqual (ptl_handle_any_t h1, ptl_handle_any_t h2) -{ - return (h1.nal_idx == h2.nal_idx && h1.cookie == h2.cookie); -} - -#define PTL_NID_ANY ((ptl_nid_t) -1) -#define PTL_PID_ANY ((ptl_pid_t) -1) - -typedef struct { - ptl_nid_t nid; - ptl_pid_t pid; /* node id / process id */ -} ptl_process_id_t; - -typedef enum { - PTL_RETAIN = 0, - PTL_UNLINK -} ptl_unlink_t; - -typedef enum { - PTL_INS_BEFORE, - PTL_INS_AFTER -} ptl_ins_pos_t; - -typedef struct { - struct page *kiov_page; 
- unsigned int kiov_len; - unsigned int kiov_offset; -} ptl_kiov_t; - -typedef struct { - void *start; - ptl_size_t length; - int threshold; - int max_size; - unsigned int options; - void *user_ptr; - ptl_handle_eq_t eventq; - unsigned int niov; -} ptl_md_t; - -/* Options for the MD structure */ -#define PTL_MD_OP_PUT (1 << 0) -#define PTL_MD_OP_GET (1 << 1) -#define PTL_MD_MANAGE_REMOTE (1 << 2) -#define PTL_MD_AUTO_UNLINK (1 << 3) -#define PTL_MD_TRUNCATE (1 << 4) -#define PTL_MD_ACK_DISABLE (1 << 5) -#define PTL_MD_IOV (1 << 6) -#define PTL_MD_MAX_SIZE (1 << 7) -#define PTL_MD_KIOV (1 << 8) - -#define PTL_MD_THRESH_INF (-1) - -typedef enum { - PTL_EVENT_GET, - PTL_EVENT_PUT, - PTL_EVENT_REPLY, - PTL_EVENT_ACK, - PTL_EVENT_SENT -} ptl_event_kind_t; - -#define PTL_SEQ_BASETYPE long -typedef unsigned PTL_SEQ_BASETYPE ptl_seq_t; -#define PTL_SEQ_GT(a,b) (((signed PTL_SEQ_BASETYPE)((a) - (b))) > 0) - -typedef struct { - ptl_event_kind_t type; - ptl_process_id_t initiator; - ptl_pt_index_t portal; - ptl_match_bits_t match_bits; - ptl_size_t rlength, mlength, offset; - ptl_handle_me_t unlinked_me; - ptl_md_t mem_desc; - ptl_hdr_data_t hdr_data; - cycles_t arrival_time; - volatile ptl_seq_t sequence; -} ptl_event_t; - - -typedef enum { - PTL_ACK_REQ, - PTL_NOACK_REQ -} ptl_ack_req_t; - - -typedef struct { - volatile ptl_seq_t sequence; - ptl_size_t size; - ptl_event_t *base; - ptl_handle_any_t cb_eq_handle; -} ptl_eq_t; - -typedef struct { - ptl_eq_t *eq; -} ptl_ni_t; - - -typedef struct { - int max_match_entries; /* max number of match entries */ - int max_mem_descriptors; /* max number of memory descriptors */ - int max_event_queues; /* max number of event queues */ - int max_atable_index; /* maximum access control list table index */ - int max_ptable_index; /* maximum portals table index */ -} ptl_ni_limits_t; - -/* - * Status registers - */ -typedef enum { - PTL_SR_DROP_COUNT, - PTL_SR_DROP_LENGTH, - PTL_SR_RECV_COUNT, - PTL_SR_RECV_LENGTH, - PTL_SR_SEND_COUNT, - 
PTL_SR_SEND_LENGTH, - PTL_SR_MSGS_MAX, -} ptl_sr_index_t; - -typedef int ptl_sr_value_t; - -#endif diff --git a/lustre/portals/knals/.cvsignore b/lustre/portals/knals/.cvsignore deleted file mode 100644 index 282522d..0000000 --- a/lustre/portals/knals/.cvsignore +++ /dev/null @@ -1,2 +0,0 @@ -Makefile -Makefile.in diff --git a/lustre/portals/knals/Makefile.am b/lustre/portals/knals/Makefile.am deleted file mode 100644 index fed2785..0000000 --- a/lustre/portals/knals/Makefile.am +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DIST_SUBDIRS= socknal toenal qswnal gmnal scimacnal -SUBDIRS= socknal toenal @QSWNAL@ @GMNAL@ @SCIMACNAL@ diff --git a/lustre/portals/knals/Makefile.mk b/lustre/portals/knals/Makefile.mk deleted file mode 100644 index ce40a60..0000000 --- a/lustre/portals/knals/Makefile.mk +++ /dev/null @@ -1,4 +0,0 @@ -include ../Kernelenv - -obj-y = socknal/ -# more coming... \ No newline at end of file diff --git a/lustre/portals/knals/gmnal/.cvsignore b/lustre/portals/knals/gmnal/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/knals/gmnal/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/portals/knals/gmnal/Makefile.am b/lustre/portals/knals/gmnal/Makefile.am deleted file mode 100644 index 1dc6f4e..0000000 --- a/lustre/portals/knals/gmnal/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kgmnal -modulenet_DATA = kgmnal.o -EXTRA_PROGRAMS = kgmnal - -DEFS = -kgmnal_SOURCES = gmnal.c gmnal_cb.c gmnal.h diff --git a/lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch b/lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch deleted file mode 100644 index 23c80d9..0000000 --- a/lustre/portals/knals/gmnal/gm-1.5.2.1-exports.patch +++ /dev/null @@ -1,43 +0,0 @@ -diff -ru gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c ---- gm-1.5.2.1_Linux/drivers/linux/gm/gm_arch.c Mon Jul 1 10:35:09 2002 -+++ gm-1.5.2.1_Linux-cfs/drivers/linux/gm/gm_arch.c Thu Sep 19 14:19:38 2002 -@@ -30,6 +30,8 @@ - * - ************************************************************************/ - -+#define EXPORT_SYMTAB -+ - #include <linux/config.h> - #include <linux/module.h> - -@@ -4075,6 +4077,28 @@ - return 0; - } - -+EXPORT_SYMBOL(gm_blocking_receive_no_spin); -+EXPORT_SYMBOL(gm_close); -+EXPORT_SYMBOL(gm_dma_free); -+EXPORT_SYMBOL(gm_dma_malloc); -+EXPORT_SYMBOL(gm_drop_sends); -+EXPORT_SYMBOL(gm_finalize); -+EXPORT_SYMBOL(gm_get_node_id); -+EXPORT_SYMBOL(gm_init); -+EXPORT_SYMBOL(gm_initialize_alarm); -+EXPORT_SYMBOL(gm_max_node_id_in_use); -+EXPORT_SYMBOL(gm_min_size_for_length); -+EXPORT_SYMBOL(gm_num_receive_tokens); -+EXPORT_SYMBOL(gm_num_send_tokens); -+EXPORT_SYMBOL(gm_open); -+EXPORT_SYMBOL(gm_provide_receive_buffer); -+EXPORT_SYMBOL(gm_resume_sending); -+EXPORT_SYMBOL(gm_send_with_callback); -+EXPORT_SYMBOL(gm_set_acceptable_sizes); -+EXPORT_SYMBOL(gm_set_alarm); -+EXPORT_SYMBOL(gm_unknown); -+ -+ - /* - This file uses GM standard indentation. 
- -Only in gm-1.5.2.1_Linux-cfs/drivers/linux/gm: gm_arch.c~ -Only in gm-1.5.2.1_Linux-cfs/: trace diff --git a/lustre/portals/knals/gmnal/gmnal.c b/lustre/portals/knals/gmnal/gmnal.c deleted file mode 100644 index ceeea2a..0000000 --- a/lustre/portals/knals/gmnal/gmnal.c +++ /dev/null @@ -1,284 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Based on ksocknal and qswnal - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read <rread@datarithm.net> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include "gmnal.h" - -ptl_handle_ni_t kgmnal_ni; -nal_t kgmnal_api; - -kgmnal_data_t kgmnal_data; -int gmnal_debug = 0; - -kpr_nal_interface_t kqswnal_router_interface = { - kprni_nalid: GMNAL, - kprni_arg: NULL, - kprni_fwd: kgmnal_fwd_packet, -}; - -static int kgmnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */ - return PTL_OK; -} - -static void kgmnal_lock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - -static void kgmnal_unlock(nal_t *nal, unsigned long *flags) -{ - kgmnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kgm_cb; - - - LASSERT (nal == &kgmnal_api); - LASSERT (k == &kgmnal_data); - LASSERT (nal_cb == &kgmnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - -static int kgmnal_shutdown(nal_t *nal, int ni) -{ - LASSERT (nal == &kgmnal_api); - return 0; -} - -static void kgmnal_yield( nal_t *nal ) -{ - LASSERT (nal == &kgmnal_api); - - if (current->need_resched) - schedule(); - return; -} - -kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *data,int ndx) -{ - kgmnal_rx_t *conn; - - PORTAL_ALLOC(conn, sizeof(kgmnal_rx_t)); - /* Check for out of mem here */ - if (conn==NULL) { - printk("kgm_add_recv: memory alloc failed\n"); - return NULL; - } - - list_add(&conn->krx_item,(struct list_head *)&data->kgm_list); - // conn->ndx=ndx; - // conn->len=conn->ptlhdr_copied=0; - // conn->loopback=0; - return conn; -} - -static nal_t *kgmnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - unsigned int nnids; - - 
gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - CDEBUG(D_NET, "calling lib_init with nid 0x%Lx of %d\n", - kgmnal_data.kgm_nid, nnids); - lib_init(&kgmnal_lib, kgmnal_data.kgm_nid, 0, nnids,ptl_size, ac_size); - return &kgmnal_api; -} - -static void __exit -kgmnal_finalize(void) -{ - struct list_head *tmp; - - PORTAL_SYMBOL_UNREGISTER (kgmnal_ni); - PtlNIFini(kgmnal_ni); - lib_fini(&kgmnal_api); - - if (kgmnal_data.kgm_port) { - gm_close(kgmnal_data.kgm_port); - } - - /* FIXME: free dma buffers */ - /* FIXME: kill receiver thread */ - - PORTAL_FREE (kgmnal_data.kgm_trans, bsizeof(kgmnal_tx_t)*TXMSGS); - - list_for_each(tmp, &kgmnal_data.kgm_list) { - kgmnal_rx_t *conn; - conn = list_entry(tmp, kgmnal_rx_t, krx_item); - CDEBUG(D_IOCTL, "freeing conn %p\n",conn); - tmp = tmp->next; - list_del(&conn->krx_item); - PORTAL_FREE(conn, sizeof(*conn)); - } - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - -static int __init -kgmnal_initialize(void) -{ - int rc; - int ntok; - unsigned long sizemask; - unsigned int nid; - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kgmnal_api.forward = kgmnal_forward; - kgmnal_api.shutdown = kgmnal_shutdown; - kgmnal_api.yield = kgmnal_yield; - kgmnal_api.validate = NULL; /* our api validate is a NOOP */ - kgmnal_api.lock= kgmnal_lock; - kgmnal_api.unlock= kgmnal_unlock; - kgmnal_api.nal_data = &kgmnal_data; - - kgmnal_lib.nal_data = &kgmnal_data; - - memset(&kgmnal_data, 0, sizeof(kgmnal_data)); - - INIT_LIST_HEAD(&kgmnal_data.kgm_list); - kgmnal_data.kgm_cb = &kgmnal_lib; - - /* Allocate transmit descriptors */ - PORTAL_ALLOC (kgmnal_data.kgm_trans, sizeof(kgmnal_tx_t)*TXMSGS); - if (kgmnal_data.kgm_trans==NULL) { - printk("kgmnal: init: failed to allocate transmit " - "descriptors\n"); - return -1; - } - memset(kgmnal_data.kgm_trans,-1,sizeof(kgmnal_tx_t)*(TXMSGS)); - - spin_lock_init(&kgmnal_data.kgm_dispatch_lock); - 
spin_lock_init(&kgmnal_data.kgm_update_lock); - spin_lock_init(&kgmnal_data.kgm_send_lock); - - /* Do the receiver and xmtr allocation */ - - rc = gm_init(); - if (rc != GM_SUCCESS) { - CERROR("gm_init failed: %d\n", rc); - return -1; - } - - rc = gm_open(&kgmnal_data.kgm_port, 0 , KGM_PORT_NUM, KGM_HOSTNAME, - GM_API_VERSION_1_1); - if (rc != GM_SUCCESS) { - gm_finalize(); - kgmnal_data.kgm_port = NULL; - CERROR("gm_open failed: %d\n", rc); - return -1; - } - gm_get_node_id(kgmnal_data.kgm_port, &nid); - kgmnal_data.kgm_nid = nid; - /* Allocate 2 different sizes of buffers. For new, use half - the tokens for each. */ - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d large %d byte recv buffers\n", - ntok, MSG_LEN_LARGE); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_LARGE); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_LARGE, MSG_SIZE_LARGE, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_LARGE, GM_LOW_PRIORITY); - } - - ntok = gm_num_receive_tokens(kgmnal_data.kgm_port)/2; - CDEBUG(D_NET, "gmnal_init: creating %d small %d byte recv buffers\n", - ntok, MSG_LEN_SMALL); - while (ntok-- > 0) { - void * buffer = gm_dma_malloc(kgmnal_data.kgm_port, - MSG_LEN_SMALL); - if (buffer == NULL) { - CERROR("gm_init failed: %d\n", rc); - return (-ENOMEM); - } - CDEBUG(D_NET, " add buffer: port %p buf %p len %d size %d " - "pri %d\n ", kgmnal_data.kgm_port, buffer, - MSG_LEN_SMALL, MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - gm_provide_receive_buffer(kgmnal_data.kgm_port, buffer, - MSG_SIZE_SMALL, GM_LOW_PRIORITY); - - } - sizemask = (1 << MSG_SIZE_LARGE) | (1 << MSG_SIZE_SMALL); - CDEBUG(D_NET, "gm_set_acceptable_sizes port %p pri %d mask 0x%x\n", - kgmnal_data.kgm_port, GM_LOW_PRIORITY, sizemask); - 
gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_LOW_PRIORITY, - sizemask); - gm_set_acceptable_sizes(kgmnal_data.kgm_port, GM_HIGH_PRIORITY, 0); - - /* Initialize Network Interface */ - rc = PtlNIInit(kgmnal_init, 32, 4, 0, &kgmnal_ni); - if (rc) { - CERROR("PtlNIInit failed %d\n", rc); - return (-ENOMEM); - } - - /* Start receiver thread */ - kernel_thread(kgmnal_recv_thread, &kgmnal_data, 0); - - PORTAL_SYMBOL_REGISTER(kgmnal_ni); - - kgmnal_data.kgm_init = 1; - - return 0; -} - -MODULE_AUTHOR("Robert Read <rread@datarithm.net>"); -MODULE_DESCRIPTION("Kernel Myrinet GM NAL v0.1"); -MODULE_LICENSE("GPL"); - -module_init (kgmnal_initialize); -module_exit (kgmnal_finalize); - -EXPORT_SYMBOL (kgmnal_ni); diff --git a/lustre/portals/knals/gmnal/gmnal.h b/lustre/portals/knals/gmnal/gmnal.h deleted file mode 100644 index 47e8c3c..0000000 --- a/lustre/portals/knals/gmnal/gmnal.h +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#ifndef _GMNAL_H -#define _GMNAL_H - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/locks.h> -#include <linux/unistd.h> -#include <linux/init.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <asm/uaccess.h> -#include <asm/segment.h> - -#define DEBUG_SUBSYSTEM S_GMNAL - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <portals/lib-p30.h> - -#include <gm.h> - - -/* - * Myrinet GM NAL - */ -#define NPAGES_LARGE 16 -#define NPAGES_SMALL 1 -#define MSG_LEN_LARGE NPAGES_LARGE*PAGE_SIZE -#define MSG_LEN_SMALL NPAGES_SMALL*PAGE_SIZE -#define MSG_SIZE_LARGE (gm_min_size_for_length(MSG_LEN_LARGE)) -#define MSG_SIZE_SMALL (gm_min_size_for_length(MSG_LEN_SMALL)) - -#define TXMSGS 64 /* 
Number of Transmit Messages */ -#define ENVELOPES 8 /* Number of outstanding receive msgs */ - -#define KGM_PORT_NUM 3 -#define KGM_HOSTNAME "kgmnal" - - -typedef struct { - char *krx_buffer; - unsigned long krx_len; - unsigned int krx_size; - unsigned int krx_priority; - struct list_head krx_item; -} kgmnal_rx_t; - - -typedef struct { - nal_cb_t *ktx_nal; - void *ktx_private; - lib_msg_t *ktx_cookie; - char *ktx_buffer; - size_t ktx_len; - unsigned long ktx_size; - int ktx_ndx; - unsigned int ktx_priority; - unsigned int ktx_tgt_node; - unsigned int ktx_tgt_port_id; -} kgmnal_tx_t; - - -typedef struct { - char kgm_init; - char kgm_shuttingdown; - struct gm_port *kgm_port; - struct list_head kgm_list; - ptl_nid_t kgm_nid; - nal_cb_t *kgm_cb; - struct kgm_trans *kgm_trans; - struct tq_struct kgm_ready_tq; - spinlock_t kgm_dispatch_lock; - spinlock_t kgm_update_lock; - spinlock_t kgm_send_lock; -} kgmnal_data_t; - -int kgm_init(kgmnal_data_t *kgm_data); -int kgmnal_recv_thread(void *); -int gm_return_mynid(void); -void kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); - -extern kgmnal_data_t kgmnal_data; -extern nal_t kgmnal_api; -extern nal_cb_t kgmnal_lib; - -#endif /* _GMNAL_H */ - diff --git a/lustre/portals/knals/gmnal/gmnal_cb.c b/lustre/portals/knals/gmnal/gmnal_cb.c deleted file mode 100644 index 3d4c86d..0000000 --- a/lustre/portals/knals/gmnal/gmnal_cb.c +++ /dev/null @@ -1,517 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Based on ksocknal and qswnal - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Robert Read <rread@datarithm.net> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* TODO - * preallocate send buffers, store on list - * put receive buffers on queue, handle with receive threads - * use routing - */ - -#include "gmnal.h" - -extern kgmnal_rx_t *kgm_add_recv(kgmnal_data_t *,int); - -static kgmnal_tx_t * -get_trans(void) -{ - kgmnal_tx_t *t; - PORTAL_ALLOC(t, (sizeof(kgmnal_tx_t))); - return t; -} - -static void -put_trans(kgmnal_tx_t *t) -{ - PORTAL_FREE(t, sizeof(kgmnal_tx_t)); -} - -int -kgmnal_ispeer (ptl_nid_t nid) -{ - unsigned int gmnid = (unsigned int)nid; - unsigned int nnids; - - gm_max_node_id_in_use(kgmnal_data.kgm_port, &nnids); - - return ((ptl_nid_t)gmnid == nid &&/* didn't lose high bits on conversion ? 
*/ - gmnid < nnids); /* it's in this machine */ -} - -/* - * LIB functions follow - * - */ -static int -kgmnal_read (nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - -static int -kgmnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr, - size_t len) -{ - CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - -static void * -kgmnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - return buf; -} - -static void -kgmnal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - -static void -kgmnal_printf(nal_cb_t *nal, const char *fmt, ...) -{ - va_list ap; - char msg[256]; - - if (portal_debug & D_NET) { - va_start( ap, fmt ); - vsnprintf( msg, sizeof(msg), fmt, ap ); - va_end( ap ); - - printk("CPUId: %d %s",smp_processor_id(), msg); - } -} - - -static void -kgmnal_cli(nal_cb_t *nal, unsigned long *flags) -{ - kgmnal_data_t *data= nal->nal_data; - - spin_lock_irqsave(&data->kgm_dispatch_lock,*flags); -} - - -static void -kgmnal_sti(nal_cb_t *nal, unsigned long *flags) -{ - kgmnal_data_t *data= nal->nal_data; - - spin_unlock_irqrestore(&data->kgm_dispatch_lock,*flags); -} - - -static int -kgmnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* network distance doesn't mean much for this nal */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - -/* FIXME rmr: add rounting code here */ -static void -kgmnal_tx_done(kgmnal_tx_t *trans, int error) -{ - lib_finalize(trans->ktx_nal, trans->ktx_private, trans->ktx_cookie); - - gm_dma_free(kgmnal_data.kgm_port, trans->ktx_buffer); - - trans->ktx_buffer = NULL; - trans->ktx_len = 0; - - put_trans(trans); -} 
-static char * gm_error_strings[GM_NUM_STATUS_CODES] = { - [GM_SUCCESS] = "GM_SUCCESS", - [GM_SEND_TIMED_OUT] = "GM_SEND_TIMED_OUT", - [GM_SEND_REJECTED] = "GM_SEND_REJECTED", - [GM_SEND_TARGET_PORT_CLOSED] = "GM_SEND_TARGET_PORT_CLOSED", - [GM_SEND_TARGET_NODE_UNREACHABLE] = "GM_SEND_TARGET_NODE_UNREACHABLE", - [GM_SEND_DROPPED] = "GM_SEND_DROPPED", - [GM_SEND_PORT_CLOSED] = "GM_SEND_PORT_CLOSED", -}; - -inline char * get_error(int status) -{ - if (gm_error_strings[status] != NULL) - return gm_error_strings[status]; - else - return "Unknown error"; -} - -static void -kgmnal_errhandler(struct gm_port *p, void *context, gm_status_t status) -{ - CDEBUG(D_NET,"error callback: ktx %p status %d\n", context, status); -} - -static void -kgmnal_txhandler(struct gm_port *p, void *context, gm_status_t status) -{ - kgmnal_tx_t *ktx = (kgmnal_tx_t *)context; - int err = 0; - - LASSERT (p != NULL); - LASSERT (ktx != NULL); - - CDEBUG(D_NET,"ktx %p status %d nid 0x%x pid %d\n", ktx, status, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id); - - switch((int)status) { - case GM_SUCCESS: /* normal */ - break; - case GM_SEND_TIMED_OUT: /* application error */ - case GM_SEND_REJECTED: /* size of msg unacceptable */ - case GM_SEND_TARGET_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_resume_sending(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_TARGET_NODE_UNREACHABLE: - case GM_SEND_PORT_CLOSED: - CERROR("%s (%d):\n", get_error(status), status); - gm_drop_sends(kgmnal_data.kgm_port, ktx->ktx_priority, - ktx->ktx_tgt_node, ktx->ktx_tgt_port_id, - kgmnal_errhandler, NULL); - err = -EIO; - break; - case GM_SEND_DROPPED: - CERROR("%s (%d):\n", get_error(status), status); - err = -EIO; - break; - default: - CERROR("Unknown status: %d\n", status); - err = -EIO; - break; - } - - kgmnal_tx_done(ktx, err); -} - -/* - */ - -static int -kgmnal_send(nal_cb_t *nal, - void 
*private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t len) -{ - /* - * ipnal assumes that this is the private as passed to lib_dispatch.. - * so do we :/ - */ - kgmnal_tx_t *ktx=NULL; - int rc=0; - void * buf; - int buf_len = sizeof(ptl_hdr_t) + len; - int buf_size = 0; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - PROF_START(gmnal_send); - - - CDEBUG(D_NET, "sending %d bytes from %p to nid: 0x%Lx pid %d\n", - len, iov, nid, KGM_PORT_NUM); - - /* ensure there is an available tx handle */ - - /* save transaction info to trans for later finalize and cleanup */ - ktx = get_trans(); - if (ktx == NULL) { - rc = -ENOMEM; - goto send_exit; - } - - /* hmmm... GM doesn't support vectored write, so need to allocate buffer to coalesce - header and data. - Also, memory must be dma'able or registered with GM. */ - - if (buf_len <= MSG_LEN_SMALL) { - buf_size = MSG_SIZE_SMALL; - } else if (buf_len <= MSG_LEN_LARGE) { - buf_size = MSG_SIZE_LARGE; - } else { - printk("kgmnal:request exceeds TX MTU size (%d).\n", - MSG_SIZE_LARGE); - rc = -1; - goto send_exit; - } - - buf = gm_dma_malloc(kgmnal_data.kgm_port, buf_len); - if (buf == NULL) { - rc = -ENOMEM; - goto send_exit; - } - memcpy(buf, hdr, sizeof(ptl_hdr_t)); - - if (len != 0) - lib_copy_iov2buf(((char *)buf) + sizeof (ptl_hdr_t), - options, niov, iov, len); - - ktx->ktx_nal = nal; - ktx->ktx_private = private; - ktx->ktx_cookie = cookie; - ktx->ktx_len = buf_len; - ktx->ktx_size = buf_size; - ktx->ktx_buffer = buf; - ktx->ktx_priority = GM_LOW_PRIORITY; - ktx->ktx_tgt_node = nid; - ktx->ktx_tgt_port_id = KGM_PORT_NUM; - - CDEBUG(D_NET, "gm_send %d bytes (size %d) from %p to nid: 0x%Lx " - "pid %d pri %d\n", buf_len, buf_size, iov, nid, KGM_PORT_NUM, - GM_LOW_PRIORITY); - - gm_send_with_callback(kgmnal_data.kgm_port, buf, buf_size, - buf_len, GM_LOW_PRIORITY, - nid, KGM_PORT_NUM, - kgmnal_txhandler, ktx); - - 
PROF_FINISH(gmnal_send); - send_exit: - return rc; -} -void -kgmnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - CERROR ("forwarding not implemented\n"); -} - -void -kqswnal_fwd_callback (void *arg, int error) -{ - CERROR ("forwarding not implemented\n"); -} - - -static inline void -kgmnal_requeue_rx(kgmnal_rx_t *krx) -{ - gm_provide_receive_buffer(kgmnal_data.kgm_port, krx->krx_buffer, - krx->krx_size, krx->krx_priority); -} - -/* Process a received portals packet */ - -/* Receive Interrupt Handler */ -static void kgmnal_rx(kgmnal_data_t *kgm, unsigned long len, unsigned int size, - void * buf, unsigned int pri) -{ - ptl_hdr_t *hdr = buf; - kgmnal_rx_t krx; - - CDEBUG(D_NET,"buf %p, len %ld\n", buf, len); - - if ( len < sizeof( ptl_hdr_t ) ) { - /* XXX what's this for? */ - if (kgm->kgm_shuttingdown) - return; - CERROR("kgmnal: did not receive complete portal header, " - "len= %ld", len); - gm_provide_receive_buffer(kgm->kgm_port, buf, size, pri); - return; - } - - /* might want to use seperate threads to handle receive */ - krx.krx_buffer = buf; - krx.krx_len = len; - krx.krx_size = size; - krx.krx_priority = pri; - - if ( hdr->dest_nid == kgmnal_lib.ni.nid ) { - PROF_START(lib_parse); - lib_parse(&kgmnal_lib, (ptl_hdr_t *)krx.krx_buffer, &krx); - PROF_FINISH(lib_parse); - } else if (kgmnal_ispeer(hdr->dest_nid)) { - /* should have gone direct to peer */ - CERROR("dropping packet from 0x%llx to 0x%llx: target is " - "a peer", hdr->src_nid, hdr->dest_nid); - kgmnal_requeue_rx(&krx); - } else { - /* forward to gateway */ - CERROR("forwarding not implemented yet"); - kgmnal_requeue_rx(&krx); - } - - return; -} - - -static int kgmnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - int options, - unsigned int niov, - lib_md_iov_t *iov, - size_t mlen, - size_t rlen) -{ - kgmnal_rx_t *krx = private; - - LASSERT ((options & PTL_MD_KIOV) == 0); - - CDEBUG(D_NET,"mlen=%d, rlen=%d\n", mlen, rlen); - - /* What was actually received must be >= what sender 
claims to - * have sent. This is an LASSERT, since lib-move doesn't - * check cb return code yet. */ - LASSERT (krx->krx_len >= sizeof (ptl_hdr_t) + rlen); - LASSERT (mlen <= rlen); - - PROF_START(gmnal_recv); - - if(mlen != 0) { - PROF_START(memcpy); - lib_copy_buf2iov (options, niov, iov, - krx->krx_buffer + sizeof (ptl_hdr_t), mlen); - PROF_FINISH(memcpy); - } - - PROF_START(lib_finalize); - lib_finalize(nal, private, cookie); - PROF_FINISH(lib_finalize); - - kgmnal_requeue_rx(krx); - - PROF_FINISH(gmnal_recv); - - return rlen; -} - - -static void kgmnal_shutdown(void * none) -{ - CERROR("called\n"); - return; -} - -/* - * Set terminate and use alarm to wake up the recv thread. - */ -static void recv_shutdown(kgmnal_data_t *kgm) -{ - gm_alarm_t alarm; - - kgm->kgm_shuttingdown = 1; - gm_initialize_alarm(&alarm); - gm_set_alarm(kgm->kgm_port, &alarm, 1, kgmnal_shutdown, NULL); -} - -int kgmnal_end(kgmnal_data_t *kgm) -{ - - /* wait for sends to finish ? */ - /* remove receive buffers */ - /* shutdown receive thread */ - - recv_shutdown(kgm); - - return 0; -} - -/* Used only for the spinner */ -int kgmnal_recv_thread(void *arg) -{ - kgmnal_data_t *kgm = arg; - - LASSERT(kgm != NULL); - - kportal_daemonize("kgmnal_rx"); - - while(1) { - gm_recv_event_t *e; - int priority = GM_LOW_PRIORITY; - if (kgm->kgm_shuttingdown) - break; - - e = gm_blocking_receive_no_spin(kgm->kgm_port); - if (e == NULL) { - CERROR("gm_blocking_receive returned NULL\n"); - break; - } - - switch(gm_ntohc(e->recv.type)) { - case GM_HIGH_RECV_EVENT: - priority = GM_HIGH_PRIORITY; - /* fall through */ - case GM_RECV_EVENT: - kgmnal_rx(kgm, gm_ntohl(e->recv.length), - gm_ntohc(e->recv.size), - gm_ntohp(e->recv.buffer), priority); - break; - case GM_ALARM_EVENT: - CERROR("received alarm"); - gm_unknown(kgm->kgm_port, e); - break; - case GM_BAD_SEND_DETECTED_EVENT: /* ?? 
*/ - CERROR("received bad send!\n"); - break; - default: - gm_unknown(kgm->kgm_port, e); - } - } - - CERROR("shuttting down.\n"); - return 0; -} - -nal_cb_t kgmnal_lib = { - nal_data: &kgmnal_data, /* NAL private data */ - cb_send: kgmnal_send, - cb_recv: kgmnal_recv, - cb_read: kgmnal_read, - cb_write: kgmnal_write, - cb_malloc: kgmnal_malloc, - cb_free: kgmnal_free, - cb_printf: kgmnal_printf, - cb_cli: kgmnal_cli, - cb_sti: kgmnal_sti, - cb_dist: kgmnal_dist -}; diff --git a/lustre/portals/knals/qswnal/.cvsignore b/lustre/portals/knals/qswnal/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/knals/qswnal/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/portals/knals/qswnal/Makefile.am b/lustre/portals/knals/qswnal/Makefile.am deleted file mode 100644 index 3eb4dd5..0000000 --- a/lustre/portals/knals/qswnal/Makefile.am +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kqswnal -modulenet_DATA = kqswnal.o -EXTRA_PROGRAMS = kqswnal - - -#CFLAGS:= @KCFLAGS@ -#CPPFLAGS:=@KCPPFLAGS@ -DEFS = -CPPFLAGS=@CPPFLAGS@ @with_quadrics@ -kqswnal_SOURCES = qswnal.c qswnal_cb.c qswnal.h diff --git a/lustre/portals/knals/qswnal/qswnal.c b/lustre/portals/knals/qswnal/qswnal.c deleted file mode 100644 index 1a8fb74..0000000 --- a/lustre/portals/knals/qswnal/qswnal.c +++ /dev/null @@ -1,608 +0,0 @@ -/* - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * W. 
Marcus Miller - Based on ksocknal - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswnal.h" - -ptl_handle_ni_t kqswnal_ni; -nal_t kqswnal_api; -kqswnal_data_t kqswnal_data; - -kpr_nal_interface_t kqswnal_router_interface = { - kprni_nalid: QSWNAL, - kprni_arg: NULL, - kprni_fwd: kqswnal_fwd_packet, -}; - - -static int -kqswnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - lib_dispatch(nal_cb, k, id, args, ret); /* nal needs k */ - return (PTL_OK); -} - -static void -kqswnal_lock (nal_t *nal, unsigned long *flags) -{ - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - -static void -kqswnal_unlock(nal_t *nal, unsigned long *flags) -{ - kqswnal_data_t *k = nal->nal_data; - nal_cb_t *nal_cb = k->kqn_cb; - - LASSERT (nal == &kqswnal_api); - LASSERT (k == &kqswnal_data); - LASSERT (nal_cb == &kqswnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - -static int -kqswnal_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "shutdown\n"); 
- - LASSERT (nal == &kqswnal_api); - return (0); -} - -static void -kqswnal_yield( nal_t *nal ) -{ - CDEBUG (D_NET, "yield\n"); - - if (current->need_resched) - schedule(); - return; -} - -static nal_t * -kqswnal_init(int interface, ptl_pt_index_t ptl_size, ptl_ac_index_t ac_size, - ptl_pid_t requested_pid) -{ - ptl_nid_t mynid = kqswnal_elanid2nid (kqswnal_data.kqn_elanid); - int nnids = kqswnal_data.kqn_nnodes; - - CDEBUG(D_NET, "calling lib_init with nid "LPX64" of %d\n", mynid, nnids); - - lib_init(&kqswnal_lib, mynid, 0, nnids, ptl_size, ac_size); - - return (&kqswnal_api); -} - -int -kqswnal_cmd (struct portal_ioctl_data *data, void *private) -{ - LASSERT (data != NULL); - - switch (data->ioc_nal_cmd) { - case NAL_CMD_REGISTER_MYNID: - CDEBUG (D_IOCTL, "setting NID offset to "LPX64" (was "LPX64")\n", - data->ioc_nid - kqswnal_data.kqn_elanid, - kqswnal_data.kqn_nid_offset); - kqswnal_data.kqn_nid_offset = - data->ioc_nid - kqswnal_data.kqn_elanid; - kqswnal_lib.ni.nid = data->ioc_nid; - return (0); - - default: - return (-EINVAL); - } -} - -void __exit -kqswnal_finalise (void) -{ - switch (kqswnal_data.kqn_init) - { - default: - LASSERT (0); - - case KQN_INIT_ALL: - PORTAL_SYMBOL_UNREGISTER (kqswnal_ni); - /* fall through */ - - case KQN_INIT_PTL: - PtlNIFini (kqswnal_ni); - lib_fini (&kqswnal_lib); - /* fall through */ - - case KQN_INIT_DATA: - break; - - case KQN_INIT_NOTHING: - return; - } - - /**********************************************************************/ - /* Make router stop her calling me and fail any more call-ins */ - kpr_shutdown (&kqswnal_data.kqn_router); - - /**********************************************************************/ - /* flag threads to terminate, wake them and wait for them to die */ - - kqswnal_data.kqn_shuttingdown = 1; - wake_up_all (&kqswnal_data.kqn_sched_waitq); - - while (atomic_read (&kqswnal_data.kqn_nthreads) != 0) { - CDEBUG(D_NET, "waiting for %d threads to terminate\n", - atomic_read 
(&kqswnal_data.kqn_nthreads)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - - /**********************************************************************/ - /* close elan comms */ - - if (kqswnal_data.kqn_eprx_small != NULL) - ep_remove_large_rcvr (kqswnal_data.kqn_eprx_small); - - if (kqswnal_data.kqn_eprx_large != NULL) - ep_remove_large_rcvr (kqswnal_data.kqn_eprx_large); - - if (kqswnal_data.kqn_eptx != NULL) - ep_free_large_xmtr (kqswnal_data.kqn_eptx); - - /**********************************************************************/ - /* No more threads. No more portals, router or comms callbacks! - * I control the horizontals and the verticals... - */ - - /**********************************************************************/ - /* Complete any blocked forwarding packets with error - */ - - while (!list_empty (&kqswnal_data.kqn_idletxd_fwdq)) - { - kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); - } - - while (!list_empty (&kqswnal_data.kqn_delayedfwds)) - { - kpr_fwd_desc_t *fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, -EHOSTUNREACH); - } - - /**********************************************************************/ - /* Wait for router to complete any packets I sent her - */ - - kpr_deregister (&kqswnal_data.kqn_router); - - - /**********************************************************************/ - /* Unmap message buffers and free all descriptors and buffers - */ - - if (kqswnal_data.kqn_eprxdmahandle != NULL) - { - elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eprxdmahandle, 0, - KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL + - KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE); - - elan3_dma_release(kqswnal_data.kqn_epdev->DmaState, - 
kqswnal_data.kqn_eprxdmahandle); - } - - if (kqswnal_data.kqn_eptxdmahandle != NULL) - { - elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, 0, - KQSW_NTXMSGPAGES * (KQSW_NTXMSGS + - KQSW_NNBLK_TXMSGS)); - - elan3_dma_release(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle); - } - - if (kqswnal_data.kqn_txds != NULL) - { - int i; - - for (i = 0; i < KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS; i++) - { - kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i]; - - if (ktx->ktx_buffer != NULL) - PORTAL_FREE(ktx->ktx_buffer, - KQSW_TX_BUFFER_SIZE); - } - - PORTAL_FREE(kqswnal_data.kqn_txds, - sizeof (kqswnal_tx_t) * (KQSW_NTXMSGS + - KQSW_NNBLK_TXMSGS)); - } - - if (kqswnal_data.kqn_rxds != NULL) - { - int i; - int j; - - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - - for (j = 0; j < krx->krx_npages; j++) - if (krx->krx_pages[j] != NULL) - __free_page (krx->krx_pages[j]); - } - - PORTAL_FREE(kqswnal_data.kqn_rxds, - sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + - KQSW_NRXMSGS_LARGE)); - } - - /* resets flags, pointers to NULL etc */ - memset(&kqswnal_data, 0, sizeof (kqswnal_data)); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read(&portal_kmemory)); - - printk (KERN_INFO "Routing QSW NAL unloaded (final mem %d)\n", - atomic_read(&portal_kmemory)); -} - -static int __init -kqswnal_initialise (void) -{ - ELAN3_DMA_REQUEST dmareq; - int rc; - int i; - int elan_page_idx; - int pkmem = atomic_read(&portal_kmemory); - - LASSERT (kqswnal_data.kqn_init == KQN_INIT_NOTHING); - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read(&portal_kmemory)); - - kqswnal_api.forward = kqswnal_forward; - kqswnal_api.shutdown = kqswnal_shutdown; - kqswnal_api.yield = kqswnal_yield; - kqswnal_api.validate = NULL; /* our api validate is a NOOP */ - kqswnal_api.lock = kqswnal_lock; - kqswnal_api.unlock = kqswnal_unlock; - kqswnal_api.nal_data = &kqswnal_data; - - kqswnal_lib.nal_data = 
&kqswnal_data; - - /* ensure all pointers NULL etc */ - memset (&kqswnal_data, 0, sizeof (kqswnal_data)); - - kqswnal_data.kqn_cb = &kqswnal_lib; - - INIT_LIST_HEAD (&kqswnal_data.kqn_idletxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_nblk_idletxds); - spin_lock_init (&kqswnal_data.kqn_idletxd_lock); - init_waitqueue_head (&kqswnal_data.kqn_idletxd_waitq); - INIT_LIST_HEAD (&kqswnal_data.kqn_idletxd_fwdq); - - INIT_LIST_HEAD (&kqswnal_data.kqn_delayedfwds); - INIT_LIST_HEAD (&kqswnal_data.kqn_delayedtxds); - INIT_LIST_HEAD (&kqswnal_data.kqn_readyrxds); - - spin_lock_init (&kqswnal_data.kqn_sched_lock); - init_waitqueue_head (&kqswnal_data.kqn_sched_waitq); - - spin_lock_init (&kqswnal_data.kqn_statelock); - - /* pointers/lists/locks initialised */ - kqswnal_data.kqn_init = KQN_INIT_DATA; - - /**********************************************************************/ - /* Find the first Elan device */ - - kqswnal_data.kqn_epdev = ep_device (0); - if (kqswnal_data.kqn_epdev == NULL) - { - CERROR ("Can't get elan device 0\n"); - return (-ENOMEM); - } - - kqswnal_data.kqn_nid_offset = 0; - kqswnal_data.kqn_nnodes = ep_numnodes (kqswnal_data.kqn_epdev); - kqswnal_data.kqn_elanid = ep_nodeid (kqswnal_data.kqn_epdev); - - /**********************************************************************/ - /* Get the transmitter */ - - kqswnal_data.kqn_eptx = ep_alloc_large_xmtr (kqswnal_data.kqn_epdev); - if (kqswnal_data.kqn_eptx == NULL) - { - CERROR ("Can't allocate transmitter\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Get the receivers */ - - kqswnal_data.kqn_eprx_small = ep_install_large_rcvr (kqswnal_data.kqn_epdev, - EP_SVC_LARGE_PORTALS_SMALL, - KQSW_EP_ENVELOPES_SMALL); - if (kqswnal_data.kqn_eprx_small == NULL) - { - CERROR ("Can't install small msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - kqswnal_data.kqn_eprx_large = ep_install_large_rcvr 
(kqswnal_data.kqn_epdev, - EP_SVC_LARGE_PORTALS_LARGE, - KQSW_EP_ENVELOPES_LARGE); - if (kqswnal_data.kqn_eprx_large == NULL) - { - CERROR ("Can't install large msg receiver\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for transmit buffers */ - - dmareq.Waitfn = DDI_DMA_SLEEP; - dmareq.ElanAddr = (E3_Addr) 0; - dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN; - dmareq.Perm = ELAN_PERM_REMOTEREAD; - - rc = elan3_dma_reserve(kqswnal_data.kqn_epdev->DmaState, - KQSW_NTXMSGPAGES*(KQSW_NTXMSGS+KQSW_NNBLK_TXMSGS), - &dmareq, &kqswnal_data.kqn_eptxdmahandle); - if (rc != DDI_SUCCESS) - { - CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Reserve Elan address space for receive buffers */ - - dmareq.Waitfn = DDI_DMA_SLEEP; - dmareq.ElanAddr = (E3_Addr) 0; - dmareq.Attr = PTE_LOAD_LITTLE_ENDIAN; - dmareq.Perm = ELAN_PERM_REMOTEWRITE; - - rc = elan3_dma_reserve (kqswnal_data.kqn_epdev->DmaState, - KQSW_NRXMSGPAGES_SMALL * KQSW_NRXMSGS_SMALL + - KQSW_NRXMSGPAGES_LARGE * KQSW_NRXMSGS_LARGE, - &dmareq, &kqswnal_data.kqn_eprxdmahandle); - if (rc != DDI_SUCCESS) - { - CERROR ("Can't reserve rx dma space\n"); - kqswnal_finalise (); - return (-ENOMEM); - } - - /**********************************************************************/ - /* Allocate/Initialise transmit descriptors */ - - PORTAL_ALLOC(kqswnal_data.kqn_txds, - sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); - if (kqswnal_data.kqn_txds == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - /* clear flags, null pointers etc */ - memset(kqswnal_data.kqn_txds, 0, - sizeof(kqswnal_tx_t) * (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS)); - for (i = 0; i < (KQSW_NTXMSGS + KQSW_NNBLK_TXMSGS); i++) - { - int premapped_pages; - kqswnal_tx_t *ktx = &kqswnal_data.kqn_txds[i]; - int basepage = i * 
KQSW_NTXMSGPAGES; - - PORTAL_ALLOC (ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE); - if (ktx->ktx_buffer == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - /* Map pre-allocated buffer NOW, to save latency on transmit */ - premapped_pages = kqswnal_pages_spanned(ktx->ktx_buffer, - KQSW_TX_BUFFER_SIZE); - - elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - ktx->ktx_buffer, KQSW_TX_BUFFER_SIZE, - basepage, &ktx->ktx_ebuffer); - - ktx->ktx_basepage = basepage + premapped_pages; /* message mapping starts here */ - ktx->ktx_npages = KQSW_NTXMSGPAGES - premapped_pages; /* for this many pages */ - - if (i < KQSW_NTXMSGS) - ktx->ktx_idle = &kqswnal_data.kqn_idletxds; - else - ktx->ktx_idle = &kqswnal_data.kqn_nblk_idletxds; - - list_add_tail (&ktx->ktx_list, ktx->ktx_idle); - } - - /**********************************************************************/ - /* Allocate/Initialise receive descriptors */ - - PORTAL_ALLOC (kqswnal_data.kqn_rxds, - sizeof (kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE)); - if (kqswnal_data.kqn_rxds == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - memset(kqswnal_data.kqn_rxds, 0, /* clear flags, null pointers etc */ - sizeof(kqswnal_rx_t) * (KQSW_NRXMSGS_SMALL+KQSW_NRXMSGS_LARGE)); - - elan_page_idx = 0; - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - E3_Addr elanaddr; - int j; - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - - if (i < KQSW_NRXMSGS_SMALL) - { - krx->krx_npages = KQSW_NRXMSGPAGES_SMALL; - krx->krx_eprx = kqswnal_data.kqn_eprx_small; - } - else - { - krx->krx_npages = KQSW_NRXMSGPAGES_LARGE; - krx->krx_eprx = kqswnal_data.kqn_eprx_large; - } - - LASSERT (krx->krx_npages > 0); - for (j = 0; j < krx->krx_npages; j++) - { - krx->krx_pages[j] = alloc_page(GFP_KERNEL); - if (krx->krx_pages[j] == NULL) - { - kqswnal_finalise (); - return (-ENOMEM); - } - - LASSERT(page_address(krx->krx_pages[j]) != NULL); - - 
elan3_dvma_kaddr_load(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eprxdmahandle, - page_address(krx->krx_pages[j]), - PAGE_SIZE, elan_page_idx, - &elanaddr); - elan_page_idx++; - - if (j == 0) - krx->krx_elanaddr = elanaddr; - - /* NB we assume a contiguous */ - LASSERT (elanaddr == krx->krx_elanaddr + j * PAGE_SIZE); - } - } - LASSERT (elan_page_idx == - (KQSW_NRXMSGS_SMALL * KQSW_NRXMSGPAGES_SMALL) + - (KQSW_NRXMSGS_LARGE * KQSW_NRXMSGPAGES_LARGE)); - - /**********************************************************************/ - /* Network interface ready to initialise */ - - rc = PtlNIInit(kqswnal_init, 32, 4, 0, &kqswnal_ni); - if (rc != 0) - { - CERROR ("PtlNIInit failed %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); - } - - kqswnal_data.kqn_init = KQN_INIT_PTL; - - /**********************************************************************/ - /* Queue receives, now that it's OK to run their completion callbacks */ - - for (i = 0; i < KQSW_NRXMSGS_SMALL + KQSW_NRXMSGS_LARGE; i++) - { - kqswnal_rx_t *krx = &kqswnal_data.kqn_rxds[i]; - - /* NB this enqueue can allocate/sleep (attr == 0) */ - rc = ep_queue_receive(krx->krx_eprx, kqswnal_rxhandler, krx, - krx->krx_elanaddr, - krx->krx_npages * PAGE_SIZE, 0); - if (rc != 0) - { - CERROR ("failed ep_queue_receive %d\n", rc); - kqswnal_finalise (); - return (-ENOMEM); - } - } - - /**********************************************************************/ - /* Spawn scheduling threads */ - for (i = 0; i < smp_num_cpus; i++) - { - rc = kqswnal_thread_start (kqswnal_scheduler, NULL); - if (rc != 0) - { - CERROR ("failed to spawn scheduling thread: %d\n", rc); - kqswnal_finalise (); - return (rc); - } - } - - /**********************************************************************/ - /* Connect to the router */ - rc = kpr_register (&kqswnal_data.kqn_router, &kqswnal_router_interface); - CDEBUG(D_NET, "Can't initialise routing interface (rc = %d): not routing\n",rc); - - rc = kportal_nal_register (QSWNAL, 
&kqswnal_cmd, NULL); - if (rc != 0) { - CERROR ("Can't initialise command interface (rc = %d)\n", rc); - kqswnal_finalise (); - return (rc); - } - - PORTAL_SYMBOL_REGISTER(kqswnal_ni); - kqswnal_data.kqn_init = KQN_INIT_ALL; - - printk(KERN_INFO "Routing QSW NAL loaded on node %d of %d " - "(Routing %s, initial mem %d)\n", - kqswnal_data.kqn_elanid, kqswnal_data.kqn_nnodes, - kpr_routing (&kqswnal_data.kqn_router) ? "enabled" : "disabled", - pkmem); - - return (0); -} - - -MODULE_AUTHOR("W. Marcus Miller <marcusm@llnl.gov>"); -MODULE_DESCRIPTION("Kernel Quadrics Switch NAL v1.00"); -MODULE_LICENSE("GPL"); - -module_init (kqswnal_initialise); -module_exit (kqswnal_finalise); - -EXPORT_SYMBOL (kqswnal_ni); diff --git a/lustre/portals/knals/qswnal/qswnal.h b/lustre/portals/knals/qswnal/qswnal.h deleted file mode 100644 index 85e585b..0000000 --- a/lustre/portals/knals/qswnal/qswnal.h +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. <braam@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Basic library routines. 
- * - */ - -#ifndef _QSWNAL_H -#define _QSWNAL_H -#define EXPORT_SYMTAB - -#ifdef PROPRIETARY_ELAN -# include <qsw/kernel.h> -#else -# include <qsnet/kernel.h> -#endif - -#undef printf /* nasty QSW #define */ - -#include <linux/config.h> -#include <linux/module.h> - -#include <elan3/elanregs.h> -#include <elan3/elandev.h> -#include <elan3/elanvp.h> -#include <elan3/elan3mmu.h> -#include <elan3/elanctxt.h> -#include <elan3/elandebug.h> -#include <elan3/urom_addrs.h> -#include <elan3/busops.h> -#include <elan3/kcomm.h> - -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/locks.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <asm/uaccess.h> -#include <asm/segment.h> - -#define DEBUG_SUBSYSTEM S_QSWNAL - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <portals/lib-p30.h> - -#define KQSW_CHECKSUM 0 -#if KQSW_CHECKSUM -typedef unsigned long kqsw_csum_t; -#define KQSW_CSUM_SIZE (2 * sizeof (kqsw_csum_t)) -#else -#define KQSW_CSUM_SIZE 0 -#endif -#define KQSW_HDR_SIZE (sizeof (ptl_hdr_t) + KQSW_CSUM_SIZE) - -/* - * Elan NAL - */ -#define EP_SVC_LARGE_PORTALS_SMALL (0x10) /* Portals over elan port number (large payloads) */ -#define EP_SVC_LARGE_PORTALS_LARGE (0x11) /* Portals over elan port number (small payloads) */ -/* NB small/large message sizes are GLOBAL constants */ - -/* - * Performance Tuning defines - * NB no mention of PAGE_SIZE for interoperability - */ -#if PTL_LARGE_MTU -# define KQSW_MAXPAYLOAD (256<<10) /* biggest message this NAL will cope with */ -#else -# define KQSW_MAXPAYLOAD (64<<10) /* biggest message this NAL will cope with */ -#endif - -#define KQSW_SMALLPAYLOAD ((4<<10) - KQSW_HDR_SIZE) /* small/large ep receiver breakpoint */ - -#define 
KQSW_TX_MAXCONTIG (1<<10) /* largest payload that gets made contiguous on transmit */ - -#define KQSW_NTXMSGS 8 /* # normal transmit messages */ -#define KQSW_NNBLK_TXMSGS 256 /* # reserved transmit messages if can't block */ - -#define KQSW_NRXMSGS_LARGE 64 /* # large receive buffers */ -#define KQSW_EP_ENVELOPES_LARGE 128 /* # large ep envelopes */ - -#define KQSW_NRXMSGS_SMALL 256 /* # small receive buffers */ -#define KQSW_EP_ENVELOPES_SMALL 2048 /* # small ep envelopes */ - -#define KQSW_RESCHED 100 /* # busy loops that forces scheduler to yield */ - -/* - * derived constants - */ - -#define KQSW_TX_BUFFER_SIZE (KQSW_HDR_SIZE + KQSW_TX_MAXCONTIG) -/* The pre-allocated tx buffer (hdr + small payload) */ - -#define KQSW_NTXMSGPAGES (btopr(KQSW_TX_BUFFER_SIZE) + 1 + btopr(KQSW_MAXPAYLOAD) + 1) -/* Reserve elan address space for pre-allocated and pre-mapped transmit - * buffer and a full payload too. Extra pages allow for page alignment */ - -#define KQSW_NRXMSGPAGES_SMALL (btopr(KQSW_HDR_SIZE + KQSW_SMALLPAYLOAD)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_SMALL (KQSW_NRXMSGPAGES_SMALL * PAGE_SIZE) - -#define KQSW_NRXMSGPAGES_LARGE (btopr(KQSW_HDR_SIZE + KQSW_MAXPAYLOAD)) -/* receive hdr/payload always contiguous and page aligned */ -#define KQSW_NRXMSGBYTES_LARGE (KQSW_NRXMSGPAGES_LARGE * PAGE_SIZE) -/* biggest complete packet we can receive (or transmit) */ - - -typedef struct -{ - struct list_head krx_list; /* enqueue -> thread */ - EP_RCVR *krx_eprx; /* port to post receives to */ - EP_RXD *krx_rxd; /* receive descriptor (for repost) */ - E3_Addr krx_elanaddr; /* Elan address of buffer (contiguous in elan vm) */ - int krx_npages; /* # pages in receive buffer */ - int krx_nob; /* Number Of Bytes received into buffer */ - kpr_fwd_desc_t krx_fwd; /* embedded forwarding descriptor */ - struct page *krx_pages[KQSW_NRXMSGPAGES_LARGE]; /* pages allocated */ - struct iovec krx_iov[KQSW_NRXMSGPAGES_LARGE]; /* iovec for 
forwarding */ -} kqswnal_rx_t; - -typedef struct -{ - struct list_head ktx_list; /* enqueue idle/delayed */ - struct list_head *ktx_idle; /* where to put when idle */ - char ktx_state; /* What I'm doing */ - uint32_t ktx_basepage; /* page offset in reserved elan tx vaddrs for mapping pages */ - int ktx_npages; /* pages reserved for mapping messages */ - int ktx_nmappedpages; /* # pages mapped for current message */ - EP_IOVEC ktx_iov[EP_MAXFRAG]; /* msg frags (elan vaddrs) */ - int ktx_niov; /* # message frags */ - int ktx_port; /* destination ep port */ - ptl_nid_t ktx_nid; /* destination node */ - void *ktx_args[2]; /* completion passthru */ - E3_Addr ktx_ebuffer; /* elan address of ktx_buffer */ - char *ktx_buffer; /* pre-allocated contiguous buffer for hdr + small payloads */ -} kqswnal_tx_t; - -#define KTX_IDLE 0 /* MUST BE ZERO (so zeroed ktx is idle) */ -#define KTX_SENDING 1 /* local send */ -#define KTX_FORWARDING 2 /* routing a packet */ - -typedef struct -{ - char kqn_init; /* what's been initialised */ - char kqn_shuttingdown; /* I'm trying to shut down */ - atomic_t kqn_nthreads; /* # threads still running */ - - kqswnal_rx_t *kqn_rxds; /* all the receive descriptors */ - kqswnal_tx_t *kqn_txds; /* all the transmit descriptors */ - - struct list_head kqn_idletxds; /* transmit descriptors free to use */ - struct list_head kqn_nblk_idletxds; /* reserve of */ - spinlock_t kqn_idletxd_lock; /* serialise idle txd access */ - wait_queue_head_t kqn_idletxd_waitq; /* sender blocks here waiting for idle txd */ - struct list_head kqn_idletxd_fwdq; /* forwarded packets block here waiting for idle txd */ - - spinlock_t kqn_sched_lock; /* serialise packet schedulers */ - wait_queue_head_t kqn_sched_waitq; /* scheduler blocks here */ - - struct list_head kqn_readyrxds; /* rxds full of data */ - struct list_head kqn_delayedfwds; /* delayed forwards */ - struct list_head kqn_delayedtxds; /* delayed transmits */ - - spinlock_t kqn_statelock; /* cb_cli/cb_sti */ - 
nal_cb_t *kqn_cb; /* -> kqswnal_lib */ - EP_DEV *kqn_epdev; /* elan device */ - EP_XMTR *kqn_eptx; /* elan transmitter */ - EP_RCVR *kqn_eprx_small; /* elan receiver (small messages) */ - EP_RCVR *kqn_eprx_large; /* elan receiver (large messages) */ - ELAN3_DMA_HANDLE *kqn_eptxdmahandle; /* elan reserved tx vaddrs */ - ELAN3_DMA_HANDLE *kqn_eprxdmahandle; /* elan reserved rx vaddrs */ - kpr_router_t kqn_router; /* connection to Kernel Portals Router module */ - - ptl_nid_t kqn_nid_offset; /* this cluster's NID offset */ - int kqn_nnodes; /* this cluster's size */ - int kqn_elanid; /* this nodes's elan ID */ -} kqswnal_data_t; - -/* kqn_init state */ -#define KQN_INIT_NOTHING 0 /* MUST BE ZERO so zeroed state is initialised OK */ -#define KQN_INIT_DATA 1 -#define KQN_INIT_PTL 2 -#define KQN_INIT_ALL 3 - -extern nal_cb_t kqswnal_lib; -extern nal_t kqswnal_api; -extern kqswnal_data_t kqswnal_data; - -extern int kqswnal_thread_start (int (*fn)(void *arg), void *arg); -extern void kqswnal_rxhandler(EP_RXD *rxd); -extern int kqswnal_scheduler (void *); -extern void kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); - -static inline ptl_nid_t -kqswnal_elanid2nid (int elanid) -{ - return (kqswnal_data.kqn_nid_offset + elanid); -} - -static inline int -kqswnal_nid2elanid (ptl_nid_t nid) -{ - /* not in this cluster? 
*/ - if (nid < kqswnal_data.kqn_nid_offset || - nid >= kqswnal_data.kqn_nid_offset + kqswnal_data.kqn_nnodes) - return (-1); - - return (nid - kqswnal_data.kqn_nid_offset); -} - -static inline void -kqswnal_requeue_rx (kqswnal_rx_t *krx) -{ - ep_requeue_receive (krx->krx_rxd, kqswnal_rxhandler, krx, - krx->krx_elanaddr, krx->krx_npages * PAGE_SIZE); -} - -static inline int -kqswnal_pages_spanned (void *base, int nob) -{ - unsigned long first_page = ((unsigned long)base) >> PAGE_SHIFT; - unsigned long last_page = (((unsigned long)base) + (nob - 1)) >> PAGE_SHIFT; - - LASSERT (last_page >= first_page); /* can't wrap address space */ - return (last_page - first_page + 1); -} - -#if KQSW_CHECKSUM -static inline kqsw_csum_t kqsw_csum (kqsw_csum_t sum, void *base, int nob) -{ - unsigned char *ptr = (unsigned char *)base; - - while (nob-- > 0) - sum += *ptr++; - - return (sum); -} -#endif - -#endif /* _QSWNAL_H */ diff --git a/lustre/portals/knals/qswnal/qswnal_cb.c b/lustre/portals/knals/qswnal/qswnal_cb.c deleted file mode 100644 index c03d592..0000000 --- a/lustre/portals/knals/qswnal/qswnal_cb.c +++ /dev/null @@ -1,1240 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eric@bartonsoftware.com> - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * W. Marcus Miller - Based on ksocknal - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "qswnal.h" - -atomic_t kqswnal_packets_launched; -atomic_t kqswnal_packets_transmitted; -atomic_t kqswnal_packets_received; - - -/* - * LIB functions follow - * - */ -static int -kqswnal_read(nal_cb_t *nal, void *private, void *dst_addr, user_ptr src_addr, - size_t len) -{ - CDEBUG (D_NET, LPX64": reading "LPSZ" bytes from %p -> %p\n", - nal->ni.nid, len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - - return (0); -} - -static int -kqswnal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, void *src_addr, - size_t len) -{ - CDEBUG (D_NET, LPX64": writing "LPSZ" bytes from %p -> %p\n", - nal->ni.nid, len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - - return (0); -} - -static void * -kqswnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - return (buf); -} - -static void -kqswnal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - -static void -kqswnal_printf (nal_cb_t * nal, const char *fmt, ...) 
-{ - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); -} - - -static void -kqswnal_cli(nal_cb_t *nal, unsigned long *flags) -{ - kqswnal_data_t *data= nal->nal_data; - - spin_lock_irqsave(&data->kqn_statelock, *flags); -} - - -static void -kqswnal_sti(nal_cb_t *nal, unsigned long *flags) -{ - kqswnal_data_t *data= nal->nal_data; - - spin_unlock_irqrestore(&data->kqn_statelock, *flags); -} - - -static int -kqswnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - if (nid == nal->ni.nid) - *dist = 0; /* it's me */ - else if (kqswnal_nid2elanid (nid) >= 0) - *dist = 1; /* it's my peer */ - else - *dist = 2; /* via router */ - return (0); -} - -void -kqswnal_unmap_tx (kqswnal_tx_t *ktx) -{ - if (ktx->ktx_nmappedpages == 0) - return; - - CDEBUG (D_NET, "%p[%d] unloading pages %d for %d\n", - ktx, ktx->ktx_niov, ktx->ktx_basepage, ktx->ktx_nmappedpages); - - LASSERT (ktx->ktx_nmappedpages <= ktx->ktx_npages); - LASSERT (ktx->ktx_basepage + ktx->ktx_nmappedpages <= - kqswnal_data.kqn_eptxdmahandle->NumDvmaPages); - - elan3_dvma_unload(kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - ktx->ktx_basepage, ktx->ktx_nmappedpages); - ktx->ktx_nmappedpages = 0; -} - -int -kqswnal_map_tx_kiov (kqswnal_tx_t *ktx, int nob, int niov, ptl_kiov_t *kiov) -{ - int nfrags = ktx->ktx_niov; - const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]); - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - uint32_t basepage = ktx->ktx_basepage + nmapped; - char *ptr; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags <= maxfrags); - LASSERT (niov > 0); - LASSERT (nob > 0); - - do { - int fraglen = kiov->kiov_len; - - /* nob exactly spans the iovs */ - LASSERT (fraglen <= nob); - /* each frag fits in a page */ - LASSERT (kiov->kiov_offset + kiov->kiov_len <= 
PAGE_SIZE); - - nmapped++; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == maxfrags) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - maxfrags); - return (-EMSGSIZE); - } - - /* XXX this is really crap, but we'll have to kmap until - * EKC has a page (rather than vaddr) mapping interface */ - - ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, page %d, %d total\n", - ktx, nfrags, ptr, fraglen, basepage, nmapped); - - elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - ptr, fraglen, - basepage, &ktx->ktx_iov[nfrags].Base); - - kunmap (kiov->kiov_page); - - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - if (nfrags > 0 && /* previous frag mapped */ - ktx->ktx_iov[nfrags].Base == /* contiguous with this one */ - (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len)) - /* just extend previous */ - ktx->ktx_iov[nfrags - 1].Len += fraglen; - else { - ktx->ktx_iov[nfrags].Len = fraglen; - nfrags++; /* new frag */ - } - - basepage++; - kiov++; - niov--; - nob -= fraglen; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - ktx->ktx_niov = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_niov, ktx->ktx_nmappedpages); - - return (0); -} - -int -kqswnal_map_tx_iov (kqswnal_tx_t *ktx, int nob, int niov, struct iovec *iov) -{ - int nfrags = ktx->ktx_niov; - const int maxfrags = sizeof (ktx->ktx_iov)/sizeof (ktx->ktx_iov[0]); - int nmapped = ktx->ktx_nmappedpages; - int maxmapped = ktx->ktx_npages; - uint32_t basepage = ktx->ktx_basepage + nmapped; - - LASSERT (nmapped <= maxmapped); - LASSERT (nfrags <= maxfrags); - LASSERT (niov > 0); - LASSERT (nob > 0); - - do { - int fraglen = iov->iov_len; - long npages = kqswnal_pages_spanned (iov->iov_base, 
fraglen); - - /* nob exactly spans the iovs */ - LASSERT (fraglen <= nob); - - nmapped += npages; - if (nmapped > maxmapped) { - CERROR("Can't map message in %d pages (max %d)\n", - nmapped, maxmapped); - return (-EMSGSIZE); - } - - if (nfrags == maxfrags) { - CERROR("Message too fragmented in Elan VM (max %d frags)\n", - maxfrags); - return (-EMSGSIZE); - } - - CDEBUG(D_NET, - "%p[%d] loading %p for %d, pages %d for %ld, %d total\n", - ktx, nfrags, iov->iov_base, fraglen, basepage, npages, - nmapped); - - elan3_dvma_kaddr_load (kqswnal_data.kqn_epdev->DmaState, - kqswnal_data.kqn_eptxdmahandle, - iov->iov_base, fraglen, - basepage, &ktx->ktx_iov[nfrags].Base); - /* keep in loop for failure case */ - ktx->ktx_nmappedpages = nmapped; - - if (nfrags > 0 && /* previous frag mapped */ - ktx->ktx_iov[nfrags].Base == /* contiguous with this one */ - (ktx->ktx_iov[nfrags-1].Base + ktx->ktx_iov[nfrags-1].Len)) - /* just extend previous */ - ktx->ktx_iov[nfrags - 1].Len += fraglen; - else { - ktx->ktx_iov[nfrags].Len = fraglen; - nfrags++; /* new frag */ - } - - basepage += npages; - iov++; - niov--; - nob -= fraglen; - - /* iov must not run out before end of data */ - LASSERT (nob == 0 || niov > 0); - - } while (nob > 0); - - ktx->ktx_niov = nfrags; - CDEBUG (D_NET, "%p got %d frags over %d pages\n", - ktx, ktx->ktx_niov, ktx->ktx_nmappedpages); - - return (0); -} - -void -kqswnal_put_idle_tx (kqswnal_tx_t *ktx) -{ - kpr_fwd_desc_t *fwd = NULL; - struct list_head *idle = ktx->ktx_idle; - unsigned long flags; - - kqswnal_unmap_tx (ktx); /* release temporary mappings */ - ktx->ktx_state = KTX_IDLE; - - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - list_add (&ktx->ktx_list, idle); - - /* reserved for non-blocking tx */ - if (idle == &kqswnal_data.kqn_nblk_idletxds) { - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - return; - } - - /* anything blocking for a tx descriptor? 
*/ - if (!list_empty(&kqswnal_data.kqn_idletxd_fwdq)) /* forwarded packet? */ - { - CDEBUG(D_NET,"wakeup fwd\n"); - - fwd = list_entry (kqswnal_data.kqn_idletxd_fwdq.next, - kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - } - - if (waitqueue_active (&kqswnal_data.kqn_idletxd_waitq)) /* process? */ - { - /* local sender waiting for tx desc */ - CDEBUG(D_NET,"wakeup process\n"); - wake_up (&kqswnal_data.kqn_idletxd_waitq); - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - if (fwd == NULL) - return; - - /* schedule packet for forwarding again */ - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&fwd->kprfd_list, &kqswnal_data.kqn_delayedfwds); - if (waitqueue_active (&kqswnal_data.kqn_sched_waitq)) - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -} - -kqswnal_tx_t * -kqswnal_get_idle_tx (kpr_fwd_desc_t *fwd, int may_block) -{ - unsigned long flags; - kqswnal_tx_t *ktx = NULL; - - for (;;) { - spin_lock_irqsave (&kqswnal_data.kqn_idletxd_lock, flags); - - /* "normal" descriptor is free */ - if (!list_empty (&kqswnal_data.kqn_idletxds)) { - ktx = list_entry (kqswnal_data.kqn_idletxds.next, - kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - break; - } - - /* "normal" descriptor pool is empty */ - - if (fwd != NULL) { /* forwarded packet => queue for idle txd */ - CDEBUG (D_NET, "blocked fwd [%p]\n", fwd); - list_add_tail (&fwd->kprfd_list, - &kqswnal_data.kqn_idletxd_fwdq); - break; - } - - /* doing a local transmit */ - if (!may_block) { - if (list_empty (&kqswnal_data.kqn_nblk_idletxds)) { - CERROR ("intr tx desc pool exhausted\n"); - break; - } - - ktx = list_entry (kqswnal_data.kqn_nblk_idletxds.next, - kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - break; - } - - /* block for idle tx */ - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - CDEBUG (D_NET, "blocking for tx desc\n"); - wait_event 
(kqswnal_data.kqn_idletxd_waitq, - !list_empty (&kqswnal_data.kqn_idletxds)); - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_idletxd_lock, flags); - - /* Idle descs can't have any mapped (as opposed to pre-mapped) pages */ - LASSERT (ktx == NULL || ktx->ktx_nmappedpages == 0); - return (ktx); -} - -void -kqswnal_tx_done (kqswnal_tx_t *ktx, int error) -{ - switch (ktx->ktx_state) { - case KTX_FORWARDING: /* router asked me to forward this packet */ - kpr_fwd_done (&kqswnal_data.kqn_router, - (kpr_fwd_desc_t *)ktx->ktx_args[0], error); - break; - - case KTX_SENDING: /* packet sourced locally */ - lib_finalize (&kqswnal_lib, ktx->ktx_args[0], - (lib_msg_t *)ktx->ktx_args[1]); - break; - - default: - LASSERT (0); - } - - kqswnal_put_idle_tx (ktx); -} - -static void -kqswnal_txhandler(EP_TXD *txd, void *arg, int status) -{ - kqswnal_tx_t *ktx = (kqswnal_tx_t *)arg; - - LASSERT (txd != NULL); - LASSERT (ktx != NULL); - - CDEBUG(D_NET, "txd %p, arg %p status %d\n", txd, arg, status); - - if (status == EP_SUCCESS) - atomic_inc (&kqswnal_packets_transmitted); - - if (status != EP_SUCCESS) - { - CERROR ("kqswnal: Transmit failed with %d\n", status); - status = -EIO; - } - - kqswnal_tx_done (ktx, status); -} - -int -kqswnal_launch (kqswnal_tx_t *ktx) -{ - /* Don't block for transmit descriptor if we're in interrupt context */ - int attr = in_interrupt() ? 
(EP_NO_SLEEP | EP_NO_ALLOC) : 0; - int dest = kqswnal_nid2elanid (ktx->ktx_nid); - long flags; - int rc; - - LASSERT (dest >= 0); /* must be a peer */ - rc = ep_transmit_large(kqswnal_data.kqn_eptx, dest, - ktx->ktx_port, attr, kqswnal_txhandler, - ktx, ktx->ktx_iov, ktx->ktx_niov); - if (rc == 0) - atomic_inc (&kqswnal_packets_launched); - - if (rc != ENOMEM) - return (rc); - - /* can't allocate ep txd => queue for later */ - - LASSERT (in_interrupt()); /* not called by thread (not looping) */ - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&ktx->ktx_list, &kqswnal_data.kqn_delayedtxds); - if (waitqueue_active (&kqswnal_data.kqn_sched_waitq)) - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - - return (0); -} - - -static char * -hdr_type_string (ptl_hdr_t *hdr) -{ - switch (hdr->type) { - case PTL_MSG_ACK: - return ("ACK"); - case PTL_MSG_PUT: - return ("PUT"); - case PTL_MSG_GET: - return ("GET"); - case PTL_MSG_REPLY: - return ("REPLY"); - default: - return ("<UNKNOWN>"); - } -} - -static void -kqswnal_cerror_hdr(ptl_hdr_t * hdr) -{ - char *type_str = hdr_type_string (hdr); - - CERROR("P3 Header at %p of type %s\n", hdr, type_str); - CERROR(" From nid/pid "LPU64"/%u", NTOH__u64(hdr->src_nid), - NTOH__u32(hdr->src_pid)); - CERROR(" To nid/pid "LPU64"/%u\n", NTOH__u64(hdr->dest_nid), - NTOH__u32(hdr->dest_pid)); - - switch (NTOH__u32(hdr->type)) { - case PTL_MSG_PUT: - CERROR(" Ptl index %d, ack md "LPX64"."LPX64", " - "match bits "LPX64"\n", - NTOH__u32 (hdr->msg.put.ptl_index), - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - NTOH__u64 (hdr->msg.put.match_bits)); - CERROR(" Length %d, offset %d, hdr data "LPX64"\n", - NTOH__u32(PTL_HDR_LENGTH(hdr)), - NTOH__u32(hdr->msg.put.offset), - hdr->msg.put.hdr_data); - break; - - case PTL_MSG_GET: - CERROR(" Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPX64"\n", - NTOH__u32 
(hdr->msg.get.ptl_index), - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - CERROR(" Length %d, src offset %d\n", - NTOH__u32 (hdr->msg.get.sink_length), - NTOH__u32 (hdr->msg.get.src_offset)); - break; - - case PTL_MSG_ACK: - CERROR(" dst md "LPX64"."LPX64", manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - NTOH__u32 (hdr->msg.ack.mlength)); - break; - - case PTL_MSG_REPLY: - CERROR(" dst md "LPX64"."LPX64", length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - NTOH__u32 (PTL_HDR_LENGTH(hdr))); - } - -} /* end of print_hdr() */ - -static int -kqswnal_sendmsg (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - ptl_kiov_t *payload_kiov, - size_t payload_nob) -{ - kqswnal_tx_t *ktx; - int rc; - ptl_nid_t gatewaynid; -#if KQSW_CHECKSUM - int i; - kqsw_csum_t csum; - int sumnob; -#endif - - /* NB, the return code from this procedure is ignored. - * If we can't send, we must still complete with lib_finalize(). - * We'll have to wait for 3.2 to return an error event. - */ - - CDEBUG(D_NET, "sending "LPSZ" bytes in %d frags to nid: "LPX64 - " pid %u\n", payload_nob, payload_niov, nid, pid); - - LASSERT (payload_nob == 0 || payload_niov > 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - /* It must be OK to kmap() if required */ - LASSERT (payload_kiov == NULL || !in_interrupt ()); - /* payload is either all vaddrs or all pages */ - LASSERT (!(payload_kiov != NULL && payload_iov != NULL)); - - if (payload_nob > KQSW_MAXPAYLOAD) { - CERROR ("request exceeds MTU size "LPSZ" (max %u).\n", - payload_nob, KQSW_MAXPAYLOAD); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - - if (kqswnal_nid2elanid (nid) < 0) { /* Can't send direct: find gateway? 
*/ - rc = kpr_lookup (&kqswnal_data.kqn_router, nid, &gatewaynid); - if (rc != 0) { - CERROR("Can't route to "LPX64": router error %d\n", - nid, rc); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - if (kqswnal_nid2elanid (gatewaynid) < 0) { - CERROR("Bad gateway "LPX64" for "LPX64"\n", - gatewaynid, nid); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - nid = gatewaynid; - } - - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. */ - ktx = kqswnal_get_idle_tx(NULL, !(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt())); - if (ktx == NULL) { - kqswnal_cerror_hdr (hdr); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - - memcpy (ktx->ktx_buffer, hdr, sizeof (*hdr)); /* copy hdr from caller's stack */ - -#if KQSW_CHECKSUM - csum = kqsw_csum (0, (char *)hdr, sizeof (*hdr)); - memcpy (ktx->ktx_buffer + sizeof (*hdr), &csum, sizeof (csum)); - for (csum = 0, i = 0, sumnob = payload_nob; sumnob > 0; i++) { - if (payload_kiov != NULL) { - ptl_kiov_t *kiov = &payload_kiov[i]; - char *addr = ((char *)kmap (kiov->kiov_page)) + - kiov->kiov_offset; - - csum = kqsw_csum (csum, addr, MIN (sumnob, kiov->kiov_len)); - sumnob -= kiov->kiov_len; - } else { - struct iovec *iov = &payload_iov[i]; - - csum = kqsw_csum (csum, iov->iov_base, MIN (sumnob, kiov->iov_len)); - sumnob -= iov->iov_len; - } - } - memcpy(ktx->ktx_buffer +sizeof(*hdr) +sizeof(csum), &csum,sizeof(csum)); -#endif - - /* Set up first frag from pre-mapped buffer (it's at least the - * portals header) */ - ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; - ktx->ktx_iov[0].Len = KQSW_HDR_SIZE; - ktx->ktx_niov = 1; - - if (payload_nob > 0) { /* got some payload (something more to do) */ - /* make a single contiguous message? 
*/ - if (payload_nob <= KQSW_TX_MAXCONTIG) { - /* copy payload to ktx_buffer, immediately after hdr */ - if (payload_kiov != NULL) - lib_copy_kiov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, - payload_niov, payload_kiov, payload_nob); - else - lib_copy_iov2buf (ktx->ktx_buffer + KQSW_HDR_SIZE, - payload_niov, payload_iov, payload_nob); - /* first frag includes payload */ - ktx->ktx_iov[0].Len += payload_nob; - } else { - if (payload_kiov != NULL) - rc = kqswnal_map_tx_kiov (ktx, payload_nob, - payload_niov, payload_kiov); - else - rc = kqswnal_map_tx_iov (ktx, payload_nob, - payload_niov, payload_iov); - if (rc != 0) { - kqswnal_put_idle_tx (ktx); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - } - } - - ktx->ktx_port = (payload_nob <= KQSW_SMALLPAYLOAD) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_SENDING; /* => lib_finalize() on completion */ - ktx->ktx_args[0] = private; - ktx->ktx_args[1] = cookie; - - rc = kqswnal_launch (ktx); - if (rc != 0) { /* failed? 
*/ - CERROR ("Failed to send packet to "LPX64": %d\n", nid, rc); - lib_finalize (&kqswnal_lib, private, cookie); - return (-1); - } - - CDEBUG(D_NET, "send to "LPSZ" bytes to "LPX64"\n", payload_nob, nid); - return (0); -} - -static int -kqswnal_send (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - size_t payload_nob) -{ - return (kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid, - payload_niov, payload_iov, NULL, payload_nob)); -} - -static int -kqswnal_send_pages (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - ptl_kiov_t *payload_kiov, - size_t payload_nob) -{ - return (kqswnal_sendmsg (nal, private, cookie, hdr, type, nid, pid, - payload_niov, NULL, payload_kiov, payload_nob)); -} - -int kqswnal_fwd_copy_contig = 0; - -void -kqswnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - int rc; - kqswnal_tx_t *ktx; - struct iovec *iov = fwd->kprfd_iov; - int niov = fwd->kprfd_niov; - int nob = fwd->kprfd_nob; - ptl_nid_t nid = fwd->kprfd_gateway_nid; - -#if KQSW_CHECKSUM - CERROR ("checksums for forwarded packets not implemented\n"); - LBUG (); -#endif - /* The router wants this NAL to forward a packet */ - CDEBUG (D_NET, "forwarding [%p] to "LPX64", %d frags %d bytes\n", - fwd, nid, niov, nob); - - LASSERT (niov > 0); - - ktx = kqswnal_get_idle_tx (fwd, FALSE); - if (ktx == NULL) /* can't get txd right now */ - return; /* fwd will be scheduled when tx desc freed */ - - if (nid == kqswnal_lib.ni.nid) /* gateway is me */ - nid = fwd->kprfd_target_nid; /* target is final dest */ - - if (kqswnal_nid2elanid (nid) < 0) { - CERROR("Can't forward [%p] to "LPX64": not a peer\n", fwd, nid); - rc = -EHOSTUNREACH; - goto failed; - } - - if (nob > KQSW_NRXMSGBYTES_LARGE) { - CERROR ("Can't forward [%p] to "LPX64 - ": size %d bigger than max 
packet size %ld\n", - fwd, nid, nob, (long)KQSW_NRXMSGBYTES_LARGE); - rc = -EMSGSIZE; - goto failed; - } - - if ((kqswnal_fwd_copy_contig || niov > 1) && - nob <= KQSW_TX_BUFFER_SIZE) - { - /* send from ktx's pre-allocated/mapped contiguous buffer? */ - lib_copy_iov2buf (ktx->ktx_buffer, niov, iov, nob); - ktx->ktx_iov[0].Base = ktx->ktx_ebuffer; /* already mapped */ - ktx->ktx_iov[0].Len = nob; - ktx->ktx_niov = 1; - } - else - { - /* zero copy */ - ktx->ktx_niov = 0; /* no frags mapped yet */ - rc = kqswnal_map_tx_iov (ktx, nob, niov, iov); - if (rc != 0) - goto failed; - } - - ktx->ktx_port = (nob <= (sizeof (ptl_hdr_t) + KQSW_SMALLPAYLOAD)) ? - EP_SVC_LARGE_PORTALS_SMALL : EP_SVC_LARGE_PORTALS_LARGE; - ktx->ktx_nid = nid; - ktx->ktx_state = KTX_FORWARDING; /* kpr_put_packet() on completion */ - ktx->ktx_args[0] = fwd; - - rc = kqswnal_launch (ktx); - if (rc == 0) - return; - - failed: - LASSERT (rc != 0); - CERROR ("Failed to forward [%p] to "LPX64": %d\n", fwd, nid, rc); - - kqswnal_put_idle_tx (ktx); - /* complete now (with failure) */ - kpr_fwd_done (&kqswnal_data.kqn_router, fwd, rc); -} - -void -kqswnal_fwd_callback (void *arg, int error) -{ - kqswnal_rx_t *krx = (kqswnal_rx_t *)arg; - - /* The router has finished forwarding this packet */ - - if (error != 0) - { - ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]); - - CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n", - NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid),error); - } - - kqswnal_requeue_rx (krx); -} - -void -kqswnal_rx (kqswnal_rx_t *krx) -{ - ptl_hdr_t *hdr = (ptl_hdr_t *) page_address (krx->krx_pages[0]); - ptl_nid_t dest_nid = NTOH__u64 (hdr->dest_nid); - int nob; - int niov; - - if (dest_nid == kqswnal_lib.ni.nid) { /* It's for me :) */ - /* NB krx requeued when lib_parse() calls back kqswnal_recv */ - lib_parse (&kqswnal_lib, hdr, krx); - return; - } - -#if KQSW_CHECKSUM - CERROR ("checksums for forwarded packets not implemented\n"); - LBUG (); -#endif - if 
(kqswnal_nid2elanid (dest_nid) >= 0) /* should have gone direct to peer */ - { - CERROR("dropping packet from "LPX64" for "LPX64 - ": target is peer\n", NTOH__u64(hdr->src_nid), dest_nid); - kqswnal_requeue_rx (krx); - return; - } - - /* NB forwarding may destroy iov; rebuild every time */ - for (nob = krx->krx_nob, niov = 0; nob > 0; nob -= PAGE_SIZE, niov++) - { - LASSERT (niov < krx->krx_npages); - krx->krx_iov[niov].iov_base= page_address(krx->krx_pages[niov]); - krx->krx_iov[niov].iov_len = MIN(PAGE_SIZE, nob); - } - - kpr_fwd_init (&krx->krx_fwd, dest_nid, - krx->krx_nob, niov, krx->krx_iov, - kqswnal_fwd_callback, krx); - - kpr_fwd_start (&kqswnal_data.kqn_router, &krx->krx_fwd); -} - -/* Receive Interrupt Handler: posts to schedulers */ -void -kqswnal_rxhandler(EP_RXD *rxd) -{ - long flags; - int nob = ep_rxd_len (rxd); - int status = ep_rxd_status (rxd); - kqswnal_rx_t *krx = (kqswnal_rx_t *)ep_rxd_arg (rxd); - - CDEBUG(D_NET, "kqswnal_rxhandler: rxd %p, krx %p, nob %d, status %d\n", - rxd, krx, nob, status); - - LASSERT (krx != NULL); - - krx->krx_rxd = rxd; - krx->krx_nob = nob; - - /* must receive a whole header to be able to parse */ - if (status != EP_SUCCESS || nob < sizeof (ptl_hdr_t)) - { - /* receives complete with failure when receiver is removed */ - if (kqswnal_data.kqn_shuttingdown) - return; - - CERROR("receive status failed with status %d nob %d\n", - ep_rxd_status(rxd), nob); - kqswnal_requeue_rx (krx); - return; - } - - atomic_inc (&kqswnal_packets_received); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - list_add_tail (&krx->krx_list, &kqswnal_data.kqn_readyrxds); - if (waitqueue_active (&kqswnal_data.kqn_sched_waitq)) - wake_up (&kqswnal_data.kqn_sched_waitq); - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); -} - -#if KQSW_CHECKSUM -void -kqswnal_csum_error (kqswnal_rx_t *krx, int ishdr) -{ - ptl_hdr_t *hdr = (ptl_hdr_t *)page_address (krx->krx_pages[0]); - - CERROR ("%s checksum mismatch %p: dnid 
"LPX64", snid "LPX64 - ", dpid %d, spid %d, type %d\n", - ishdr ? "Header" : "Payload", krx, - NTOH__u64(hdr->dest_nid), NTOH__u64(hdr->src_nid) - NTOH__u32(hdr->dest_pid), NTOH__u32(hdr->src_pid), - NTOH__u32(hdr->type)); - - switch (NTOH__u32 (hdr->type)) - { - case PTL_MSG_ACK: - CERROR("ACK: mlen %d dmd "LPX64"."LPX64" match "LPX64 - " len %u\n", - NTOH__u32(hdr->msg.ack.mlength), - hdr->msg.ack.dst_wmd.handle_cookie, - hdr->msg.ack.dst_wmd.handle_idx, - NTOH__u64(hdr->msg.ack.match_bits), - NTOH__u32(hdr->msg.ack.length)); - break; - case PTL_MSG_PUT: - CERROR("PUT: ptl %d amd "LPX64"."LPX64" match "LPX64 - " len %u off %u data "LPX64"\n", - NTOH__u32(hdr->msg.put.ptl_index), - hdr->msg.put.ack_wmd.handle_cookie, - hdr->msg.put.ack_wmd.handle_idx, - NTOH__u64(hdr->msg.put.match_bits), - NTOH__u32(hdr->msg.put.length), - NTOH__u32(hdr->msg.put.offset), - hdr->msg.put.hdr_data); - break; - case PTL_MSG_GET: - CERROR ("GET: <>\n"); - break; - case PTL_MSG_REPLY: - CERROR ("REPLY: <>\n"); - break; - default: - CERROR ("TYPE?: <>\n"); - } -} -#endif - -static int -kqswnal_recvmsg (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - ptl_kiov_t *kiov, - size_t mlen, - size_t rlen) -{ - kqswnal_rx_t *krx = (kqswnal_rx_t *)private; - int page; - char *page_ptr; - int page_nob; - char *iov_ptr; - int iov_nob; - int frag; -#if KQSW_CHECKSUM - kqsw_csum_t senders_csum; - kqsw_csum_t payload_csum = 0; - kqsw_csum_t hdr_csum = kqsw_csum(0, page_address(krx->krx_pages[0]), - sizeof(ptl_hdr_t)); - size_t csum_len = mlen; - int csum_frags = 0; - int csum_nob = 0; - static atomic_t csum_counter; - int csum_verbose = (atomic_read(&csum_counter)%1000001) == 0; - - atomic_inc (&csum_counter); - - memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) + - sizeof (ptl_hdr_t), sizeof (kqsw_csum_t)); - if (senders_csum != hdr_csum) - kqswnal_csum_error (krx, 1); -#endif - CDEBUG(D_NET,"kqswnal_recv, mlen="LPSZ", rlen="LPSZ"\n", 
mlen, rlen); - - /* What was actually received must be >= payload. - * This is an LASSERT, as lib_finalize() doesn't have a completion status. */ - LASSERT (krx->krx_nob >= KQSW_HDR_SIZE + mlen); - LASSERT (mlen <= rlen); - - /* It must be OK to kmap() if required */ - LASSERT (kiov == NULL || !in_interrupt ()); - /* Either all pages or all vaddrs */ - LASSERT (!(kiov != NULL && iov != NULL)); - - if (mlen != 0) - { - page = 0; - page_ptr = ((char *) page_address(krx->krx_pages[0])) + - KQSW_HDR_SIZE; - page_nob = PAGE_SIZE - KQSW_HDR_SIZE; - - LASSERT (niov > 0); - if (kiov != NULL) { - iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - iov_nob = kiov->kiov_len; - } else { - iov_ptr = iov->iov_base; - iov_nob = iov->iov_len; - } - - for (;;) - { - /* We expect the iov to exactly match mlen */ - LASSERT (iov_nob <= mlen); - - frag = MIN (page_nob, iov_nob); - memcpy (iov_ptr, page_ptr, frag); -#if KQSW_CHECKSUM - payload_csum = kqsw_csum (payload_csum, iov_ptr, frag); - csum_nob += frag; - csum_frags++; -#endif - mlen -= frag; - if (mlen == 0) - break; - - page_nob -= frag; - if (page_nob != 0) - page_ptr += frag; - else - { - page++; - LASSERT (page < krx->krx_npages); - page_ptr = page_address(krx->krx_pages[page]); - page_nob = PAGE_SIZE; - } - - iov_nob -= frag; - if (iov_nob != 0) - iov_ptr += frag; - else if (kiov != NULL) { - kunmap (kiov->kiov_page); - kiov++; - niov--; - LASSERT (niov > 0); - iov_ptr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - iov_nob = kiov->kiov_len; - } else { - iov++; - niov--; - LASSERT (niov > 0); - iov_ptr = iov->iov_base; - iov_nob = iov->iov_len; - } - } - - if (kiov != NULL) - kunmap (kiov->kiov_page); - } - -#if KQSW_CHECKSUM - memcpy (&senders_csum, ((char *)page_address (krx->krx_pages[0])) + - sizeof(ptl_hdr_t) + sizeof(kqsw_csum_t), sizeof(kqsw_csum_t)); - - if (csum_len != rlen) - CERROR("Unable to checksum data in user's buffer\n"); - else if (senders_csum != payload_csum) - kqswnal_csum_error 
(krx, 0); - - if (csum_verbose) - CERROR("hdr csum %lx, payload_csum %lx, csum_frags %d, " - "csum_nob %d\n", - hdr_csum, payload_csum, csum_frags, csum_nob); -#endif - lib_finalize(nal, private, cookie); - - kqswnal_requeue_rx (krx); - - return (rlen); -} - -static int -kqswnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - size_t mlen, - size_t rlen) -{ - return (kqswnal_recvmsg (nal, private, cookie, niov, iov, NULL, mlen, rlen)); -} - -static int -kqswnal_recv_pages (nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - ptl_kiov_t *kiov, - size_t mlen, - size_t rlen) -{ - return (kqswnal_recvmsg (nal, private, cookie, niov, NULL, kiov, mlen, rlen)); -} - -int -kqswnal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&kqswnal_data.kqn_nthreads); - return (0); -} - -void -kqswnal_thread_fini (void) -{ - atomic_dec (&kqswnal_data.kqn_nthreads); -} - -int -kqswnal_scheduler (void *arg) -{ - kqswnal_rx_t *krx; - kqswnal_tx_t *ktx; - kpr_fwd_desc_t *fwd; - long flags; - int rc; - int counter = 0; - int did_something; - - kportal_daemonize ("kqswnal_sched"); - kportal_blockallsigs (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - - while (!kqswnal_data.kqn_shuttingdown) - { - did_something = FALSE; - - if (!list_empty (&kqswnal_data.kqn_readyrxds)) - { - krx = list_entry(kqswnal_data.kqn_readyrxds.next, - kqswnal_rx_t, krx_list); - list_del (&krx->krx_list); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - kqswnal_rx (krx); - - did_something = TRUE; - spin_lock_irqsave(&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_delayedtxds)) - { - ktx = list_entry(kqswnal_data.kqn_delayedtxds.next, - kqswnal_tx_t, ktx_list); - list_del (&ktx->ktx_list); - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - rc = kqswnal_launch 
(ktx); - if (rc != 0) /* failed: ktx_nid down? */ - { - CERROR("Failed delayed transmit to "LPX64 - ": %d\n", ktx->ktx_nid, rc); - kqswnal_tx_done (ktx, rc); - } - - did_something = TRUE; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - if (!list_empty (&kqswnal_data.kqn_delayedfwds)) - { - fwd = list_entry (kqswnal_data.kqn_delayedfwds.next, kpr_fwd_desc_t, kprfd_list); - list_del (&fwd->kprfd_list); - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - - kqswnal_fwd_packet (NULL, fwd); - - did_something = TRUE; - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - - /* nothing to do or hogging CPU */ - if (!did_something || counter++ == KQSW_RESCHED) { - spin_unlock_irqrestore(&kqswnal_data.kqn_sched_lock, - flags); - - counter = 0; - - if (!did_something) { - rc = wait_event_interruptible (kqswnal_data.kqn_sched_waitq, - kqswnal_data.kqn_shuttingdown || - !list_empty(&kqswnal_data.kqn_readyrxds) || - !list_empty(&kqswnal_data.kqn_delayedtxds) || - !list_empty(&kqswnal_data.kqn_delayedfwds)); - LASSERT (rc == 0); - } else if (current->need_resched) - schedule (); - - spin_lock_irqsave (&kqswnal_data.kqn_sched_lock, flags); - } - } - - spin_unlock_irqrestore (&kqswnal_data.kqn_sched_lock, flags); - - kqswnal_thread_fini (); - return (0); -} - -nal_cb_t kqswnal_lib = -{ - nal_data: &kqswnal_data, /* NAL private data */ - cb_send: kqswnal_send, - cb_send_pages: kqswnal_send_pages, - cb_recv: kqswnal_recv, - cb_recv_pages: kqswnal_recv_pages, - cb_read: kqswnal_read, - cb_write: kqswnal_write, - cb_malloc: kqswnal_malloc, - cb_free: kqswnal_free, - cb_printf: kqswnal_printf, - cb_cli: kqswnal_cli, - cb_sti: kqswnal_sti, - cb_dist: kqswnal_dist -}; diff --git a/lustre/portals/knals/scimacnal/.cvsignore b/lustre/portals/knals/scimacnal/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/knals/scimacnal/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git 
a/lustre/portals/knals/scimacnal/Makefile.am b/lustre/portals/knals/scimacnal/Makefile.am deleted file mode 100644 index 6da31f0..0000000 --- a/lustre/portals/knals/scimacnal/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = kscimacnal -modulenet_DATA = kscimacnal.o -EXTRA_PROGRAMS = kscimacnal - -DEFS = -kscimacnal_SOURCES = scimacnal.c scimacnal_cb.c scimacnal.h diff --git a/lustre/portals/knals/scimacnal/README.scimacnal b/lustre/portals/knals/scimacnal/README.scimacnal deleted file mode 100644 index d4c6a49..0000000 --- a/lustre/portals/knals/scimacnal/README.scimacnal +++ /dev/null @@ -1,14 +0,0 @@ - -scimacnal - A NAL for the Scali ScaMAC midlayer. - -The ScaMAC midlayer is a simplified API to the SCI high performance -interconnect. - -In order to use this NAL you'll need to tune scimac to use larger buffers. -See scimac.conf in this directory for an example. - -Overall performance and stability isn't great but this can be attributed -to the scimac driver which apparently is in need of some development. - -TODO: -Routing isn't yet implemented. diff --git a/lustre/portals/knals/scimacnal/scimac.conf b/lustre/portals/knals/scimacnal/scimac.conf deleted file mode 100644 index bfb6d02..0000000 --- a/lustre/portals/knals/scimacnal/scimac.conf +++ /dev/null @@ -1,35 +0,0 @@ -# Configuration file for the scimac driver - lustre friendly settings -# - -# The maximal number of message headers to use in the system. -scimac_max_no_hdrs = 32 - -# The maximal number of eager buffers to use in the system. -scimac_max_no_ebufs = 8 - -# The maximal size in bytes of each eager buffer. -scimac_max_ebuf_size = 65536 - -# Enable use of a kernel thread to defer reception of packets. -# Default is to use a tasklet (sw interrupt). -scimac_use_ulevel_recv = 1 - -# The maximal number of packets queued for transfer per path at any one time. 
-scimac_max_send_queuelen = 2000 - -# The packet retransmit time in milliseconds. -# The time elapsed since a packet was attempted sent until the packet is resent. -scimac_pkt_rexmit_time = 200 - -# The packet's maximal retransmit time in milliseconds. -# The total time that a packet will be attempted sent before it is dropped. -scimac_max_rexmit_time = 5000 - -# The lowest valid node identifier in the system. -scimac_min_nodeid_number = 0x100 - -# The largest valid node identifier in the system. -scimac_max_nodeid_number = 0xff00 - -# The incremental nodeid step in the system. -scimac_nodeid_increment = 0x100 diff --git a/lustre/portals/knals/scimacnal/scimacnal.c b/lustre/portals/knals/scimacnal/scimacnal.c deleted file mode 100644 index 1066d69..0000000 --- a/lustre/portals/knals/scimacnal/scimacnal.c +++ /dev/null @@ -1,219 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8:cindent: - * - * Copyright (C) 2003 High Performance Computing Center North (HPC2N) - * Author: Niklas Edmundsson <nikke@hpc2n.umu.se> - - * Based on gmnal, which is based on ksocknal and qswnal - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - - -#include "scimacnal.h" - -ptl_handle_ni_t kscimacnal_ni; -nal_t kscimacnal_api; - -kscimacnal_data_t kscimacnal_data; - -kpr_nal_interface_t kscimacnal_router_interface = { - kprni_nalid: SCIMACNAL, - kprni_arg: NULL, - kprni_fwd: kscimacnal_fwd_packet, -}; - - -static int kscimacnal_forward(nal_t *nal, - int id, - void *args, size_t args_len, - void *ret, size_t ret_len) -{ - kscimacnal_data_t *ksci = nal->nal_data; - nal_cb_t *nal_cb = ksci->ksci_cb; - - LASSERT (nal == &kscimacnal_api); - LASSERT (ksci == &kscimacnal_data); - LASSERT (nal_cb == &kscimacnal_lib); - - lib_dispatch(nal_cb, ksci, id, args, ret); /* nal needs ksci */ - return PTL_OK; -} - - -static void kscimacnal_lock(nal_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *ksci = nal->nal_data; - nal_cb_t *nal_cb = ksci->ksci_cb; - - - LASSERT (nal == &kscimacnal_api); - LASSERT (ksci == &kscimacnal_data); - LASSERT (nal_cb == &kscimacnal_lib); - - nal_cb->cb_cli(nal_cb,flags); -} - - -static void kscimacnal_unlock(nal_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *ksci = nal->nal_data; - nal_cb_t *nal_cb = ksci->ksci_cb; - - - LASSERT (nal == &kscimacnal_api); - LASSERT (ksci == &kscimacnal_data); - LASSERT (nal_cb == &kscimacnal_lib); - - nal_cb->cb_sti(nal_cb,flags); -} - - -static int kscimacnal_shutdown(nal_t *nal, int ni) -{ - LASSERT (nal == &kscimacnal_api); - return 0; -} - - -static void kscimacnal_yield( nal_t *nal ) -{ - LASSERT (nal == &kscimacnal_api); - - if (current->need_resched) - schedule(); - return; -} - - -static nal_t *kscimacnal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - int nnids = 512; /* FIXME: Need ScaMac funktion to get #nodes */ - - CDEBUG(D_NET, "calling lib_init with nid 0x%Lx nnids %d\n", kscimacnal_data.ksci_nid, nnids); - lib_init(&kscimacnal_lib, kscimacnal_data.ksci_nid, 0, nnids,ptl_size, ac_size); - return &kscimacnal_api; -} - - -/* Called by kernel at module unload time */ 
-static void __exit -kscimacnal_finalize(void) -{ - /* FIXME: How should the shutdown procedure really look? */ - kscimacnal_data.ksci_shuttingdown=1; - - PORTAL_SYMBOL_UNREGISTER(kscimacnal_ni); - - PtlNIFini(kscimacnal_ni); - lib_fini(&kscimacnal_lib); - - mac_finish(kscimacnal_data.ksci_machandle); - - CDEBUG (D_MALLOC, "done kmem %d\n", atomic_read (&portal_kmemory)); - - return; -} - - -/* Called by kernel at module insertion time */ -static int __init -kscimacnal_initialize(void) -{ - int rc; - unsigned long nid=0; - mac_handle_t *machandle = NULL; - - - CDEBUG (D_MALLOC, "start kmem %d\n", atomic_read (&portal_kmemory)); - - kscimacnal_api.forward = kscimacnal_forward; - kscimacnal_api.shutdown = kscimacnal_shutdown; - kscimacnal_api.yield = kscimacnal_yield; - kscimacnal_api.validate = NULL; /* our api validate is a NOOP */ - kscimacnal_api.lock= kscimacnal_lock; - kscimacnal_api.unlock= kscimacnal_unlock; - kscimacnal_api.nal_data = &kscimacnal_data; - - kscimacnal_lib.nal_data = &kscimacnal_data; - - memset(&kscimacnal_data, 0, sizeof(kscimacnal_data)); - - kscimacnal_data.ksci_cb = &kscimacnal_lib; - - /* We're not using this, but cli/sti callbacks does... ??? 
*/ - spin_lock_init(&kscimacnal_data.ksci_dispatch_lock); - - /* FIXME: We only support one adapter for now */ - machandle = mac_init(0, MAC_SAPID_LUSTRE, kscimacnal_rx, - &kscimacnal_data); - - if(!machandle) { - CERROR("mac_init() failed\n"); - return -1; - } - - kscimacnal_data.ksci_machandle = machandle; - - /* Make sure the scimac MTU is tuned */ - if(mac_get_mtusize(machandle) < SCIMACNAL_MTU) { - CERROR("scimac mtu of %ld smaller than SCIMACNAL MTU of %d\n", - mac_get_mtusize(machandle), SCIMACNAL_MTU); - CERROR("Consult README.scimacnal for more information\n"); - mac_finish(machandle); - return -1; - } - - /* Get the node ID */ - /* mac_get_physaddrlen() is a function instead of define, sigh */ - LASSERT(mac_get_physaddrlen(machandle) <= sizeof(nid)); - if(mac_get_physaddr(machandle, (mac_physaddr_t *) &nid)) { - CERROR("mac_get_physaddr() failed\n"); - mac_finish(machandle); - return -1; - } - nid = ntohl(nid); - kscimacnal_data.ksci_nid = nid; - - - /* Initialize Network Interface */ - /* FIXME: What do the magic numbers mean? Documentation anyone? 
*/ - rc = PtlNIInit(kscimacnal_init, 32, 4, 0, &kscimacnal_ni); - if (rc) { - CERROR("PtlNIInit failed %d\n", rc); - mac_finish(machandle); - return (-ENOMEM); - } - - PORTAL_SYMBOL_REGISTER(kscimacnal_ni); - - /* We're done now, it's OK for the RX callback to do stuff */ - kscimacnal_data.ksci_init = 1; - - return 0; -} - - -MODULE_AUTHOR("Niklas Edmundsson <nikke@hpc2n.umu.se>"); -MODULE_DESCRIPTION("Kernel Scali ScaMAC SCI NAL v0.0"); -MODULE_LICENSE("GPL"); - -module_init (kscimacnal_initialize); -module_exit (kscimacnal_finalize); - -EXPORT_SYMBOL(kscimacnal_ni); diff --git a/lustre/portals/knals/scimacnal/scimacnal.h b/lustre/portals/knals/scimacnal/scimacnal.h deleted file mode 100644 index 1ff180e..0000000 --- a/lustre/portals/knals/scimacnal/scimacnal.h +++ /dev/null @@ -1,85 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8:cindent: - * - * Copyright (C) 2003 High Performance Computing Center North (HPC2N) - * Author: Niklas Edmundsson <nikke@hpc2n.umu.se> - */ - - -#ifndef _SCIMACNAL_H -#define _SCIMACNAL_H - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/locks.h> -#include <linux/unistd.h> -#include <linux/init.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <asm/page.h> /* For PAGE_SIZE */ - -#define DEBUG_SUBSYSTEM S_UNDEFINED - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <portals/lib-p30.h> - -#include <scamac.h> - -#ifndef MAC_SAPID_LUSTRE -#define MAC_SAPID_LUSTRE MAC_SAPID_TEST1 -#endif /* MAC_SAPID_LUSTRE */ - -#define SCIMACNAL_MTU 65536 -/* FIXME: What is really the MTU of lustre? 
*/ -#if PTL_MD_MAX_IOV*PAGE_SIZE > SCIMACNAL_MTU -#error Max MTU of ScaMAC is 64k, PTL_MD_MAX_IOV*PAGE_SIZE is bigger. -#endif - -typedef struct { - mac_handle_t *handle; - mac_mblk_t *msg; - mac_msg_type_t type; - void *userdata; -} kscimacnal_rx_t; - - -typedef struct { - nal_cb_t *ktx_nal; - void *ktx_private; - lib_msg_t *ktx_cookie; - ptl_hdr_t ktx_hdr; -} kscimacnal_tx_t; - - -typedef struct { - char ksci_init; - char ksci_shuttingdown; - ptl_nid_t ksci_nid; - nal_cb_t *ksci_cb; - spinlock_t ksci_dispatch_lock; - mac_handle_t *ksci_machandle; -} kscimacnal_data_t; - -extern kscimacnal_data_t kscimacnal_data; -extern nal_t kscimacnal_api; -extern nal_cb_t kscimacnal_lib; - -void kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); -void kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type, void *userdata); - - -#endif /* _SCIMACNAL_H */ diff --git a/lustre/portals/knals/scimacnal/scimacnal_cb.c b/lustre/portals/knals/scimacnal/scimacnal_cb.c deleted file mode 100644 index 7e4a2e8..0000000 --- a/lustre/portals/knals/scimacnal/scimacnal_cb.c +++ /dev/null @@ -1,468 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8:cindent: - * - * Copyright (C) 2003 High Performance Computing Center North (HPC2N) - * Author: Niklas Edmundsson <nikke@hpc2n.umu.se> - - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "scimacnal.h" - -static int -kscimacnal_read (nal_cb_t *nal, void *private, - void *dst_addr, user_ptr src_addr, size_t len) -{ - CDEBUG(D_NET, "0x%Lx: reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - - -static int -kscimacnal_write(nal_cb_t *nal, void *private, - user_ptr dst_addr, void *src_addr, size_t len) -{ - CDEBUG(D_NET, "0x%Lx: writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr ); - memcpy( dst_addr, src_addr, len ); - return 0; -} - - -static void * -kscimacnal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - return buf; -} - - -static void -kscimacnal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - - -static void -kscimacnal_printf(nal_cb_t *nal, const char *fmt, ...) 
-{ - va_list ap; - char msg[256]; - - if (portal_debug & D_NET) { - va_start( ap, fmt ); - vsnprintf( msg, sizeof(msg), fmt, ap ); - va_end( ap ); - - printk("CPUId: %d %s",smp_processor_id(), msg); - } -} - - -static void -kscimacnal_cli(nal_cb_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *data= nal->nal_data; - - spin_lock_irqsave(&data->ksci_dispatch_lock,*flags); -} - - -static void -kscimacnal_sti(nal_cb_t *nal, unsigned long *flags) -{ - kscimacnal_data_t *data= nal->nal_data; - - spin_unlock_irqrestore(&data->ksci_dispatch_lock,*flags); -} - - -static int -kscimacnal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* FIXME: Network distance has a meaning, but is there no easy - * way to figure it out (depends on routing) */ - - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - - -static -char * get_mac_error(mac_status_t status) -{ - switch(status) { - case MAC_MSG_STAT_OK: - return "MAC_MSG_STAT_OK"; - case MAC_MSG_STAT_FREED: - return "MAC_MSG_STAT_FREED"; - case MAC_MSG_STAT_ABORTED: - return "MAC_MSG_STAT_ABORTED"; - case MAC_MSG_STAT_TIMEDOUT: - return "MAC_MSG_STAT_TIMEDOUT"; - case MAC_MSG_STAT_NODEUNREACH: - return "MAC_MSG_STAT_NODEUNREACH"; - case MAC_MSG_STAT_NETDOWN: - return "MAC_MSG_STAT_NETDOWN"; - case MAC_MSG_STAT_RESET: - return "MAC_MSG_STAT_RESET"; - case MAC_MSG_STAT_INITFAILED: - return "MAC_MSG_STAT_INITFAILED"; - case MAC_MSG_STAT_SYNCFAILED: - return "MAC_MSG_STAT_SYNCFAILED"; - case MAC_MSG_STAT_BADPROTO: - return "MAC_MSG_STAT_BADPROTO"; - case MAC_MSG_STAT_NOBUFSPACE: - return "MAC_MSG_STAT_NOBUFSPACE"; - case MAC_MSG_STAT_CONGESTION: - return "MAC_MSG_STAT_CONGESTION"; - case MAC_MSG_STAT_OTHER: - return "MAC_MSG_STAT_OTHER"; - default: - return "Unknown error"; - } -} - - -/* FIXME add routing code here ? */ - -/* Called by ScaMac when transmission is complete (ie. 
message is released) */ -static void -kscimacnal_txrelease(mac_mblk_t *msg, mac_msg_status_t status, void *context) -{ - kscimacnal_tx_t *ktx = (kscimacnal_tx_t *)context; - int err=0; - - LASSERT (ktx != NULL); - - /* Euh, there is no feedback when transmission fails?! */ - switch(status) { - case MAC_MSG_STAT_OK: /* normal */ - break; - default: - CERROR("%s (%d):\n", get_mac_error(status), status); - err = -EIO; - break; - } - - lib_finalize(ktx->ktx_nal, ktx->ktx_private, ktx->ktx_cookie); - - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); -} - - -/* Called by portals when it wants to send a message. - * Since ScaMAC has it's own TX thread we don't bother setting up our own. */ -static int -kscimacnal_send(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int payload_niov, - struct iovec *payload_iov, - size_t payload_len) -{ - kscimacnal_tx_t *ktx=NULL; - kscimacnal_data_t *ksci = nal->nal_data; - int rc=0; - int buf_len = sizeof(ptl_hdr_t) + payload_len; - mac_mblk_t *msg=NULL, *lastblk, *newblk; - unsigned long physaddr; - - - CDEBUG(D_NET, "sending %d bytes from %p to nid 0x%Lx niov: %d\n", - payload_len, payload_iov, nid, payload_niov); - - LASSERT(ksci != NULL); - - LASSERT(hdr != NULL); - - /* Do real check if we can send this */ - if (buf_len > mac_get_mtusize(ksci->ksci_machandle)) { - CERROR("kscimacnal:request exceeds TX MTU size (%ld).\n", - mac_get_mtusize(ksci->ksci_machandle)); - return -EINVAL; - } - - - /* save transaction info for later finalize and cleanup */ - PORTAL_ALLOC(ktx, (sizeof(kscimacnal_tx_t))); - if (!ktx) { - return -ENOMEM; - } - - /* *SIGH* hdr is a stack variable in the calling function, so we - * need to copy it to a buffer. Zerocopy magic (or is it just - * deferred memcpy?) is annoying sometimes. 
*/ - memcpy(&ktx->ktx_hdr, hdr, sizeof(ptl_hdr_t)); - - /* First, put the header in the main message mblk */ - msg = mac_alloc_mblk(&ktx->ktx_hdr, sizeof(ptl_hdr_t), - kscimacnal_txrelease, ktx); - if (!msg) { - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return -ENOMEM; - } - mac_put_mblk(msg, sizeof(ptl_hdr_t)); - lastblk=msg; - - /* Allocate additional mblks for each iov as needed. - * Essentially lib_copy_iov2buf with a twist or two */ - while (payload_len > 0) - { - ptl_size_t nob; - - LASSERT (payload_niov > 0); - - nob = MIN (payload_iov->iov_len, payload_len); - - /* We don't need a callback on the additional mblks, since - * all release callbacks seems to be called when the entire - * message has been sent */ - newblk=mac_alloc_mblk(payload_iov->iov_base, nob, NULL, NULL); - if(!newblk) { - mac_free_msg(msg); - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return -ENOMEM; - } - mac_put_mblk(newblk, nob); - mac_link_mblk(lastblk, newblk); - lastblk=newblk; - - payload_len -= nob; - payload_niov--; - payload_iov++; - } - - ktx->ktx_nal = nal; - ktx->ktx_private = private; - ktx->ktx_cookie = cookie; - - CDEBUG(D_NET, "mac_send %d bytes to nid: 0x%Lx\n", buf_len, nid); - - physaddr = htonl(nid); - - if((rc=mac_send(ksci->ksci_machandle, msg, - (mac_physaddr_t *) &physaddr))) { - CERROR("kscimacnal: mac_send() failed, rc=%d\n", rc); - mac_free_msg(msg); - PORTAL_FREE(ktx, (sizeof(kscimacnal_tx_t))); - return rc; - } - - return 0; -} - - -void -kscimacnal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - CERROR ("forwarding not implemented\n"); -} - - -/* Process a received portals packet */ -/* Called by the ScaMac RX thread when a packet is received */ -void -kscimacnal_rx(mac_handle_t *handle, mac_mblk_t *msg, mac_msg_type_t type, - void *userdata) -{ - ptl_hdr_t *hdr = NULL; - kscimacnal_rx_t krx; - mac_size_t size; - kscimacnal_data_t *ksci = userdata; - - LASSERT(ksci != NULL); - - if ( !ksci->ksci_init || ksci->ksci_shuttingdown || - type == 
MAC_MSG_TYPE_CTRL || type == MAC_MSG_TYPE_OTHER ) { - /* We're not interested in messages not for us, ignore */ - mac_free_msg(msg); - return; - } - - size = mac_msg_size(msg); - - CDEBUG(D_NET,"msg %p type %d, size %ld bytes (%ld mblks)\n", - msg, type, size, mac_msg_mblks(msg)); - - if( size < sizeof( ptl_hdr_t ) ) { - /* XXX what's this for? */ - if (ksci->ksci_shuttingdown) - return; - CERROR("kscimacnal: did not receive complete portal header," - "size= %ld\n", size); - /* Free the message before exiting */ - mac_free_msg(msg); - return; - } - - /* Provide everything we know */ - krx.handle = handle; - krx.msg = msg; - krx.type = type; - krx.userdata = userdata; - - /* mac_msg_next returns the next mblk with unread data */ - hdr = mac_get_mblk(mac_msg_next(msg), sizeof(ptl_hdr_t) ); - - if(!hdr) { - CERROR("kscimacnal: no data block in message %p\n", msg); - mac_free_msg(msg); - return; - } - - if ( hdr->dest_nid == kscimacnal_lib.ni.nid ) { - PROF_START(lib_parse); - /* sets wanted_len, iovs etc and calls our callback */ - lib_parse(&kscimacnal_lib, hdr, &krx); - PROF_FINISH(lib_parse); -#if 0 /* FIXME: Is it possible to detect this? 
*/ - } else if (kgmnal_ispeer(hdr->dest_nid)) { - /* should have gone direct to peer */ - CERROR("dropping packet from 0x%llx to 0x%llx:" - "target is a peer\n", - hdr->src_nid, hdr->dest_nid); - kgmnal_requeue_rx(&krx); -#endif /* if 0 FIXME */ - } else { - /* forward to gateway */ - CERROR("forwarding not implemented, mynid=0x%llx dest=0x%llx\n", - kscimacnal_lib.ni.nid, hdr->dest_nid); - } - - mac_free_msg(msg); - - CDEBUG(D_NET, "msg %p: Done\n", msg); -} - - -/* Called by portals to process a recieved packet */ -static int kscimacnal_recv(nal_cb_t *nal, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - size_t mlen, - size_t rlen) -{ - kscimacnal_rx_t *krx = private; - mac_mblk_t *mblk; - void *src; - mac_size_t pkt_len; - ptl_size_t iovused=0; - - LASSERT (krx != NULL); - LASSERT (krx->msg != NULL); - - CDEBUG(D_NET,"msg %p: mlen=%d, rlen=%d, niov=%d\n", - krx->msg, mlen, rlen, niov); - - /* What was actually received must be >= what sender claims to have - * sent. This is an LASSERT, since lib-move doesn't check cb return - * code yet. Also, rlen seems to be negative when mlen==0 so don't - * assert on that. - */ - LASSERT (mlen==0 || mac_msg_size(krx->msg) >= sizeof(ptl_hdr_t)+rlen); - LASSERT (mlen==0 || mlen <= rlen); - - PROF_START(memcpy); - - /* mac_msg_next returns next mblk with unread data (ie. 
can - * be same mblk */ - while (mlen != 0 && (mblk = mac_msg_next(krx->msg))) { - pkt_len = mac_mblk_len(mblk); - src = mac_get_mblk(mblk, pkt_len); /* Next unread block */ - - CDEBUG(D_NET,"msg %p: mblk: %p pkt_len: %ld src: %p\n", - krx->msg, mblk, pkt_len, src); - - LASSERT(src != NULL); - - /* Essentially lib_copy_buf2iov but with continuation support, - * we "gracefully" thrash the argument vars ;) */ - while (pkt_len > 0) { - ptl_size_t nob; - - LASSERT (niov > 0); - - LASSERT(iovused < iov->iov_len); - - nob = MIN (iov->iov_len-iovused, pkt_len); - CDEBUG(D_NET, "iovbase: %p iovlen: %d src: %p nob: %d " - "iovused: %d\n", - iov->iov_base, iov->iov_len, - src, nob, iovused); - - memcpy (iov->iov_base+iovused, src, nob); - pkt_len -= nob; - src += nob; - - if(nob+iovused < iov->iov_len) { - /* We didn't use all of the iov */ - iovused+=nob; - } - else { - niov--; - iov++; - iovused=0; - } - } - } - PROF_FINISH(memcpy); - - CDEBUG(D_NET, "Calling lib_finalize.\n"); - - PROF_START(lib_finalize); - lib_finalize(nal, private, cookie); - PROF_FINISH(lib_finalize); - - CDEBUG(D_NET, "Done.\n"); - - return rlen; -} - - -nal_cb_t kscimacnal_lib = { - nal_data: &kscimacnal_data, /* NAL private data */ - cb_send: kscimacnal_send, - cb_send_pages: NULL, /* Ignore for now */ - cb_recv: kscimacnal_recv, - cb_recv_pages: NULL, - cb_read: kscimacnal_read, - cb_write: kscimacnal_write, - cb_malloc: kscimacnal_malloc, - cb_free: kscimacnal_free, - cb_printf: kscimacnal_printf, - cb_cli: kscimacnal_cli, - cb_sti: kscimacnal_sti, - cb_dist: kscimacnal_dist -}; diff --git a/lustre/portals/knals/socknal/.cvsignore b/lustre/portals/knals/socknal/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/knals/socknal/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/portals/knals/socknal/Makefile.am b/lustre/portals/knals/socknal/Makefile.am deleted file mode 100644 index 437d7fc..0000000 --- 
a/lustre/portals/knals/socknal/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = ksocknal -modulenet_DATA = ksocknal.o -EXTRA_PROGRAMS = ksocknal - -DEFS = -ksocknal_SOURCES = socknal.c socknal_cb.c socknal.h diff --git a/lustre/portals/knals/socknal/Makefile.mk b/lustre/portals/knals/socknal/Makefile.mk deleted file mode 100644 index 46edf01..0000000 --- a/lustre/portals/knals/socknal/Makefile.mk +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../../Kernelenv - -obj-y += ksocknal.o -ksocknal-objs := socknal.o socknal_cb.o - diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c deleted file mode 100644 index 91d971c..0000000 --- a/lustre/portals/knals/socknal/socknal.c +++ /dev/null @@ -1,860 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "socknal.h" - -ptl_handle_ni_t ksocknal_ni; -static nal_t ksocknal_api; -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -ksock_nal_data_t ksocknal_data; -#else -static ksock_nal_data_t ksocknal_data; -#endif - -kpr_nal_interface_t ksocknal_router_interface = { - kprni_nalid: SOCKNAL, - kprni_arg: &ksocknal_data, - kprni_fwd: ksocknal_fwd_packet, -}; - - -int -ksocknal_api_forward(nal_t *nal, int id, void *args, size_t args_len, - void *ret, size_t ret_len) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - - lib_dispatch(nal_cb, k, id, args, ret); /* ksocknal_send needs k */ - return PTL_OK; -} - -int -ksocknal_api_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "closing all connections\n"); - - return ksocknal_close_sock(0); /* close all sockets */ -} - -void -ksocknal_api_yield(nal_t *nal) -{ - our_cond_resched(); - return; -} - -void -ksocknal_api_lock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_cli(nal_cb,flags); -} - -void -ksocknal_api_unlock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_sti(nal_cb,flags); -} - -nal_t * -ksocknal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", (ptl_nid_t)0); - lib_init(&ksocknal_lib, (ptl_nid_t)0, 0, 10, ptl_size, ac_size); - return (&ksocknal_api); -} - -/* - * EXTRA functions follow - */ - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define SOCKET_I(inode) (&(inode)->u.socket_i) -#endif -static __inline__ struct socket * -socki_lookup(struct inode *inode) -{ - 
return SOCKET_I(inode); -} - -int -ksocknal_set_mynid(ptl_nid_t nid) -{ - lib_ni_t *ni = &ksocknal_lib.ni; - - /* FIXME: we have to do this because we call lib_init() at module - * insertion time, which is before we have 'mynid' available. lib_init - * sets the NAL's nid, which it uses to tell other nodes where packets - * are coming from. This is not a very graceful solution to this - * problem. */ - - CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", - nid, ni->nid); - - ni->nid = nid; - return (0); -} - -void -ksocknal_bind_irq (unsigned int irq, int cpu) -{ -#if (defined(CONFIG_SMP) && CPU_AFFINITY) - char cmdline[64]; - char *argv[] = {"/bin/sh", - "-c", - cmdline, - NULL}; - char *envp[] = {"HOME=/", - "PATH=/sbin:/bin:/usr/sbin:/usr/bin", - NULL}; - - snprintf (cmdline, sizeof (cmdline), - "echo %d > /proc/irq/%u/smp_affinity", 1 << cpu, irq); - - printk (KERN_INFO "Binding irq %u to CPU %d with cmd: %s\n", - irq, cpu, cmdline); - - /* FIXME: Find a better method of setting IRQ affinity... 
- */ - - call_usermodehelper (argv[0], argv, envp); -#endif -} - -int -ksocknal_add_sock (ptl_nid_t nid, int fd, int bind_irq) -{ - unsigned long flags; - ksock_conn_t *conn; - struct file *file = NULL; - struct socket *sock = NULL; - ksock_sched_t *sched = NULL; - unsigned int irq = 0; - struct net_device *dev = NULL; - int ret; - int idx; - ENTRY; - - LASSERT (!in_interrupt()); - - file = fget(fd); - if (file == NULL) - RETURN(-EINVAL); - - ret = -EINVAL; - sock = socki_lookup(file->f_dentry->d_inode); - if (sock == NULL) - GOTO(error, ret); - - ret = -ENOMEM; - PORTAL_ALLOC(conn, sizeof(*conn)); - if (!conn) - GOTO(error, ret); - - sock->sk->allocation = GFP_NOFS; /* don't call info fs for alloc */ - - conn->ksnc_file = file; - conn->ksnc_sock = sock; - conn->ksnc_saved_data_ready = sock->sk->data_ready; - conn->ksnc_saved_write_space = sock->sk->write_space; - conn->ksnc_peernid = nid; - atomic_set (&conn->ksnc_refcount, 1); /* 1 ref for socklist */ - - conn->ksnc_rx_ready = 0; - conn->ksnc_rx_scheduled = 0; - ksocknal_new_packet (conn, 0); - - INIT_LIST_HEAD (&conn->ksnc_tx_queue); - conn->ksnc_tx_ready = 0; - conn->ksnc_tx_scheduled = 0; - -#warning check it is OK to derefence sk->dst_cache->dev like this... 
- lock_sock (conn->ksnc_sock->sk); - - if (conn->ksnc_sock->sk->dst_cache != NULL) { - dev = conn->ksnc_sock->sk->dst_cache->dev; - if (dev != NULL) { - irq = dev->irq; - if (irq >= NR_IRQS) { - CERROR ("Unexpected IRQ %x\n", irq); - irq = 0; - } - } - } - - release_sock (conn->ksnc_sock->sk); - - write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags); - - if (irq == 0 || - ksocknal_data.ksnd_irq_info[irq] == SOCKNAL_IRQ_UNASSIGNED) { - /* This is a software NIC, or we haven't associated it with - * a CPU yet */ - - /* Choose the CPU with the fewest connections */ - sched = ksocknal_data.ksnd_schedulers; - for (idx = 1; idx < SOCKNAL_N_SCHED; idx++) - if (sched->kss_nconns > - ksocknal_data.ksnd_schedulers[idx].kss_nconns) - sched = &ksocknal_data.ksnd_schedulers[idx]; - - if (irq != 0) { /* Hardware NIC */ - /* Remember which scheduler we chose */ - idx = sched - ksocknal_data.ksnd_schedulers; - - LASSERT (idx < SOCKNAL_IRQ_SCHED_MASK); - - if (bind_irq) /* remember if we will bind below */ - idx |= SOCKNAL_IRQ_BOUND; - - ksocknal_data.ksnd_irq_info[irq] = idx; - } - } else { - /* This is a hardware NIC, associated with a CPU */ - idx = ksocknal_data.ksnd_irq_info[irq]; - - /* Don't bind again if we've bound already */ - if ((idx & SOCKNAL_IRQ_BOUND) != 0) - bind_irq = 0; - - sched = &ksocknal_data.ksnd_schedulers[idx & SOCKNAL_IRQ_SCHED_MASK]; - } - - sched->kss_nconns++; - conn->ksnc_scheduler = sched; - - list_add(&conn->ksnc_list, &ksocknal_data.ksnd_socklist); - - write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags); - - if (bind_irq && /* irq binding required */ - irq != 0) /* hardware NIC */ - ksocknal_bind_irq (irq, sched - ksocknal_data.ksnd_schedulers); - - /* NOW it's safe to get called back when socket is ready... 
*/ - sock->sk->user_data = conn; - sock->sk->data_ready = ksocknal_data_ready; - sock->sk->write_space = ksocknal_write_space; - - /* ...which I call right now to get things going */ - ksocknal_data_ready (sock->sk, 0); - ksocknal_write_space (sock->sk); - - CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n", - conn, conn->ksnc_peernid); - - /* Can't unload while connection active */ - PORTAL_MODULE_USE; - RETURN(0); - -error: - fput(file); - return (ret); -} - -/* Passing in a zero nid will close all connections */ -int -ksocknal_close_sock(ptl_nid_t nid) -{ - long flags; - ksock_conn_t *conn; - LIST_HEAD (death_row); - struct list_head *tmp; - - LASSERT (!in_interrupt()); - write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags); - - if (nid == 0) { /* close ALL connections */ - /* insert 'death row' into the socket list... */ - list_add (&death_row, &ksocknal_data.ksnd_socklist); - /* ...extract and reinitialise the socket list itself... */ - list_del_init (&ksocknal_data.ksnd_socklist); - /* ...and voila, death row is the proud owner of all conns */ - } else list_for_each (tmp, &ksocknal_data.ksnd_socklist) { - - conn = list_entry (tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_peernid == nid) { - list_del (&conn->ksnc_list); - list_add (&conn->ksnc_list, &death_row); - break; - } - } - - write_unlock_irqrestore (&ksocknal_data.ksnd_socklist_lock, flags); - - if (nid && list_empty (&death_row)) - return (-ENOENT); - - while (!list_empty (&death_row)) { - conn = list_entry (death_row.next, ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - - /* NB I _have_ to restore the callback, rather than storing - * a noop, since the socket could survive past this module - * being unloaded!! */ - conn->ksnc_sock->sk->data_ready = conn->ksnc_saved_data_ready; - conn->ksnc_sock->sk->write_space = conn->ksnc_saved_write_space; - - /* OK; no more callbacks, but they could be in progress now, - * so wait for them to complete... 
*/ - write_lock_irqsave (&ksocknal_data.ksnd_socklist_lock, flags); - - /* ...however if I get the lock before a callback gets it, - * this will make them noop - */ - conn->ksnc_sock->sk->user_data = NULL; - - /* And drop the scheduler's connection count while I've got - * the exclusive lock */ - conn->ksnc_scheduler->kss_nconns--; - - write_unlock_irqrestore(&ksocknal_data.ksnd_socklist_lock, - flags); - - ksocknal_put_conn (conn); /* drop ref for ksnd_socklist */ - } - - return (0); -} - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - return &(sk->tp_pinfo.af_tcp); -} -#else -struct tcp_opt *sock2tcp_opt(struct sock *sk) -{ - struct tcp_sock *s = (struct tcp_sock *)sk; - return &s->tcp; -} -#endif - -void -ksocknal_push_conn (ksock_conn_t *conn) -{ - struct sock *sk = conn->ksnc_sock->sk; - struct tcp_opt *tp = sock2tcp_opt(sk); - int nonagle; - int val = 1; - int rc; - mm_segment_t oldmm; - - lock_sock (sk); - nonagle = tp->nonagle; - tp->nonagle = 1; - release_sock (sk); - - oldmm = get_fs (); - set_fs (KERNEL_DS); - - rc = sk->prot->setsockopt (sk, SOL_TCP, TCP_NODELAY, - (char *)&val, sizeof (val)); - LASSERT (rc == 0); - - set_fs (oldmm); - - lock_sock (sk); - tp->nonagle = nonagle; - release_sock (sk); -} - -/* Passing in a zero nid pushes all connections */ -int -ksocknal_push_sock (ptl_nid_t nid) -{ - ksock_conn_t *conn; - struct list_head *tmp; - int index; - int i; - - if (nid != 0) { - conn = ksocknal_get_conn (nid); - - if (conn == NULL) - return (-ENOENT); - - ksocknal_push_conn (conn); - ksocknal_put_conn (conn); - - return (0); - } - - /* NB we can't remove connections from the socket list so we have to - * cope with them being removed from under us... 
- */ - for (index = 0; ; index++) { - read_lock (&ksocknal_data.ksnd_socklist_lock); - - i = 0; - conn = NULL; - - list_for_each (tmp, &ksocknal_data.ksnd_socklist) { - if (i++ == index) { - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - atomic_inc (&conn->ksnc_refcount); // take a ref - break; - } - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - if (conn == NULL) - break; - - ksocknal_push_conn (conn); - ksocknal_put_conn (conn); - } - - return (0); -} - -ksock_conn_t * -ksocknal_get_conn (ptl_nid_t nid) -{ - struct list_head *tmp; - ksock_conn_t *conn; - - PROF_START(conn_list_walk); - - read_lock (&ksocknal_data.ksnd_socklist_lock); - - list_for_each(tmp, &ksocknal_data.ksnd_socklist) { - - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - - if (conn->ksnc_peernid == nid) { - /* caller is referencing */ - atomic_inc (&conn->ksnc_refcount); - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n", - conn, nid, atomic_read (&conn->ksnc_refcount)); - - PROF_FINISH(conn_list_walk); - return (conn); - } - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", - nid); - PROF_FINISH(conn_list_walk); - return (NULL); -} - -void -ksocknal_close_conn (ksock_conn_t *conn) -{ - CDEBUG (D_NET, "connection [%p] closed \n", conn); - - fput (conn->ksnc_file); - PORTAL_FREE (conn, sizeof (*conn)); - - /* One less connection keeping us hanging on */ - PORTAL_MODULE_UNUSE; -} - -void -_ksocknal_put_conn (ksock_conn_t *conn) -{ - unsigned long flags; - - CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn); - - /* "But what is the black spot, captain?" I asked. - * "That's a summons, mate..." 
*/ - - LASSERT (atomic_read (&conn->ksnc_refcount) == 0); - LASSERT (conn->ksnc_sock->sk->data_ready != ksocknal_data_ready); - LASSERT (conn->ksnc_sock->sk->write_space != ksocknal_write_space); - LASSERT (conn->ksnc_sock->sk->user_data == NULL); - LASSERT (!conn->ksnc_rx_scheduled); - - if (!in_interrupt()) { - ksocknal_close_conn (conn); - return; - } - - spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); - - list_add (&conn->ksnc_list, &ksocknal_data.ksnd_reaper_list); - wake_up (&ksocknal_data.ksnd_reaper_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); -} - -int -ksocknal_cmd(struct portal_ioctl_data * data, void * private) -{ - int rc = -EINVAL; - - LASSERT (data != NULL); - - switch(data->ioc_nal_cmd) { - case NAL_CMD_REGISTER_PEER_FD: { - rc = ksocknal_add_sock(data->ioc_nid, data->ioc_fd, - data->ioc_flags); - break; - } - case NAL_CMD_CLOSE_CONNECTION: { - rc = ksocknal_close_sock(data->ioc_nid); - break; - } - case NAL_CMD_REGISTER_MYNID: { - rc = ksocknal_set_mynid (data->ioc_nid); - break; - } - case NAL_CMD_PUSH_CONNECTION: { - rc = ksocknal_push_sock (data->ioc_nid); - break; - } - } - - return rc; -} - -void -ksocknal_free_buffers (void) -{ - if (ksocknal_data.ksnd_fmbs != NULL) { - ksock_fmb_t *fmb = (ksock_fmb_t *)ksocknal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; - i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); - i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ksocknal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - } - - if (ksocknal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ksocknal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + - SOCKNAL_NNBLK_LTXS)); - - if (ksocknal_data.ksnd_schedulers != NULL) - PORTAL_FREE (ksocknal_data.ksnd_schedulers, - sizeof (ksock_sched_t) * SOCKNAL_N_SCHED); -} - -void __exit -ksocknal_module_fini (void) -{ - int 
i; - - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - switch (ksocknal_data.ksnd_init) { - default: - LASSERT (0); - - case SOCKNAL_INIT_ALL: - kportal_nal_unregister(SOCKNAL); - PORTAL_SYMBOL_UNREGISTER (ksocknal_ni); - /* fall through */ - - case SOCKNAL_INIT_PTL: - PtlNIFini(ksocknal_ni); - lib_fini(&ksocknal_lib); - /* fall through */ - - case SOCKNAL_INIT_DATA: - /* Module refcount only gets to zero when all connections - * have been closed so all lists must be empty */ - LASSERT (list_empty (&ksocknal_data.ksnd_socklist)); - LASSERT (list_empty (&ksocknal_data.ksnd_reaper_list)); - LASSERT (list_empty (&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns)); - LASSERT (list_empty (&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns)); - - if (ksocknal_data.ksnd_schedulers != NULL) - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - ksock_sched_t *kss = - &ksocknal_data.ksnd_schedulers[i]; - - LASSERT (list_empty (&kss->kss_tx_conns)); - LASSERT (list_empty (&kss->kss_rx_conns)); - LASSERT (kss->kss_nconns == 0); - } - - /* stop router calling me */ - kpr_shutdown (&ksocknal_data.ksnd_router); - - /* flag threads to terminate; wake and wait for them to die */ - ksocknal_data.ksnd_shuttingdown = 1; - wake_up_all (&ksocknal_data.ksnd_reaper_waitq); - - for (i = 0; i < SOCKNAL_N_SCHED; i++) - wake_up_all(&ksocknal_data.ksnd_schedulers[i].kss_waitq); - - while (atomic_read (&ksocknal_data.ksnd_nthreads) != 0) { - CDEBUG (D_NET, "waitinf for %d threads to terminate\n", - atomic_read (&ksocknal_data.ksnd_nthreads)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - - kpr_deregister (&ksocknal_data.ksnd_router); - - ksocknal_free_buffers(); - /* fall through */ - - case SOCKNAL_INIT_NOTHING: - break; - } - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n", - atomic_read(&portal_kmemory)); -} - - -int __init 
-ksocknal_module_init (void) -{ - int pkmem = atomic_read(&portal_kmemory); - int rc; - int i; - int j; - - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - - LASSERT (ksocknal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - - ksocknal_api.forward = ksocknal_api_forward; - ksocknal_api.shutdown = ksocknal_api_shutdown; - ksocknal_api.yield = ksocknal_api_yield; - ksocknal_api.validate = NULL; /* our api validate is a NOOP */ - ksocknal_api.lock = ksocknal_api_lock; - ksocknal_api.unlock = ksocknal_api_unlock; - ksocknal_api.nal_data = &ksocknal_data; - - ksocknal_lib.nal_data = &ksocknal_data; - - memset (&ksocknal_data, 0, sizeof (ksocknal_data)); /* zero pointers */ - - INIT_LIST_HEAD(&ksocknal_data.ksnd_socklist); - rwlock_init(&ksocknal_data.ksnd_socklist_lock); - - ksocknal_data.ksnd_nal_cb = &ksocknal_lib; - spin_lock_init (&ksocknal_data.ksnd_nal_cb_lock); - - spin_lock_init(&ksocknal_data.ksnd_small_fmp.fmp_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_small_fmp.fmp_blocked_conns); - - spin_lock_init(&ksocknal_data.ksnd_large_fmp.fmp_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ksocknal_data.ksnd_large_fmp.fmp_blocked_conns); - - spin_lock_init(&ksocknal_data.ksnd_idle_ltx_lock); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ksocknal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_lock_init (&ksocknal_data.ksnd_reaper_lock); - INIT_LIST_HEAD (&ksocknal_data.ksnd_reaper_list); - init_waitqueue_head(&ksocknal_data.ksnd_reaper_waitq); - - memset (&ksocknal_data.ksnd_irq_info, SOCKNAL_IRQ_UNASSIGNED, - sizeof (ksocknal_data.ksnd_irq_info)); - - /* flag lists/ptrs/locks initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_DATA; - - PORTAL_ALLOC(ksocknal_data.ksnd_schedulers, - sizeof(ksock_sched_t) * 
SOCKNAL_N_SCHED); - if (ksocknal_data.ksnd_schedulers == NULL) - RETURN(-ENOMEM); - - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - ksock_sched_t *kss = &ksocknal_data.ksnd_schedulers[i]; - - spin_lock_init (&kss->kss_lock); - INIT_LIST_HEAD (&kss->kss_rx_conns); - INIT_LIST_HEAD (&kss->kss_tx_conns); -#if SOCKNAL_ZC - INIT_LIST_HEAD (&kss->kss_zctxdone_list); -#endif - init_waitqueue_head (&kss->kss_waitq); - } - - CERROR ("ltx "LPSZ", total "LPSZ"\n", sizeof (ksock_ltx_t), - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - PORTAL_ALLOC(ksocknal_data.ksnd_ltxs, - sizeof(ksock_ltx_t) * (SOCKNAL_NLTXS +SOCKNAL_NNBLK_LTXS)); - if (ksocknal_data.ksnd_ltxs == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ksocknal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ksocknal_data.ksnd_ltxs)[i]; - - ltx->ltx_idle = i < SOCKNAL_NLTXS ? 
- &ksocknal_data.ksnd_idle_ltx_list : - &ksocknal_data.ksnd_idle_nblk_ltx_list; - list_add (<x->ltx_tx.tx_list, ltx->ltx_idle); - } - - rc = PtlNIInit(ksocknal_init, 32, 4, 0, &ksocknal_ni); - if (rc != 0) { - CERROR("ksocknal: PtlNIInit failed: error %d\n", rc); - ksocknal_module_fini (); - RETURN (rc); - } - PtlNIDebug(ksocknal_ni, ~0); - - ksocknal_data.ksnd_init = SOCKNAL_INIT_PTL; // flag PtlNIInit() called - - for (i = 0; i < SOCKNAL_N_SCHED; i++) { - rc = ksocknal_thread_start (ksocknal_scheduler, - &ksocknal_data.ksnd_schedulers[i]); - if (rc != 0) { - CERROR("Can't spawn socknal scheduler[%d]: %d\n", - i, rc); - ksocknal_module_fini (); - RETURN (rc); - } - } - - rc = ksocknal_thread_start (ksocknal_reaper, NULL); - if (rc != 0) { - CERROR("Can't spawn socknal reaper: %d\n", rc); - ksocknal_module_fini (); - RETURN (rc); - } - - rc = kpr_register(&ksocknal_data.ksnd_router, - &ksocknal_router_interface); - if (rc != 0) { - CDEBUG(D_NET, "Can't initialise routing interface " - "(rc = %d): not routing\n", rc); - } else { - /* Only allocate forwarding buffers if I'm on a gateway */ - - PORTAL_ALLOC(ksocknal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - if (ksocknal_data.ksnd_fmbs == NULL) { - ksocknal_module_fini (); - RETURN(-ENOMEM); - } - - /* NULL out buffer pointers etc */ - memset(ksocknal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS)); - - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + - SOCKNAL_LARGE_FWD_NMSGS); i++) { - ksock_fmb_t *fmb = - &((ksock_fmb_t *)ksocknal_data.ksnd_fmbs)[i]; - - if (i < SOCKNAL_SMALL_FWD_NMSGS) { - fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_small_fmp; - } else { - fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES; - fmb->fmb_pool = &ksocknal_data.ksnd_large_fmp; - } - - LASSERT (fmb->fmb_npages > 0); - for (j = 0; j < fmb->fmb_npages; j++) { - fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); - - if 
(fmb->fmb_pages[j] == NULL) { - ksocknal_module_fini (); - return (-ENOMEM); - } - - LASSERT(page_address (fmb->fmb_pages[j]) != - NULL); - } - - list_add(&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); - } - } - - rc = kportal_nal_register(SOCKNAL, &ksocknal_cmd, NULL); - if (rc != 0) { - CERROR ("Can't initialise command interface (rc = %d)\n", rc); - ksocknal_module_fini (); - return (rc); - } - - PORTAL_SYMBOL_REGISTER(ksocknal_ni); - - /* flag everything initialised */ - ksocknal_data.ksnd_init = SOCKNAL_INIT_ALL; - - printk(KERN_INFO "Routing socket NAL loaded (Routing %s, initial " - "mem %d)\n", - kpr_routing (&ksocknal_data.ksnd_router) ? - "enabled" : "disabled", pkmem); - - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); -MODULE_LICENSE("GPL"); - -module_init(ksocknal_module_init); -module_exit(ksocknal_module_fini); - -EXPORT_SYMBOL (ksocknal_ni); diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h deleted file mode 100644 index 86cdeb0..0000000 --- a/lustre/portals/knals/socknal/socknal.h +++ /dev/null @@ -1,292 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_PORTAL_ALLOC -#define EXPORT_SYMTAB - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <net/tcp.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <linux/kmod.h> -#include <asm/uaccess.h> -#include <asm/segment.h> - -#define DEBUG_SUBSYSTEM S_SOCKNAL - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <portals/lib-p30.h> - -#define SOCKNAL_N_SCHED num_online_cpus() /* # socknal schedulers */ - -#if PTL_LARGE_MTU -# define SOCKNAL_MAX_FWD_PAYLOAD (256<<10) /* biggest payload I can forward */ -#else -# define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */ -#endif - -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - -#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ -#define SOCKNAL_LARGE_FWD_NMSGS 64 /* # large messages I can be forwarding at any time */ - -#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */ - -#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT) - /* # pages in a large message fwd buffer */ - -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ - -#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10) - -typedef struct /* pool of forwarding buffers */ -{ - 
spinlock_t fmp_lock; /* serialise */ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ - struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ -} ksock_fmb_pool_t; - - -typedef struct /* per scheduler state */ -{ - spinlock_t kss_lock; /* serialise */ - struct list_head kss_rx_conns; /* conn waiting to be read */ - struct list_head kss_tx_conns; /* conn waiting to be written */ -#if SOCKNAL_ZC - struct list_head kss_zctxdone_list; /* completed ZC transmits */ -#endif - wait_queue_head_t kss_waitq; /* where scheduler sleeps */ - int kss_nconns; /* # connections assigned to this scheduler */ -} ksock_sched_t; - -typedef struct { - int ksnd_init; /* initialisation state */ - - struct list_head ksnd_socklist; /* all my connections */ - rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */ - - nal_cb_t *ksnd_nal_cb; - spinlock_t ksnd_nal_cb_lock; /* lib cli/sti lock */ - - atomic_t ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit */ - ksock_sched_t *ksnd_schedulers; /* scheduler state */ - - kpr_router_t ksnd_router; /* THE router */ - - void *ksnd_fmbs; /* all the pre-allocated FMBs */ - ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ - ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - spinlock_t ksnd_idle_ltx_lock; /* serialise ltx alloc/free */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - - struct list_head ksnd_reaper_list; /* conn waiting to be reaped */ - wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ - spinlock_t ksnd_reaper_lock; /* serialise */ - unsigned char ksnd_irq_info[NR_IRQS]; /* irq->scheduler lookup */ -} ksock_nal_data_t; - -#define SOCKNAL_INIT_NOTHING 0 
-#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_PTL 2 -#define SOCKNAL_INIT_ALL 3 - -#define SOCKNAL_IRQ_BOUND 0x80 /* flag we _did_ bind already */ -#define SOCKNAL_IRQ_SCHED_MASK 0x7f /* we assume < 127 CPUs */ -#define SOCKNAL_IRQ_UNASSIGNED 0xff /* flag unassigned */ - -/* A packet just assembled for transmission is represented by 1 or more - * struct iovec fragments and 0 or more ptl_kiov_t fragments. Forwarded - * messages, or messages from an MD with PTL_MD_KIOV _not_ set have 0 - * ptl_kiov_t fragments. Messages from an MD with PTL_MD_KIOV set, have 1 - * struct iovec fragment (the header) and up to PTL_MD_MAX_IOV ptl_kiov_t - * fragments. - * - * On the receive side, initially 1 struct iovec fragment is posted for - * receive (the header). Once the header has been received, if the message - * requires forwarding or will be received into mapped memory, up to - * PTL_MD_MAX_IOV struct iovec fragments describe the target memory. - * Otherwise up to PTL_MD_MAX_IOV ptl_kiov_t fragments are used. - */ - -typedef struct /* transmit packet */ -{ - struct list_head tx_list; /* queue on conn for transmission etc */ - char tx_isfwd; /* forwarding / sourced here */ - int tx_nob; /* # packet bytes */ - int tx_niov; /* # packet iovec frags */ - struct iovec *tx_iov; /* packet iovec frags */ - int tx_nkiov; /* # packet page frags */ - ptl_kiov_t *tx_kiov; /* packet page frags */ -#if SOCKNAL_ZC - ksock_sched_t *tx_sched; /* who to wake on callback */ - zccd_t tx_zccd; /* zero copy callback descriptor */ -#endif -} ksock_tx_t; - -#define KSOCK_ZCCD_2_TX(ptr) list_entry (ptr, ksock_tx_t, tx_zccd) -/* network zero copy callback descriptor embedded in ksock_tx_t */ - -/* space for the tx frag descriptors: hdr is always 1 iovec - * and payload is PTL_MD_MAX of either type. 
*/ -typedef struct -{ - struct iovec hdr; - union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; - } payload; -} ksock_txiovspace_t; - -typedef struct /* locally transmitted packet */ -{ - ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ - void *ltx_private; /* lib_finalize() callback arg */ - void *ltx_cookie; /* lib_finalize() callback arg */ - ksock_txiovspace_t ltx_iov_space; /* where to stash frag descriptors */ - ptl_hdr_t ltx_hdr; /* buffer for packet header */ -} ksock_ltx_t; - -#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry ((kprfd_scratch_t *)ptr, kpr_fwd_desc_t, kprfd_scratch) -/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */ - -#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx) -/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */ - -/* NB list_entry() is used here as convenient macro for calculating a - * pointer to a struct from the address of a member. - */ - -typedef struct /* Kernel portals Socket Forwarding message buffer */ -{ /* (socknal->router) */ - struct list_head fmb_list; /* queue idle */ - kpr_fwd_desc_t fmb_fwd; /* router's descriptor */ - int fmb_npages; /* # pages allocated */ - ksock_fmb_pool_t *fmb_pool; /* owning pool */ - struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES]; - struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES]; -} ksock_fmb_t; - -/* space for the rx frag descriptors; we either read a single contiguous - * header, or PTL_MD_MAX_IOV frags of payload of either type. 
*/ -typedef union { - struct iovec iov[PTL_MD_MAX_IOV]; - ptl_kiov_t kiov[PTL_MD_MAX_IOV]; -} ksock_rxiovspace_t; - -#define SOCKNAL_RX_HEADER 1 /* reading header */ -#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */ -#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */ -#define SOCKNAL_RX_SLOP 4 /* skipping body */ -#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */ -#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */ - -typedef struct -{ - struct list_head ksnc_list; /* stash on global socket list */ - struct file *ksnc_file; /* socket filp */ - struct socket *ksnc_sock; /* actual socket */ - void *ksnc_saved_data_ready; /* socket's original data_ready() callback */ - void *ksnc_saved_write_space; /* socket's original write_space() callback */ - ptl_nid_t ksnc_peernid; /* who's on the other end */ - atomic_t ksnc_refcount; /* # users */ - ksock_sched_t *ksnc_scheduler; /* who schedules this connection */ - - /* READER */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ - volatile int ksnc_rx_ready; /* data ready to read */ - int ksnc_rx_scheduled; /* being progressed */ - int ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted; /* bytes actually wanted */ - int ksnc_rx_niov; /* # iovec frags */ - struct iovec *ksnc_rx_iov; /* the iovec frags */ - int ksnc_rx_nkiov; /* # page frags */ - ptl_kiov_t *ksnc_rx_kiov; /* the page frags */ - ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */ - void *ksnc_cookie; /* rx lib_finalize passthru arg */ - ptl_hdr_t ksnc_hdr; /* where I read headers into */ - - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output space */ - struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - volatile int ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - -} ksock_conn_t; - -extern int 
ksocknal_add_sock (ptl_nid_t nid, int fd, int client); -extern int ksocknal_close_sock(ptl_nid_t nid); -extern int ksocknal_set_mynid(ptl_nid_t nid); -extern int ksocknal_push_sock(ptl_nid_t nid); -extern ksock_conn_t *ksocknal_get_conn (ptl_nid_t nid); -extern void _ksocknal_put_conn (ksock_conn_t *conn); -extern void ksocknal_close_conn (ksock_conn_t *conn); - -static inline void -ksocknal_put_conn (ksock_conn_t *conn) -{ - CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n", - conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount)); - - if (atomic_dec_and_test (&conn->ksnc_refcount)) - _ksocknal_put_conn (conn); -} - -extern int ksocknal_thread_start (int (*fn)(void *arg), void *arg); -extern int ksocknal_new_packet (ksock_conn_t *conn, int skip); -extern void ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); -extern int ksocknal_scheduler (void *arg); -extern int ksocknal_reaper (void *arg); -extern void ksocknal_data_ready(struct sock *sk, int n); -extern void ksocknal_write_space(struct sock *sk); - - -extern nal_cb_t ksocknal_lib; -extern ksock_nal_data_t ksocknal_data; diff --git a/lustre/portals/knals/socknal/socknal_cb.c b/lustre/portals/knals/socknal/socknal_cb.c deleted file mode 100644 index 6147d8a..0000000 --- a/lustre/portals/knals/socknal/socknal_cb.c +++ /dev/null @@ -1,1613 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include "socknal.h" - -atomic_t ksocknal_packets_received; -atomic_t ksocknal_packets_launched; -atomic_t ksocknal_packets_being_sent; - -#if SOCKNAL_ZC -int ksocknal_do_zc = 1; -int ksocknal_zc_min_frag = 2048; -#endif - -/* - * LIB functions follow - * - */ -int -ksocknal_read(nal_cb_t *nal, void *private, void *dst_addr, - user_ptr src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ksocknal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, - void *src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ksocknal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq, - ptl_event_t *ev) -{ - CDEBUG(D_NET, LPX64": callback eq %p ev %p\n", - nal->ni.nid, eq, ev); - - if (eq->event_callback != NULL) - eq->event_callback(ev); - - return 0; -} - -void * -ksocknal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - - if (buf != NULL) - memset(buf, 0, len); - - return (buf); -} - -void -ksocknal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - -void -ksocknal_printf(nal_cb_t *nal, const char *fmt, ...) 
-{ - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); -} - -void -ksocknal_cli(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data = nal->nal_data; - - spin_lock(&data->ksnd_nal_cb_lock); -} - -void -ksocknal_sti(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data; - data = nal->nal_data; - - spin_unlock(&data->ksnd_nal_cb_lock); -} - -int -ksocknal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if ksocknal_get_conn(nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - -ksock_ltx_t * -ksocknal_get_ltx (int may_block) -{ - long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) { - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - if (!list_empty (&ksocknal_data.ksnd_idle_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - break; - } - - if (!may_block) { - if (!list_empty(&ksocknal_data.ksnd_idle_nblk_ltx_list)) { - ltx = list_entry(ksocknal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - } - break; - } - - spin_unlock_irqrestore(&ksocknal_data.ksnd_idle_ltx_lock, - flags); - - wait_event (ksocknal_data.ksnd_idle_ltx_waitq, - !list_empty (&ksocknal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - return (ltx); -} - -#if SOCKNAL_ZC -struct page * -ksocknal_kvaddr_to_page (unsigned long vaddr) -{ - struct page *page; - - if (vaddr >= VMALLOC_START && - vaddr < VMALLOC_END) - page = vmalloc_to_page ((void *)vaddr); -#if CONFIG_HIGHMEM - else if (vaddr >= PKMAP_BASE && - vaddr < (PKMAP_BASE + LAST_PKMAP * PAGE_SIZE)) - page = 
vmalloc_to_page ((void *)vaddr); - /* in 2.4 ^ just walks the page tables */ -#endif - else - page = virt_to_page (vaddr); - - if (page == NULL || - !VALID_PAGE (page)) - return (NULL); - - return (page); -} -#endif - -int -ksocknal_send_iov (struct socket *sock, ksock_tx_t *tx, int more) -{ - struct iovec *iov = tx->tx_iov; - int fragsize = iov->iov_len; - unsigned long vaddr = (unsigned long)iov->iov_base; -#if SOCKNAL_ZC - int offset = vaddr & (PAGE_SIZE - 1); - int zcsize = MIN (fragsize, PAGE_SIZE - offset); - struct page *page; -#endif - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only send 1 frag at a time. */ - LASSERT (fragsize <= tx->tx_nob); - LASSERT (tx->tx_niov > 0); - more |= (tx->tx_niov > 1); - -#if SOCKNAL_ZC - if (ksocknal_do_zc && - (sock->sk->route_caps & NETIF_F_SG) && - (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && - zcsize >= ksocknal_zc_min_frag && - (page = ksocknal_kvaddr_to_page (vaddr)) != NULL) { - - CDEBUG(D_NET, "vaddr %p, page %p->%p + offset %x for %d\n", - (void *)vaddr, page, page_address(page), offset, zcsize); - - more |= (zcsize < fragsize); - - rc = tcp_sendpage_zccd(sock, page, offset, zcsize, - more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT, - &tx->tx_zccd); - } else -#endif - { - /* NB don't pass tx's iov; sendmsg may or may not update it */ - struct iovec fragiov = { .iov_base = (void *)vaddr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = more ? 
(MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - - set_fs (KERNEL_DS); - rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize); - set_fs (oldmm); - } - - if (rc <= 0) - return (rc); - - tx->tx_nob -= rc; - - if (rc < fragsize) { - /* didn't send whole frag */ - iov->iov_base = (void *)(vaddr + rc); - iov->iov_len = fragsize - rc; - return (-EAGAIN); - } - - /* everything went */ - LASSERT (rc == fragsize); - tx->tx_iov++; - tx->tx_niov--; - return (1); -} - -int -ksocknal_send_kiov (struct socket *sock, ksock_tx_t *tx, int more) -{ - ptl_kiov_t *kiov = tx->tx_kiov; - int fragsize = kiov->kiov_len; - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only send 1 frag at a time. */ - LASSERT (fragsize <= tx->tx_nob); - LASSERT (offset + fragsize <= PAGE_SIZE); - LASSERT (tx->tx_nkiov > 0); - more |= (tx->tx_nkiov > 1); - -#if SOCKNAL_ZC - if (ksocknal_do_zc && - (sock->sk->route_caps & NETIF_F_SG) && - (sock->sk->route_caps & (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) && - fragsize >= ksocknal_zc_min_frag) { - - CDEBUG(D_NET, "page %p + offset %x for %d\n", - page, offset, fragsize); - - rc = tcp_sendpage_zccd(sock, page, offset, fragsize, - more ? (MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT, - &tx->tx_zccd); - } else -#endif - { - char *addr = ((char *)kmap (page)) + offset; - struct iovec fragiov = {.iov_base = addr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = more ? 
(MSG_DONTWAIT | MSG_MORE) : MSG_DONTWAIT - }; - mm_segment_t oldmm = get_fs(); - - set_fs (KERNEL_DS); - rc = sock->sk->prot->sendmsg(sock->sk, &msg, fragsize); - set_fs (oldmm); - kunmap (page); - } - - if (rc <= 0) - return (rc); - - tx->tx_nob -= rc; - - if (rc < fragsize) { - /* didn't send whole frag */ - kiov->kiov_offset = offset + rc; - kiov->kiov_len = fragsize - rc; - return (-EAGAIN); - } - - /* everything went */ - LASSERT (rc == fragsize); - tx->tx_kiov++; - tx->tx_nkiov--; - return (1); -} - -int -ksocknal_sendmsg (struct socket *sock, ksock_tx_t *tx, int more) -{ - int rc; - int sent_some = 0; - ENTRY; - - LASSERT (!in_interrupt()); - - for (;;) { - if (tx->tx_niov != 0) - rc = ksocknal_send_iov (sock, tx, more || tx->tx_nkiov != 0); - else - rc = ksocknal_send_kiov (sock, tx, more); - - /* Interpret a zero rc the same as -EAGAIN (Adaptech TOE) */ - if (rc <= 0) /* error or partial send */ - RETURN ((sent_some || rc == -EAGAIN) ? 0 : rc); - - if (tx->tx_nob == 0) /* sent everything */ - RETURN (0); - - sent_some = 1; - } -} - -int -ksocknal_recv_iov (ksock_conn_t *conn) -{ - struct iovec *iov = conn->ksnc_rx_iov; - int fragsize = iov->iov_len; - unsigned long vaddr = (unsigned long)iov->iov_base; - struct iovec fragiov = { .iov_base = (void *)vaddr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only receive 1 frag at a time. 
*/ - LASSERT (conn->ksnc_rx_niov > 0); - LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); - /* NB this is just a boolean............................^ */ - set_fs (oldmm); - - if (rc <= 0) - return (rc); - - conn->ksnc_rx_nob_wanted -= rc; - conn->ksnc_rx_nob_left -= rc; - - if (rc < fragsize) { - iov->iov_base = (void *)(vaddr + rc); - iov->iov_len = fragsize - rc; - return (-EAGAIN); - } - - LASSERT (rc == fragsize); - conn->ksnc_rx_iov++; - conn->ksnc_rx_niov--; - return (1); -} - -int -ksocknal_recv_kiov (ksock_conn_t *conn) -{ - ptl_kiov_t *kiov = conn->ksnc_rx_kiov; - struct page *page = kiov->kiov_page; - int offset = kiov->kiov_offset; - int fragsize = kiov->kiov_len; - unsigned long vaddr = ((unsigned long)kmap (page)) + offset; - struct iovec fragiov = { .iov_base = (void *)vaddr, - .iov_len = fragsize}; - struct msghdr msg = { - .msg_name = NULL, - .msg_namelen = 0, - .msg_iov = &fragiov, - .msg_iovlen = 1, - .msg_control = NULL, - .msg_controllen = 0, - .msg_flags = 0 - }; - mm_segment_t oldmm = get_fs(); - int rc; - - /* NB we can't trust socket ops to either consume our iovs - * or leave them alone, so we only receive 1 frag at a time. 
*/ - LASSERT (fragsize <= conn->ksnc_rx_nob_wanted); - LASSERT (conn->ksnc_rx_nkiov > 0); - LASSERT (offset + fragsize <= PAGE_SIZE); - - set_fs (KERNEL_DS); - rc = sock_recvmsg (conn->ksnc_sock, &msg, fragsize, MSG_DONTWAIT); - /* NB this is just a boolean............................^ */ - set_fs (oldmm); - kunmap (page); - - if (rc <= 0) - return (rc); - - conn->ksnc_rx_nob_wanted -= rc; - conn->ksnc_rx_nob_left -= rc; - - if (rc < fragsize) { - kiov->kiov_offset = offset + rc; - kiov->kiov_len = fragsize - rc; - return (-EAGAIN); - } - - LASSERT (rc == fragsize); - conn->ksnc_rx_kiov++; - conn->ksnc_rx_nkiov--; - return (1); -} - -int -ksocknal_recvmsg (ksock_conn_t *conn) -{ - int rc; - int got_some = 0; - ENTRY; - - LASSERT (!in_interrupt ()); - - for (;;) { - LASSERT (conn->ksnc_rx_nob_wanted > 0); - - if (conn->ksnc_rx_niov != 0) - rc = ksocknal_recv_iov (conn); - else - rc = ksocknal_recv_kiov (conn); - - /* CAVEAT EMPTOR: we return... - * <= 0 for error (0 == EOF) and > 0 for success (unlike sendmsg()) */ - - if (rc <= 0) /* error/EOF or partial receive */ - RETURN ((got_some || rc == -EAGAIN) ? 1 : rc); - - if (conn->ksnc_rx_nob_wanted == 0) - RETURN (1); - - got_some = 0; - } -} - -#if SOCKNAL_ZC -void -ksocknal_zc_callback (zccd_t *zcd) -{ - ksock_tx_t *tx = KSOCK_ZCCD_2_TX(zcd); - ksock_sched_t *sched = tx->tx_sched; - unsigned long flags; - ENTRY; - - /* Schedule tx for cleanup (can't do it now due to lock conflicts) */ - - spin_lock_irqsave (&sched->kss_lock, flags); - - list_add_tail (&tx->tx_list, &sched->kss_zctxdone_list); - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - - spin_unlock_irqrestore (&sched->kss_lock, flags); - EXIT; -} -#endif - -void -ksocknal_tx_done (ksock_tx_t *tx) -{ - long flags; - ksock_ltx_t *ltx; - ENTRY; - - atomic_dec (&ksocknal_packets_being_sent); - - if (tx->tx_isfwd) { /* was a forwarded packet? 
*/ - kpr_fwd_done (&ksocknal_data.ksnd_router, - KSOCK_TX_2_KPR_FWD_DESC (tx), 0); - EXIT; - return; - } - - /* local send */ - ltx = KSOCK_TX_2_KSOCK_LTX (tx); - - lib_finalize (&ksocknal_lib, ltx->ltx_private, ltx->ltx_cookie); - - spin_lock_irqsave (&ksocknal_data.ksnd_idle_ltx_lock, flags); - - list_add_tail (<x->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ksocknal_data.ksnd_idle_ltx_list && - waitqueue_active (&ksocknal_data.ksnd_idle_ltx_waitq)) - wake_up (&ksocknal_data.ksnd_idle_ltx_waitq); - - spin_unlock_irqrestore (&ksocknal_data.ksnd_idle_ltx_lock, flags); - EXIT; -} - -void -ksocknal_process_transmit (ksock_sched_t *sched, long *irq_flags) -{ - ksock_conn_t *conn; - ksock_tx_t *tx; - int rc; - - LASSERT (!list_empty (&sched->kss_tx_conns)); - conn = list_entry(sched->kss_tx_conns.next, ksock_conn_t, ksnc_tx_list); - list_del (&conn->ksnc_tx_list); - - LASSERT (conn->ksnc_tx_scheduled); - LASSERT (conn->ksnc_tx_ready); - LASSERT (!list_empty (&conn->ksnc_tx_queue)); - tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list); - /* assume transmit will complete now, so dequeue while I've got lock */ - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&sched->kss_lock, *irq_flags); - - LASSERT (tx->tx_nob > 0); - - conn->ksnc_tx_ready = 0;/* write_space may race with me and set ready */ - mb(); /* => clear BEFORE trying to write */ - - rc = ksocknal_sendmsg (conn->ksnc_sock, tx, - !list_empty (&conn->ksnc_tx_queue)); /* more to come? */ - - CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc); - - if (rc != 0) { -#warning FIXME: handle socket errors properly - CERROR("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc); - /* kid on for now the whole packet went. - * NB when we handle the error better, we'll still need to - * block for zccd completion. 
- */ - tx->tx_nob = 0; - } - - if (tx->tx_nob == 0) /* nothing left to send */ - { - /* everything went; assume more can go, so prevent write_space locking */ - conn->ksnc_tx_ready = 1; - - ksocknal_put_conn (conn); /* release packet's ref */ - atomic_inc (&ksocknal_packets_being_sent); -#if SOCKNAL_ZC - if (atomic_read (&tx->tx_zccd.zccd_count) != 1) { - /* zccd skbufs are still in-flight. Release my - * initial ref on zccd, so callback can occur */ - zccd_put (&tx->tx_zccd); - } else -#endif - ksocknal_tx_done (tx); - - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - } else { - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - - /* back onto HEAD of tx_queue */ - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } - - if (!conn->ksnc_tx_ready || /* no space to write now */ - list_empty (&conn->ksnc_tx_queue)) {/* nothing to write */ - conn->ksnc_tx_scheduled = 0; /* not being scheduled */ - ksocknal_put_conn (conn); /* release scheduler's ref */ - } else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); -} - -void -ksocknal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx) -{ - unsigned long flags; - ksock_sched_t *sched = conn->ksnc_scheduler; - - /* Ensure the frags we've been given EXACTLY match the number of - * bytes we want to send. Many TCP/IP stacks disregard any total - * size parameters passed to them and just look at the frags. - * - * We always expect at least 1 mapped fragment containing the - * complete portals header. 
- */ - LASSERT (lib_iov_nob (tx->tx_niov, tx->tx_iov) + - lib_kiov_nob (tx->tx_nkiov, tx->tx_kiov) == tx->tx_nob); - LASSERT (tx->tx_niov >= 1); - LASSERT (tx->tx_iov[0].iov_len >= sizeof (ptl_hdr_t)); - - CDEBUG (D_NET, "type %d, nob %d niov %d nkiov %d\n", - ((ptl_hdr_t *)tx->tx_iov[0].iov_base)->type, tx->tx_nob, - tx->tx_niov, tx->tx_nkiov); - -#if SOCKNAL_ZC - zccd_init (&tx->tx_zccd, ksocknal_zc_callback); - /* NB this sets 1 ref on zccd, so the callback can only occur - * after I've released this ref */ - tx->tx_sched = sched; -#endif - spin_lock_irqsave (&sched->kss_lock, flags); - - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) { /* not scheduled to send */ - list_add_tail (&conn->ksnc_tx_list, &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */ - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - - atomic_inc (&ksocknal_packets_launched); -} - -ksock_conn_t * -ksocknal_send_target (ptl_nid_t nid) -{ - ptl_nid_t gatewaynid; - ksock_conn_t *conn; - int rc; - - if ((conn = ksocknal_get_conn (nid)) == NULL) { - /* It's not a peer; try to find a gateway */ - rc = kpr_lookup (&ksocknal_data.ksnd_router, nid, &gatewaynid); - if (rc != 0) { - CERROR("Can't route to "LPX64": router error %d\n", - nid, rc); - return (NULL); - } - - if ((conn = ksocknal_get_conn (gatewaynid)) == NULL) { - CERROR ("Can't route to "LPX64": gateway "LPX64 - " is not a peer\n", nid, gatewaynid); - return (NULL); - } - } - - return (conn); -} - -ksock_ltx_t * -ksocknal_setup_hdr (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type) -{ - ksock_ltx_t *ltx; - - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. 
*/ - ltx = ksocknal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); - if (ltx == NULL) { - CERROR ("Can't allocate tx desc\n"); - return (NULL); - } - - /* Init local send packet (storage for hdr, finalize() args) */ - ltx->ltx_hdr = *hdr; - ltx->ltx_private = private; - ltx->ltx_cookie = cookie; - - /* Init common ltx_tx */ - ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr); - - /* We always have 1 mapped frag for the header */ - ltx->ltx_tx.tx_niov = 1; - ltx->ltx_tx.tx_iov = <x->ltx_iov_space.hdr; - ltx->ltx_tx.tx_iov[0].iov_base = <x->ltx_hdr; - ltx->ltx_tx.tx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - ltx->ltx_tx.tx_kiov = NULL; - ltx->ltx_tx.tx_nkiov = 0; - - return (ltx); -} - -int -ksocknal_send (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, - size_t payload_len) -{ - ksock_ltx_t *ltx; - ksock_conn_t *conn; - - /* NB 'private' is different depending on what we're sending. - * Just ignore it until we can rely on it - * - * Also, the return code from this procedure is ignored. - * If we can't send, we must still complete with lib_finalize(). - * We'll have to wait for 3.2 to return an error event. 
- */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64 - " pid %d\n", payload_len, payload_niov, nid, pid); - - conn = ksocknal_send_target (nid); - if (conn == NULL) { - lib_finalize (&ksocknal_lib, private, cookie); - return (-1); - } - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) { - ksocknal_put_conn (conn); - lib_finalize (&ksocknal_lib, private, cookie); - return (-1); - } - - /* append the payload_iovs to the one pointing at the header */ - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - memcpy (ltx->ltx_tx.tx_iov + 1, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - ksocknal_launch_packet (conn, <x->ltx_tx); - return (0); -} - -int -ksocknal_send_pages (nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, ptl_kiov_t *payload_iov, size_t payload_len) -{ - ksock_ltx_t *ltx; - ksock_conn_t *conn; - - /* NB 'private' is different depending on what we're sending. 
- * Just ignore it until we can rely on it */ - - CDEBUG(D_NET, - "sending "LPSZ" bytes in %d mapped frags to nid: "LPX64" pid %d\n", - payload_len, payload_niov, nid, pid); - - conn = ksocknal_send_target (nid); - if (conn == NULL) - return (-1); - - ltx = ksocknal_setup_hdr (nal, private, cookie, hdr, type); - if (ltx == NULL) { - ksocknal_put_conn (conn); - return (-1); - } - - LASSERT (ltx->ltx_tx.tx_niov == 1 && ltx->ltx_tx.tx_nkiov == 0); - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - ltx->ltx_tx.tx_kiov = ltx->ltx_iov_space.payload.kiov; - memcpy (ltx->ltx_tx.tx_kiov, payload_iov, - payload_niov * sizeof (*payload_iov)); - ltx->ltx_tx.tx_nkiov = payload_niov; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - - ksocknal_launch_packet (conn, <x->ltx_tx); - return (0); -} - -void -ksocknal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - ksock_conn_t *conn; - ptl_nid_t nid = fwd->kprfd_gateway_nid; - ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch; - - CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd, - fwd->kprfd_gateway_nid, fwd->kprfd_target_nid); - - /* I'm the gateway; must be the last hop */ - if (nid == ksocknal_lib.ni.nid) - nid = fwd->kprfd_target_nid; - - conn = ksocknal_get_conn (nid); - if (conn == NULL) { - CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid); - kpr_fwd_done (&ksocknal_data.ksnd_router, fwd, -EHOSTUNREACH); - return; - } - - /* This forward has now got a ref on conn */ - - tx->tx_isfwd = 1; /* This is a forwarding packet */ - tx->tx_nob = fwd->kprfd_nob; - tx->tx_niov = fwd->kprfd_niov; - tx->tx_iov = fwd->kprfd_iov; - tx->tx_nkiov = 0; - tx->tx_kiov = NULL; - - ksocknal_launch_packet (conn, tx); -} - -int -ksocknal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&ksocknal_data.ksnd_nthreads); - return (0); -} - -void -ksocknal_thread_fini (void) -{ - atomic_dec (&ksocknal_data.ksnd_nthreads); -} - -void 
-ksocknal_fmb_callback (void *arg, int error) -{ - ksock_fmb_t *fmb = (ksock_fmb_t *)arg; - ksock_fmb_pool_t *fmp = fmb->fmb_pool; - ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]); - ksock_conn_t *conn = NULL; - ksock_sched_t *sched; - long flags; - - if (error != 0) - CERROR("Failed to route packet from "LPX64" to "LPX64": %d\n", - NTOH__u64(hdr->src_nid), NTOH__u64(hdr->dest_nid), - error); - else - CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": OK\n", - NTOH__u64 (hdr->src_nid), NTOH__u64 (hdr->dest_nid)); - - spin_lock_irqsave (&fmp->fmp_lock, flags); - - list_add (&fmb->fmb_list, &fmp->fmp_idle_fmbs); - - if (!list_empty (&fmp->fmp_blocked_conns)) { - conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, - ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - } - - spin_unlock_irqrestore (&fmp->fmp_lock, flags); - - if (conn == NULL) - return; - - CDEBUG (D_NET, "Scheduling conn %p\n", conn); - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP); - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns); - - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - - spin_unlock_irqrestore (&sched->kss_lock, flags); -} - -ksock_fmb_t * -ksocknal_get_idle_fmb (ksock_conn_t *conn) -{ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - long flags; - ksock_fmb_pool_t *pool; - ksock_fmb_t *fmb; - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (ksocknal_data.ksnd_fmbs != NULL); - - if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) - pool = &ksocknal_data.ksnd_small_fmp; - else - pool = &ksocknal_data.ksnd_large_fmp; - - spin_lock_irqsave (&pool->fmp_lock, flags); - - if (!list_empty (&pool->fmp_idle_fmbs)) { - fmb = list_entry(pool->fmp_idle_fmbs.next, - ksock_fmb_t, 
fmb_list); - list_del (&fmb->fmb_list); - spin_unlock_irqrestore (&pool->fmp_lock, flags); - - return (fmb); - } - - /* deschedule until fmb free */ - - conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP; - - list_add_tail (&conn->ksnc_rx_list, - &pool->fmp_blocked_conns); - - spin_unlock_irqrestore (&pool->fmp_lock, flags); - return (NULL); -} - - -int -ksocknal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) -{ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - int niov; /* at least the header */ - int nob; - - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left); - LASSERT (payload_nob >= 0); - LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE); - LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE); - - /* Got a forwarding buffer; copy the header we just read into the - * forwarding buffer. If there's payload start reading reading it - * into the buffer, otherwise the forwarding buffer can be kicked - * off immediately. - * - * NB fmb->fmb_iov spans the WHOLE packet. - * conn->ksnc_rx_iov spans just the payload. 
- */ - - fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]); - - /* copy header */ - memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); - - if (payload_nob == 0) { /* got complete packet already */ - atomic_inc (&ksocknal_packets_received); - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", - conn, NTOH__u64 (conn->ksnc_hdr.src_nid), - dest_nid, packet_nob); - - fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t); - - kpr_fwd_init (&fmb->fmb_fwd, dest_nid, - packet_nob, 1, fmb->fmb_iov, - ksocknal_fmb_callback, fmb); - - /* forward it now */ - kpr_fwd_start (&ksocknal_data.ksnd_router, &fmb->fmb_fwd); - - ksocknal_new_packet (conn, 0); /* on to next packet */ - return (1); - } - - niov = 1; - if (packet_nob <= PAGE_SIZE) { /* whole packet fits in first page */ - fmb->fmb_iov[0].iov_len = packet_nob; - } else { - fmb->fmb_iov[0].iov_len = PAGE_SIZE; - nob = packet_nob - PAGE_SIZE; - - do { - LASSERT (niov < fmb->fmb_npages); - fmb->fmb_iov[niov].iov_base = - page_address (fmb->fmb_pages[niov]); - fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob); - nob -= PAGE_SIZE; - niov++; - } while (nob > 0); - } - - kpr_fwd_init (&fmb->fmb_fwd, dest_nid, - packet_nob, niov, fmb->fmb_iov, - ksocknal_fmb_callback, fmb); - - /* stash router's descriptor ready for call to kpr_fwd_start */ - conn->ksnc_cookie = &fmb->fmb_fwd; - - conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */ - - /* payload is desc's iov-ed buffer, but skipping the hdr */ - LASSERT (niov <= sizeof (conn->ksnc_rx_iov_space) / - sizeof (struct iovec)); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = - (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + - sizeof (ptl_hdr_t)); - conn->ksnc_rx_iov[0].iov_len = - fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t); - - if (niov > 1) - memcpy(&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], - (niov - 1) * sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - - CDEBUG 
(D_NET, "%p "LPX64"->"LPX64" %d reading body\n", conn, - NTOH__u64 (conn->ksnc_hdr.src_nid), dest_nid, payload_nob); - return (0); -} - -void -ksocknal_fwd_parse (ksock_conn_t *conn) -{ - ksock_conn_t *conn2; - ptl_nid_t dest_nid = NTOH__u64 (conn->ksnc_hdr.dest_nid); - int body_len = NTOH__u32 (PTL_HDR_LENGTH(&conn->ksnc_hdr)); - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, - NTOH__u64 (conn->ksnc_hdr.src_nid), - dest_nid, conn->ksnc_rx_nob_left); - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER); - LASSERT (conn->ksnc_rx_scheduled); - - if (body_len < 0) { /* length corrupt (overflow) */ - CERROR("dropping packet from "LPX64" for "LPX64": packet " - "size %d illegal\n", NTOH__u64 (conn->ksnc_hdr.src_nid), - dest_nid, body_len); - ksocknal_new_packet (conn, 0); /* on to new packet */ - return; - } - - if (ksocknal_data.ksnd_fmbs == NULL) { /* not forwarding */ - CERROR("dropping packet from "LPX64" for "LPX64": not " - "forwarding\n", conn->ksnc_hdr.src_nid, - conn->ksnc_hdr.dest_nid); - /* on to new packet (skip this one's body) */ - ksocknal_new_packet (conn, body_len); - return; - } - - if (body_len > SOCKNAL_MAX_FWD_PAYLOAD) { /* too big to forward */ - CERROR ("dropping packet from "LPX64" for "LPX64 - ": packet size %d too big\n", conn->ksnc_hdr.src_nid, - conn->ksnc_hdr.dest_nid, body_len); - /* on to new packet (skip this one's body) */ - ksocknal_new_packet (conn, body_len); - return; - } - - /* should have gone direct */ - conn2 = ksocknal_get_conn (conn->ksnc_hdr.dest_nid); - if (conn2 != NULL) { - CERROR ("dropping packet from "LPX64" for "LPX64 - ": target is a peer\n", conn->ksnc_hdr.src_nid, - conn->ksnc_hdr.dest_nid); - ksocknal_put_conn (conn2); /* drop ref from get above */ - - /* on to next packet (skip this one's body) */ - ksocknal_new_packet (conn, body_len); - return; - } - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */ - conn->ksnc_rx_nob_left = body_len; /* stash packet size */ - 
conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */ -} - -int -ksocknal_new_packet (ksock_conn_t *conn, int nob_to_skip) -{ - static char ksocknal_slop_buffer[4096]; - - int nob; - int niov; - int skipped; - - if (nob_to_skip == 0) { /* right at next packet boundary now */ - conn->ksnc_rx_state = SOCKNAL_RX_HEADER; - conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t); - conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t); - - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr; - conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t); - conn->ksnc_rx_niov = 1; - - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - return (1); - } - - /* Set up to skip as much a possible now. If there's more left - * (ran out of iov entries) we'll get called again */ - - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - conn->ksnc_rx_iov = (struct iovec *)&conn->ksnc_rx_iov_space; - skipped = 0; - niov = 0; - - do { - nob = MIN (nob_to_skip, sizeof (ksocknal_slop_buffer)); - - conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof(conn->ksnc_rx_iov_space) / sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_nob_wanted = skipped; - return (0); -} - -void -ksocknal_process_receive (ksock_sched_t *sched, long *irq_flags) -{ - ksock_conn_t *conn; - ksock_fmb_t *fmb; - int rc; - - /* NB: sched->ksnc_lock lock held */ - - LASSERT (!list_empty (&sched->kss_rx_conns)); - conn = list_entry(sched->kss_rx_conns.next, ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - - spin_unlock_irqrestore (&sched->kss_lock, *irq_flags); - - CDEBUG(D_NET, "sched %p conn %p\n", sched, conn); - LASSERT (atomic_read (&conn->ksnc_refcount) > 0); - LASSERT 
(conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_ready); - - /* doesn't need a forwarding buffer */ - if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) - goto try_read; - - get_fmb: - fmb = ksocknal_get_idle_fmb (conn); - if (fmb == NULL) { /* conn descheduled waiting for idle fmb */ - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - return; - } - - if (ksocknal_init_fmb (conn, fmb)) /* packet forwarded ? */ - goto out; /* come back later for next packet */ - - try_read: - /* NB: sched lock NOT held */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_BODY || - conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - - LASSERT (conn->ksnc_rx_nob_wanted > 0); - - conn->ksnc_rx_ready = 0;/* data ready may race with me and set ready */ - mb(); /* => clear BEFORE trying to read */ - - rc = ksocknal_recvmsg(conn); - - if (rc == 0) - goto out; - if (rc < 0) { -#warning FIXME: handle socket errors properly - CERROR ("Error socknal read %p: %d\n", conn, rc); - goto out; - } - - if (conn->ksnc_rx_nob_wanted != 0) /* short read */ - goto out; /* try again later */ - - /* got all I wanted, assume there's more - prevent data_ready locking */ - conn->ksnc_rx_ready = 1; - - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_HEADER: - /* It's not for me */ - if (conn->ksnc_hdr.type != PTL_MSG_HELLO && - NTOH__u64(conn->ksnc_hdr.dest_nid) != ksocknal_lib.ni.nid) { - ksocknal_fwd_parse (conn); - switch (conn->ksnc_rx_state) { - case SOCKNAL_RX_HEADER: /* skipped (zero payload) */ - goto out; /* => come back later */ - case SOCKNAL_RX_SLOP: /* skipping packet's body */ - goto try_read; /* => go read it */ - case SOCKNAL_RX_GET_FMB: /* forwarding */ - goto get_fmb; /* => go get a fwd msg buffer */ - default: - LBUG (); - } - /* Not Reached */ - } - - PROF_START(lib_parse); - /* sets wanted_len, iovs etc */ - lib_parse(&ksocknal_lib, &conn->ksnc_hdr, conn); - PROF_FINISH(lib_parse); - - if 
(conn->ksnc_rx_nob_wanted != 0) { /* need to get payload? */ - conn->ksnc_rx_state = SOCKNAL_RX_BODY; - goto try_read; /* go read the payload */ - } - /* Fall through (completed packet for me) */ - - case SOCKNAL_RX_BODY: - atomic_inc (&ksocknal_packets_received); - /* packet is done now */ - lib_finalize(&ksocknal_lib, NULL, conn->ksnc_cookie); - /* Fall through */ - - case SOCKNAL_RX_SLOP: - /* starting new packet? */ - if (ksocknal_new_packet (conn, conn->ksnc_rx_nob_left)) - goto out; /* come back later */ - goto try_read; /* try to finish reading slop now */ - - case SOCKNAL_RX_BODY_FWD: - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", - conn, NTOH__u64 (conn->ksnc_hdr.src_nid), - NTOH__u64 (conn->ksnc_hdr.dest_nid), - conn->ksnc_rx_nob_left); - - atomic_inc (&ksocknal_packets_received); - - /* ksocknal_init_fmb() put router desc. in conn->ksnc_cookie */ - kpr_fwd_start (&ksocknal_data.ksnd_router, - (kpr_fwd_desc_t *)conn->ksnc_cookie); - - /* no slop in forwarded packets */ - LASSERT (conn->ksnc_rx_nob_left == 0); - - ksocknal_new_packet (conn, 0); /* on to next packet */ - goto out; /* (later) */ - - default: - } - - /* Not Reached */ - LBUG (); - - out: - spin_lock_irqsave (&sched->kss_lock, *irq_flags); - - /* no data there to read? 
*/ - if (!conn->ksnc_rx_ready) { - /* let socket callback schedule again */ - conn->ksnc_rx_scheduled = 0; - ksocknal_put_conn (conn); /* release scheduler's ref */ - } else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_rx_list, &sched->kss_rx_conns); -} - -int -ksocknal_recv (nal_cb_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - - LASSERT (mlen <= rlen); - LASSERT (niov <= PTL_MD_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; - memcpy (conn->ksnc_rx_iov, iov, niov * sizeof (*iov)); - - LASSERT (mlen == - lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); - - return (rlen); -} - -int -ksocknal_recv_pages (nal_cb_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, ptl_kiov_t *kiov, size_t mlen, size_t rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - - LASSERT (mlen <= rlen); - LASSERT (niov <= PTL_MD_MAX_IOV); - - conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - conn->ksnc_rx_niov = 0; - conn->ksnc_rx_iov = NULL; - conn->ksnc_rx_nkiov = niov; - conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; - memcpy (conn->ksnc_rx_kiov, kiov, niov * sizeof (*kiov)); - - LASSERT (mlen == - lib_iov_nob (conn->ksnc_rx_niov, conn->ksnc_rx_iov) + - lib_kiov_nob (conn->ksnc_rx_nkiov, conn->ksnc_rx_kiov)); - - return (rlen); -} - -int ksocknal_scheduler (void *arg) -{ - ksock_sched_t *sched = (ksock_sched_t *)arg; - unsigned long flags; - int rc; - int nloops = 0; - int id = sched - ksocknal_data.ksnd_schedulers; - char name[16]; -#if (CONFIG_SMP && CPU_AFFINITY) -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - int cpu = 
cpu_logical_map(id % num_online_cpus()); -#else -#warning "Take care of architecure specific logical APIC map" - int cpu = 1; /* Have to change later. */ -#endif /* LINUX_VERSION_CODE */ - - set_cpus_allowed (current, 1 << cpu); - id = cpu; -#endif /* CONFIG_SMP && CPU_AFFINITY */ - - snprintf (name, sizeof (name),"ksocknald[%d]", id); - kportal_daemonize (name); - kportal_blockallsigs (); - - spin_lock_irqsave (&sched->kss_lock, flags); - - while (!ksocknal_data.ksnd_shuttingdown) { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty (&sched->kss_rx_conns)) { - did_something = 1; - /* drops & regains kss_lock */ - ksocknal_process_receive (sched, &flags); - } - - if (!list_empty (&sched->kss_tx_conns)) { - did_something = 1; - /* drops and regains kss_lock */ - ksocknal_process_transmit (sched, &flags); - } -#if SOCKNAL_ZC - if (!list_empty (&sched->kss_zctxdone_list)) { - ksock_tx_t *tx = - list_entry(sched->kss_zctxdone_list.next, - ksock_tx_t, tx_list); - did_something = 1; - - list_del (&tx->tx_list); - spin_unlock_irqrestore (&sched->kss_lock, flags); - - ksocknal_tx_done (tx); - - spin_lock_irqsave (&sched->kss_lock, flags); - } -#endif - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) { /* hogging CPU? 
*/ - spin_unlock_irqrestore (&sched->kss_lock, flags); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ -#if SOCKNAL_ZC - rc = wait_event_interruptible (sched->kss_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&sched->kss_rx_conns) || - !list_empty(&sched->kss_tx_conns) || - !list_empty(&sched->kss_zctxdone_list)); -#else - rc = wait_event_interruptible (sched->kss_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&sched->kss_rx_conns) || - !list_empty(&sched->kss_tx_conns)); -#endif - LASSERT (rc == 0); - } else - our_cond_resched(); - - spin_lock_irqsave (&sched->kss_lock, flags); - } - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - ksocknal_thread_fini (); - return (0); -} - -void -ksocknal_data_ready (struct sock *sk, int n) -{ - unsigned long flags; - ksock_conn_t *conn; - ksock_sched_t *sched; - ENTRY; - - /* interleave correctly with closing sockets... */ - read_lock (&ksocknal_data.ksnd_socklist_lock); - - conn = sk->user_data; - if (conn == NULL) { /* raced with ksocknal_close_sock */ - LASSERT (sk->data_ready != &ksocknal_data_ready); - sk->data_ready (sk, n); - } else if (!conn->ksnc_rx_ready) { /* new news */ - /* Set ASAP in case of concurrent calls to me */ - conn->ksnc_rx_ready = 1; - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - /* Set again (process_receive may have cleared while I blocked for the lock) */ - conn->ksnc_rx_ready = 1; - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail(&conn->ksnc_rx_list, - &sched->kss_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); - - EXIT; -} - -void -ksocknal_write_space (struct sock *sk) -{ - unsigned long flags; - ksock_conn_t *conn; - 
ksock_sched_t *sched; - - /* interleave correctly with closing sockets... */ - read_lock (&ksocknal_data.ksnd_socklist_lock); - - conn = sk->user_data; - - CDEBUG(D_NET, "sk %p wspace %d low water %d conn %p%s%s%s\n", - sk, tcp_wspace(sk), SOCKNAL_TX_LOW_WATER(sk), conn, - (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? - " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? - " scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? - " empty" : " queued")); - - if (conn == NULL) { /* raced with ksocknal_close_sock */ - LASSERT (sk->write_space != &ksocknal_write_space); - sk->write_space (sk); - } else if (tcp_wspace(sk) >= SOCKNAL_TX_LOW_WATER(sk)) { /* got enough space */ - clear_bit (SOCK_NOSPACE, &sk->socket->flags); - - if (!conn->ksnc_tx_ready) { /* new news */ - /* Set ASAP in case of concurrent calls to me */ - conn->ksnc_tx_ready = 1; - - sched = conn->ksnc_scheduler; - - spin_lock_irqsave (&sched->kss_lock, flags); - - /* Set again (process_transmit may have - cleared while I blocked for the lock) */ - conn->ksnc_tx_ready = 1; - - if (!conn->ksnc_tx_scheduled && // not being progressed - !list_empty(&conn->ksnc_tx_queue)){//packets to send - list_add_tail (&conn->ksnc_tx_list, - &sched->kss_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - if (waitqueue_active (&sched->kss_waitq)) - wake_up (&sched->kss_waitq); - } - - spin_unlock_irqrestore (&sched->kss_lock, flags); - } - } - - read_unlock (&ksocknal_data.ksnd_socklist_lock); -} - -int -ksocknal_reaper (void *arg) -{ - unsigned long flags; - ksock_conn_t *conn; - int rc; - - kportal_daemonize ("ksocknal_reaper"); - kportal_blockallsigs (); - - while (!ksocknal_data.ksnd_shuttingdown) { - spin_lock_irqsave (&ksocknal_data.ksnd_reaper_lock, flags); - - if (list_empty (&ksocknal_data.ksnd_reaper_list)) { - conn = NULL; - } else { - conn = list_entry 
(ksocknal_data.ksnd_reaper_list.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - } - - spin_unlock_irqrestore (&ksocknal_data.ksnd_reaper_lock, flags); - - if (conn != NULL) - ksocknal_close_conn (conn); - else { - rc = wait_event_interruptible (ksocknal_data.ksnd_reaper_waitq, - ksocknal_data.ksnd_shuttingdown || - !list_empty(&ksocknal_data.ksnd_reaper_list)); - LASSERT (rc == 0); - } - } - - ksocknal_thread_fini (); - return (0); -} - -nal_cb_t ksocknal_lib = { - nal_data: &ksocknal_data, /* NAL private data */ - cb_send: ksocknal_send, - cb_send_pages: ksocknal_send_pages, - cb_recv: ksocknal_recv, - cb_recv_pages: ksocknal_recv_pages, - cb_read: ksocknal_read, - cb_write: ksocknal_write, - cb_callback: ksocknal_callback, - cb_malloc: ksocknal_malloc, - cb_free: ksocknal_free, - cb_printf: ksocknal_printf, - cb_cli: ksocknal_cli, - cb_sti: ksocknal_sti, - cb_dist: ksocknal_dist -}; diff --git a/lustre/portals/knals/toenal/.cvsignore b/lustre/portals/knals/toenal/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/knals/toenal/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/portals/knals/toenal/Makefile.am b/lustre/portals/knals/toenal/Makefile.am deleted file mode 100644 index 9bfff64..0000000 --- a/lustre/portals/knals/toenal/Makefile.am +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../../Rules.linux - -MODULE = ktoenal -modulenet_DATA = ktoenal.o -EXTRA_PROGRAMS = ktoenal - -DEFS = -ktoenal_SOURCES = toenal.c toenal_cb.c toenal.h diff --git a/lustre/portals/knals/toenal/toenal.c b/lustre/portals/knals/toenal/toenal.c deleted file mode 100644 index 1f5dc38..0000000 --- a/lustre/portals/knals/toenal/toenal.c +++ /dev/null @@ -1,629 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Kedar Sovani <kedar@calsoftinc.com> - * Author: Amey Inamdar <amey@calsoftinc.com> - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#include <linux/poll.h> -#include "toenal.h" - -ptl_handle_ni_t ktoenal_ni; -static nal_t ktoenal_api; -static ksock_nal_data_t ktoenal_data; - -/* -ksocknal_interface_t ktoenal_interface = { - ksni_add_sock: ktoenal_add_sock, - ksni_close_sock: ktoenal_close_sock, - ksni_set_mynid: ktoenal_set_mynid, -}; -*/ - -kpr_nal_interface_t ktoenal_router_interface = { - kprni_nalid: TOENAL, - kprni_arg: &ktoenal_data, - kprni_fwd: ktoenal_fwd_packet, -}; - - -int -ktoenal_api_forward(nal_t *nal, int id, void *args, size_t args_len, - void *ret, size_t ret_len) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - - lib_dispatch(nal_cb, k, id, args, ret); /* ktoenal_send needs k */ - return PTL_OK; -} - -int -ktoenal_api_shutdown(nal_t *nal, int ni) -{ - CDEBUG (D_NET, "closing all connections\n"); - - return ktoenal_close_sock(0); /* close all sockets */ -} - -void -ktoenal_api_yield(nal_t *nal) -{ - our_cond_resched(); - return; -} - -void -ktoenal_api_lock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_cli(nal_cb,flags); -} - -void -ktoenal_api_unlock(nal_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *k; - nal_cb_t *nal_cb; - - k = nal->nal_data; - nal_cb = k->ksnd_nal_cb; - nal_cb->cb_sti(nal_cb,flags); -} - -nal_t * -ktoenal_init(int interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t ac_size, ptl_pid_t requested_pid) -{ - CDEBUG(D_NET, "calling lib_init with nid "LPX64"\n", - ktoenal_data.ksnd_mynid); - lib_init(&ktoenal_lib, ktoenal_data.ksnd_mynid, 0, 10, ptl_size, - ac_size); - return (&ktoenal_api); -} - -/* - * EXTRA functions follow - */ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#define SOCKET_I(inode) (&(inode)->u.socket_i) -#endif -static __inline__ struct socket * -socki_lookup(struct inode *inode) -{ - return SOCKET_I(inode); -} - -int -ktoenal_set_mynid(ptl_nid_t nid) -{ - lib_ni_t *ni = 
&ktoenal_lib.ni;
-
-        /* FIXME: we have to do this because we call lib_init() at module
-         * insertion time, which is before we have 'mynid' available. lib_init
-         * sets the NAL's nid, which it uses to tell other nodes where packets
-         * are coming from. This is not a very graceful solution to this
-         * problem. */
-
-        CDEBUG(D_IOCTL, "setting mynid to "LPX64" (old nid="LPX64")\n", nid, ni->nid);
-
-        ktoenal_data.ksnd_mynid = nid;
-        ni->nid = nid;
-        return (0);
-}
-
-/* Register the open file descriptor 'fd' as the connection to peer 'nid'.
- *
- * Takes a reference on the file (fget), allocates and zeroes a connection
- * descriptor, marks the file non-blocking, links the connection onto
- * ktoenal_data.ksnd_socklist and wakes the poll thread so it notices the
- * new socket. A module reference is taken for the lifetime of the
- * connection.
- *
- * Returns 0 on success, -EINVAL if 'fd' cannot be resolved, or -ENOMEM if
- * the descriptor cannot be allocated; on failure the file reference is
- * dropped again. */
-int
-ktoenal_add_sock (ptl_nid_t nid, int fd)
-{
-        unsigned long      flags;
-        ksock_conn_t      *conn;
-        struct file       *file = NULL;
-        struct socket     *sock = NULL;
-        int                ret;
-        ENTRY;
-
-        file = fget(fd);
-        if (file == NULL)
-                RETURN(-EINVAL);
-
-        ret = -EINVAL;
-        sock = socki_lookup(file->f_dentry->d_inode);
-        if (sock == NULL)
-                GOTO(error, ret);
-
-        ret = -ENOMEM;
-        PORTAL_ALLOC(conn, sizeof(*conn));
-        if (!conn)
-                GOTO(error, ret);
-
-        /* zero the whole descriptor (sizeof (*conn)), not just a
-         * pointer's worth of it, so every field starts out consistent */
-        memset (conn, 0, sizeof (*conn));
-        file->f_flags |= O_NONBLOCK;    /* Does this have any conflicts */
-        conn->ksnc_file = file;
-        conn->ksnc_sock = sock;
-        conn->ksnc_peernid = nid;
-        atomic_set (&conn->ksnc_refcount, 1);    /* 1 ref for socklist */
-
-        conn->ksnc_rx_ready = 0;
-        conn->ksnc_rx_scheduled = 0;
-        ktoenal_new_packet (conn, 0);
-
-        INIT_LIST_HEAD (&conn->ksnc_tx_queue);
-        conn->ksnc_tx_ready = 0;
-        conn->ksnc_tx_scheduled = 0;
-
-        LASSERT (!in_interrupt());
-        write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
-
-        list_add(&conn->ksnc_list, &ktoenal_data.ksnd_socklist);
-        write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
-
-        ktoenal_data_ready(conn);
-        ktoenal_write_space(conn);
-
-        ktoenal_data.ksnd_slistchange = 1;
-        wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
-        /* Schedule pollthread so that it will poll
-         * for newly created socket
-         */
-
-
-        CDEBUG(D_IOCTL, "conn [%p] registered for nid "LPX64"\n",
-               conn, conn->ksnc_peernid);
-
-        /* Can't unload while connection active */
-        PORTAL_MODULE_USE;
-        RETURN(0);
-
-error:
-        fput(file);
-        return (ret);
-}
-
-/* Close the connection to 'nid'; passing in a zero nid will close all
- * connections.
- *
- * Matching connections are moved from ksnd_socklist onto a private list
- * under the socklist write lock, then the socklist's reference on each is
- * dropped outside the lock and the poll thread is woken.
- *
- * Returns 0 if at least one connection was closed, -ENOENT otherwise. */
-int
-ktoenal_close_sock(ptl_nid_t nid)
-{
-        unsigned long      flags;       /* irqsave state must be unsigned long */
-        ksock_conn_t      *conn;
-        LIST_HEAD         (death_row);
-        struct list_head  *tmp;
-
-        LASSERT (!in_interrupt());
-        write_lock_irqsave (&ktoenal_data.ksnd_socklist_lock, flags);
-
-        if (nid == 0)                           /* close ALL connections */
-        {
-                /* insert 'death row' into the socket list... */
-                list_add (&death_row, &ktoenal_data.ksnd_socklist);
-                /* ...extract and reinitialise the socket list itself... */
-                list_del_init (&ktoenal_data.ksnd_socklist);
-                /* ...and voila, death row is the proud owner of all conns */
-        } else list_for_each (tmp, &ktoenal_data.ksnd_socklist) {
-
-                conn = list_entry (tmp, ksock_conn_t, ksnc_list);
-
-                if (conn->ksnc_peernid == nid)
-                {
-                        list_del (&conn->ksnc_list);
-                        list_add (&conn->ksnc_list, &death_row);
-                        break;
-                }
-        }
-
-
-        write_unlock_irqrestore (&ktoenal_data.ksnd_socklist_lock, flags);
-
-        if (list_empty (&death_row))
-                return (-ENOENT);
-
-        do {
-                conn = list_entry (death_row.next, ksock_conn_t, ksnc_list);
-                list_del (&conn->ksnc_list);
-                ktoenal_put_conn (conn);       /* drop ref for ksnd_socklist */
-        } while (!list_empty (&death_row));
-
-        ktoenal_data.ksnd_slistchange = 1;
-        wake_up_process(ktoenal_data.ksnd_pollthread_tsk);
-
-        return (0);
-}
-
-
-ksock_conn_t *
-ktoenal_get_conn (ptl_nid_t nid)
-{
-        struct list_head *tmp;
-        ksock_conn_t     *conn;
-
-        PROF_START(conn_list_walk);
-
-        read_lock (&ktoenal_data.ksnd_socklist_lock);
-
-        list_for_each(tmp, &ktoenal_data.ksnd_socklist) {
-
-                conn = list_entry(tmp, ksock_conn_t, ksnc_list);
-
-                if (conn->ksnc_peernid == nid)
-                {
-                        /* caller is referencing */
-                        atomic_inc (&conn->ksnc_refcount);
-
-                        read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
-                        CDEBUG(D_NET, "got conn [%p] -> "LPX64" (%d)\n",
-                               conn, nid, atomic_read (&conn->ksnc_refcount));
-
-                        PROF_FINISH(conn_list_walk);
-                        return (conn);
-                }
-        }
-
-        read_unlock (&ktoenal_data.ksnd_socklist_lock);
-
- CDEBUG(D_NET, "No connection found when looking for nid "LPX64"\n", nid); - PROF_FINISH(conn_list_walk); - return (NULL); -} - -void -ktoenal_close_conn (ksock_conn_t *conn) -{ - CDEBUG (D_NET, "connection [%p] closed \n", conn); - - fput (conn->ksnc_file); - PORTAL_FREE (conn, sizeof (*conn)); - /* One less connection keeping us hanging on */ - PORTAL_MODULE_UNUSE; -} - -void -_ktoenal_put_conn (ksock_conn_t *conn) -{ - unsigned long flags; - - CDEBUG (D_NET, "connection [%p] handed the black spot\n", conn); - - /* "But what is the black spot, captain?" I asked. - * "That's a summons, mate..." */ - - LASSERT (atomic_read (&conn->ksnc_refcount) == 0); - LASSERT (!conn->ksnc_rx_scheduled); - - if (!in_interrupt()) - { - ktoenal_close_conn (conn); - return; - } - - spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags); - - list_add (&conn->ksnc_list, &ktoenal_data.ksnd_reaper_list); - wake_up (&ktoenal_data.ksnd_reaper_waitq); - - spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags); -} - -void -ktoenal_free_buffers (void) -{ - if (ktoenal_data.ksnd_fmbs != NULL) - { - ksock_fmb_t *fmb = (ksock_fmb_t *)ktoenal_data.ksnd_fmbs; - int i; - int j; - - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++, fmb++) - for (j = 0; j < fmb->fmb_npages; j++) - if (fmb->fmb_pages[j] != NULL) - __free_page (fmb->fmb_pages[j]); - - PORTAL_FREE (ktoenal_data.ksnd_fmbs, - sizeof (ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS)); - } - - if (ktoenal_data.ksnd_ltxs != NULL) - PORTAL_FREE (ktoenal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); -} - -int -ktoenal_cmd(struct portal_ioctl_data * data, void * private) -{ - int rc = -EINVAL; - - LASSERT (data != NULL); - - switch(data->ioc_nal_cmd) { - case NAL_CMD_REGISTER_PEER_FD: { - rc = ktoenal_add_sock(data->ioc_nid, data->ioc_fd); - break; - } - case NAL_CMD_CLOSE_CONNECTION: { - rc = ktoenal_close_sock(data->ioc_nid); - break; - } - case 
NAL_CMD_REGISTER_MYNID: { - rc = ktoenal_set_mynid (data->ioc_nid); - break; - } - } - - return rc; -} - - -void __exit -ktoenal_module_fini (void) -{ - CDEBUG(D_MALLOC, "before NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - switch (ktoenal_data.ksnd_init) - { - default: - LASSERT (0); - - case SOCKNAL_INIT_ALL: - kportal_nal_unregister(TOENAL); - PORTAL_SYMBOL_UNREGISTER (ktoenal_ni); - /* fall through */ - - case SOCKNAL_INIT_PTL: - PtlNIFini(ktoenal_ni); - lib_fini(&ktoenal_lib); - /* fall through */ - - case SOCKNAL_INIT_DATA: - /* Module refcount only gets to zero when all connections - * have been closed so all lists must be empty */ - LASSERT (list_empty (&ktoenal_data.ksnd_socklist)); - LASSERT (list_empty (&ktoenal_data.ksnd_reaper_list)); - LASSERT (list_empty (&ktoenal_data.ksnd_rx_conns)); - LASSERT (list_empty (&ktoenal_data.ksnd_tx_conns)); - LASSERT (list_empty (&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns)); - LASSERT (list_empty (&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns)); - - kpr_shutdown (&ktoenal_data.ksnd_router); /* stop router calling me */ - - /* flag threads to terminate; wake and wait for them to die */ - ktoenal_data.ksnd_shuttingdown = 1; - wake_up_all (&ktoenal_data.ksnd_reaper_waitq); - wake_up_all (&ktoenal_data.ksnd_sched_waitq); - wake_up_process(ktoenal_data.ksnd_pollthread_tsk); - - while (atomic_read (&ktoenal_data.ksnd_nthreads) != 0) - { - CDEBUG (D_NET, "waitinf for %d threads to terminate\n", - atomic_read (&ktoenal_data.ksnd_nthreads)); - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - - kpr_deregister (&ktoenal_data.ksnd_router); - - ktoenal_free_buffers(); - /* fall through */ - - case SOCKNAL_INIT_NOTHING: - break; - } - - CDEBUG(D_MALLOC, "after NAL cleanup: kmem %d\n", - atomic_read (&portal_kmemory)); - - printk(KERN_INFO "Routing socket NAL unloaded (final mem %d)\n", - atomic_read(&portal_kmemory)); -} - -int __init -ktoenal_module_init (void) -{ - int pkmem = 
atomic_read(&portal_kmemory); - int rc; - int i; - int j; - - /* packet descriptor must fit in a router descriptor's scratchpad */ - LASSERT(sizeof (ksock_tx_t) <= sizeof (kprfd_scratch_t)); - - LASSERT (ktoenal_data.ksnd_init == SOCKNAL_INIT_NOTHING); - - ktoenal_api.forward = ktoenal_api_forward; - ktoenal_api.shutdown = ktoenal_api_shutdown; - ktoenal_api.yield = ktoenal_api_yield; - ktoenal_api.validate = NULL; /* our api validate is a NOOP */ - ktoenal_api.lock = ktoenal_api_lock; - ktoenal_api.unlock = ktoenal_api_unlock; - ktoenal_api.nal_data = &ktoenal_data; - - ktoenal_lib.nal_data = &ktoenal_data; - - memset (&ktoenal_data, 0, sizeof (ktoenal_data)); /* zero pointers */ - - INIT_LIST_HEAD(&ktoenal_data.ksnd_socklist); - rwlock_init(&ktoenal_data.ksnd_socklist_lock); - - ktoenal_data.ksnd_nal_cb = &ktoenal_lib; - spin_lock_init (&ktoenal_data.ksnd_nal_cb_lock); - - spin_lock_init (&ktoenal_data.ksnd_sched_lock); - - init_waitqueue_head (&ktoenal_data.ksnd_sched_waitq); - - INIT_LIST_HEAD (&ktoenal_data.ksnd_rx_conns); - INIT_LIST_HEAD (&ktoenal_data.ksnd_tx_conns); - - INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ktoenal_data.ksnd_small_fmp.fmp_blocked_conns); - INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_idle_fmbs); - INIT_LIST_HEAD(&ktoenal_data.ksnd_large_fmp.fmp_blocked_conns); - - INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_nblk_ltx_list); - INIT_LIST_HEAD(&ktoenal_data.ksnd_idle_ltx_list); - init_waitqueue_head(&ktoenal_data.ksnd_idle_ltx_waitq); - - INIT_LIST_HEAD (&ktoenal_data.ksnd_reaper_list); - init_waitqueue_head(&ktoenal_data.ksnd_reaper_waitq); - spin_lock_init (&ktoenal_data.ksnd_reaper_lock); - - ktoenal_data.ksnd_init = SOCKNAL_INIT_DATA; /* flag lists/ptrs/locks initialised */ - - PORTAL_ALLOC(ktoenal_data.ksnd_fmbs, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS)); - if (ktoenal_data.ksnd_fmbs == NULL) - RETURN(-ENOMEM); - - /* NULL out buffer pointers etc */ - 
memset(ktoenal_data.ksnd_fmbs, 0, - sizeof(ksock_fmb_t) * (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS)); - - for (i = 0; i < (SOCKNAL_SMALL_FWD_NMSGS + SOCKNAL_LARGE_FWD_NMSGS); i++) - { - ksock_fmb_t *fmb = &((ksock_fmb_t *)ktoenal_data.ksnd_fmbs)[i]; - - if (i < SOCKNAL_SMALL_FWD_NMSGS) - { - fmb->fmb_npages = SOCKNAL_SMALL_FWD_PAGES; - fmb->fmb_pool = &ktoenal_data.ksnd_small_fmp; - } - else - { - fmb->fmb_npages = SOCKNAL_LARGE_FWD_PAGES; - fmb->fmb_pool = &ktoenal_data.ksnd_large_fmp; - } - - LASSERT (fmb->fmb_npages > 0); - for (j = 0; j < fmb->fmb_npages; j++) - { - fmb->fmb_pages[j] = alloc_page(GFP_KERNEL); - - if (fmb->fmb_pages[j] == NULL) - { - ktoenal_module_fini (); - return (-ENOMEM); - } - - LASSERT (page_address (fmb->fmb_pages[j]) != NULL); - } - - list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); - } - - PORTAL_ALLOC(ktoenal_data.ksnd_ltxs, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - if (ktoenal_data.ksnd_ltxs == NULL) - { - ktoenal_module_fini (); - return (-ENOMEM); - } - - /* Deterministic bugs please */ - memset (ktoenal_data.ksnd_ltxs, 0xeb, - sizeof (ksock_ltx_t) * (SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS)); - - for (i = 0; i < SOCKNAL_NLTXS + SOCKNAL_NNBLK_LTXS; i++) - { - ksock_ltx_t *ltx = &((ksock_ltx_t *)ktoenal_data.ksnd_ltxs)[i]; - - ltx->ltx_idle = i < SOCKNAL_NLTXS ? 
- &ktoenal_data.ksnd_idle_ltx_list : - &ktoenal_data.ksnd_idle_nblk_ltx_list; - list_add (<x->ltx_tx.tx_list, ltx->ltx_idle); - } - - rc = PtlNIInit(ktoenal_init, 32, 4, 0, &ktoenal_ni); - if (rc != 0) - { - CERROR("ktoenal: PtlNIInit failed: error %d\n", rc); - ktoenal_module_fini (); - RETURN (rc); - } - PtlNIDebug(ktoenal_ni, ~0); - - ktoenal_data.ksnd_init = SOCKNAL_INIT_PTL; /* flag PtlNIInit() called */ - - ktoenal_data.ksnd_slistchange = 1; - for (i = 0; i < TOENAL_N_SCHED; i++) - { - rc = ktoenal_thread_start (ktoenal_scheduler, NULL); - if (rc != 0) - { - CERROR("Can't spawn socknal scheduler[%d]: %d\n", i, rc); - ktoenal_module_fini (); - RETURN (rc); - } - } - - rc = ktoenal_thread_start (ktoenal_reaper, NULL); - if (rc != 0) - { - CERROR("Can't spawn socknal reaper: %d\n", rc); - ktoenal_module_fini (); - RETURN (rc); - } - - rc = ktoenal_thread_start (ktoenal_pollthread, NULL); - if (rc != 0) - { - CERROR("Can't spawn socknal pollthread: %d\n", rc); - ktoenal_module_fini (); - RETURN (rc); - } - - rc = kpr_register(&ktoenal_data.ksnd_router, - &ktoenal_router_interface); - if (rc != 0) - CDEBUG (D_NET, "Can't initialise routing interface (rc = %d): not routing\n", rc); - - rc = kportal_nal_register(TOENAL, &ktoenal_cmd, NULL); - if (rc != 0) - CDEBUG(D_NET, "Can't initialise command interface (rc = %d)\n", - rc); - - PORTAL_SYMBOL_REGISTER(ktoenal_ni); - - /* flag everything initialised */ - ktoenal_data.ksnd_init = SOCKNAL_INIT_ALL; - - printk(KERN_INFO"Routing TOE NAL loaded (Routing %s, initial mem %d)\n", - kpr_routing(&ktoenal_data.ksnd_router) ? "enabled" : "disabled", - pkmem); - - return (0); -} - -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Kernel TCP Socket NAL v0.01"); -MODULE_LICENSE("GPL"); - -module_init(ktoenal_module_init); -module_exit(ktoenal_module_fini); - -EXPORT_SYMBOL (ktoenal_ni); diff --git a/lustre/portals/knals/toenal/toenal.h b/lustre/portals/knals/toenal/toenal.h deleted file mode 100644 index f793d3b..0000000 --- a/lustre/portals/knals/toenal/toenal.h +++ /dev/null @@ -1,236 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Kedar Sovani <kedar@calsoftinc.com> - * Author: Amey Inamdar <amey@calsoftinc.com> - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_PORTAL_ALLOC -#define EXPORT_SYMTAB - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <net/tcp.h> -#include <linux/uio.h> -#include <linux/sched.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <asm/uaccess.h> -#include <asm/segment.h> - -#define DEBUG_SUBSYSTEM S_SOCKNAL - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <portals/lib-p30.h> - -#define SOCKNAL_MAX_FWD_PAYLOAD (64<<10) /* biggest payload I can forward */ - -#define SOCKNAL_NLTXS 128 /* # normal transmit messages */ -#define SOCKNAL_NNBLK_LTXS 128 /* # transmit messages reserved if can't block */ - -#define SOCKNAL_SMALL_FWD_NMSGS 128 /* # small messages I can be forwarding at any time */ -#define SOCKNAL_LARGE_FWD_NMSGS 32 /* # large messages I can be forwarding at any time */ - -#define SOCKNAL_SMALL_FWD_PAGES 1 /* # pages in a small message fwd buffer */ - -#define SOCKNAL_LARGE_FWD_PAGES (PAGE_ALIGN (sizeof (ptl_hdr_t) + SOCKNAL_MAX_FWD_PAYLOAD) >> PAGE_SHIFT) - /* # pages in a large message fwd buffer */ - -#define SOCKNAL_RESCHED 100 /* # scheduler loops before reschedule */ - -#define SOCKNAL_TX_LOW_WATER(sk) (((sk)->sndbuf*8)/10) - -#define TOENAL_N_SCHED 1 - -typedef struct /* pool of forwarding buffers */ -{ - struct list_head fmp_idle_fmbs; /* buffers waiting for a connection */ - struct list_head fmp_blocked_conns; /* connections waiting for a buffer */ -} ksock_fmb_pool_t; - -typedef struct { - int ksnd_init; /* initialisation state */ - - struct list_head ksnd_socklist; /* all my connections */ - rwlock_t ksnd_socklist_lock; /* stabilise add/find/remove */ - - - ptl_nid_t ksnd_mynid; - nal_cb_t *ksnd_nal_cb; - spinlock_t ksnd_nal_cb_lock; 
/* lib cli/sti lock */ - - atomic_t ksnd_nthreads; /* # live threads */ - int ksnd_shuttingdown; /* tell threads to exit */ - - kpr_router_t ksnd_router; /* THE router */ - - spinlock_t ksnd_sched_lock; /* serialise packet scheduling */ - wait_queue_head_t ksnd_sched_waitq; /* where scheduler(s) wait */ - - struct list_head ksnd_rx_conns; /* conn waiting to be read */ - struct list_head ksnd_tx_conns; /* conn waiting to be written */ - - void *ksnd_fmbs; /* all the pre-allocated FMBs */ - ksock_fmb_pool_t ksnd_small_fmp; /* small message forwarding buffers */ - ksock_fmb_pool_t ksnd_large_fmp; /* large message forwarding buffers */ - - void *ksnd_ltxs; /* all the pre-allocated LTXs */ - struct list_head ksnd_idle_ltx_list; /* where to get an idle LTX */ - struct list_head ksnd_idle_nblk_ltx_list; /* where to get an idle LTX if you can't block */ - wait_queue_head_t ksnd_idle_ltx_waitq; /* where to block for an idle LTX */ - - struct list_head ksnd_reaper_list; /* conn waiting to be reaped */ - wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ - spinlock_t ksnd_reaper_lock; /* serialise */ - - struct task_struct *ksnd_pollthread_tsk;/* task_struct for the poll thread */ - poll_table ksnd_pwait; /* poll wait table for the socket */ - int ksnd_slistchange; /* informs the pollthread that - * the socklist has changed */ -} ksock_nal_data_t; - -#define SOCKNAL_INIT_NOTHING 0 -#define SOCKNAL_INIT_DATA 1 -#define SOCKNAL_INIT_PTL 2 -#define SOCKNAL_INIT_ALL 3 - -typedef struct /* transmit packet */ -{ - struct list_head tx_list; /* queue on conn for transmission etc */ - char tx_isfwd; /* forwarding / sourced here */ - int tx_nob; /* # packet bytes */ - int tx_niov; /* # packet frags */ - struct iovec *tx_iov; /* packet frags */ -} ksock_tx_t; - -typedef struct /* locally transmitted packet */ -{ - ksock_tx_t ltx_tx; /* send info */ - struct list_head *ltx_idle; /* where to put when idle */ - void *ltx_private; /* lib_finalize() callback arg */ - void 
*ltx_cookie; /* lib_finalize() callback arg */ - struct iovec ltx_iov[1 + PTL_MD_MAX_IOV]; /* msg frags */ - ptl_hdr_t ltx_hdr; /* buffer for packet header */ -} ksock_ltx_t; - -#define KSOCK_TX_2_KPR_FWD_DESC(ptr) list_entry (ptr, kpr_fwd_desc_t, kprfd_scratch) -/* forwarded packets (router->socknal) embedded in kpr_fwd_desc_t::kprfd_scratch */ - -#define KSOCK_TX_2_KSOCK_LTX(ptr) list_entry (ptr, ksock_ltx_t, ltx_tx) -/* local packets (lib->socknal) embedded in ksock_ltx_t::ltx_tx */ - -/* NB list_entry() is used here as convenient macro for calculating a - * pointer to a struct from the addres of a member. - */ - -typedef struct /* Kernel portals Socket Forwarding message buffer */ -{ /* (socknal->router) */ - struct list_head fmb_list; /* queue idle */ - kpr_fwd_desc_t fmb_fwd; /* router's descriptor */ - int fmb_npages; /* # pages allocated */ - ksock_fmb_pool_t *fmb_pool; /* owning pool */ - struct page *fmb_pages[SOCKNAL_LARGE_FWD_PAGES]; - struct iovec fmb_iov[SOCKNAL_LARGE_FWD_PAGES]; -} ksock_fmb_t; - -#define SOCKNAL_RX_HEADER 1 /* reading header */ -#define SOCKNAL_RX_BODY 2 /* reading body (to deliver here) */ -#define SOCKNAL_RX_BODY_FWD 3 /* reading body (to forward) */ -#define SOCKNAL_RX_SLOP 4 /* skipping body */ -#define SOCKNAL_RX_GET_FMB 5 /* scheduled for forwarding */ -#define SOCKNAL_RX_FMB_SLEEP 6 /* blocked waiting for a fwd desc */ - -typedef struct -{ - struct list_head ksnc_list; /* stash on global socket list */ - struct file *ksnc_file; /* socket filp */ - struct socket *ksnc_sock; /* socket */ - ptl_nid_t ksnc_peernid; /* who's on the other end */ - atomic_t ksnc_refcount; /* # users */ - - /* READER */ - struct list_head ksnc_rx_list; /* where I enq waiting input or a forwarding descriptor */ - unsigned long ksnc_rx_ready; /* data ready to read */ - int ksnc_rx_scheduled; /* being progressed */ - int ksnc_rx_state; /* what is being read */ - int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted; /* bytes 
actually wanted */ - int ksnc_rx_niov; /* # frags */ - struct iovec ksnc_rx_iov[1 + PTL_MD_MAX_IOV]; /* the frags */ - - void *ksnc_cookie; /* rx lib_finalize passthru arg */ - ptl_hdr_t ksnc_hdr; /* where I read headers into */ - - /* WRITER */ - struct list_head ksnc_tx_list; /* where I enq waiting for output space */ - struct list_head ksnc_tx_queue; /* packets waiting to be sent */ - unsigned long ksnc_tx_ready; /* write space */ - int ksnc_tx_scheduled; /* being progressed */ - -} ksock_conn_t; - -extern int ktoenal_add_sock (ptl_nid_t nid, int fd); -extern int ktoenal_close_sock(ptl_nid_t nid); -extern int ktoenal_set_mynid(ptl_nid_t nid); -extern int ktoenal_push_sock(ptl_nid_t nid); -extern ksock_conn_t *ktoenal_get_conn (ptl_nid_t nid); -extern void _ktoenal_put_conn (ksock_conn_t *conn); -extern void ktoenal_close_conn (ksock_conn_t *conn); - -static inline void -ktoenal_put_conn (ksock_conn_t *conn) -{ - CDEBUG (D_OTHER, "putting conn[%p] -> "LPX64" (%d)\n", - conn, conn->ksnc_peernid, atomic_read (&conn->ksnc_refcount)); - - if (atomic_dec_and_test (&conn->ksnc_refcount)) - _ktoenal_put_conn (conn); -} - -extern int ktoenal_thread_start (int (*fn)(void *arg), void *arg); -extern int ktoenal_new_packet (ksock_conn_t *conn, int skip); -extern void ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd); -extern int ktoenal_scheduler (void *arg); -extern int ktoenal_reaper (void *arg); -extern int ktoenal_pollthread (void *arg); -extern void ktoenal_data_ready(ksock_conn_t *conn); -extern void ktoenal_write_space(ksock_conn_t *conn); - - -extern nal_cb_t ktoenal_lib; -extern ksock_nal_data_t ktoenal_data; diff --git a/lustre/portals/knals/toenal/toenal_cb.c b/lustre/portals/knals/toenal/toenal_cb.c deleted file mode 100644 index ec37f6f..0000000 --- a/lustre/portals/knals/toenal/toenal_cb.c +++ /dev/null @@ -1,1219 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 
Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eric@bartonsoftware.com> - * Author: Kedar Sovani <kedar@calsoftinc.com> - * Author: Amey Inamdar <amey@calsoftinc.com> - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include <linux/poll.h> -#include "toenal.h" - -atomic_t ktoenal_packets_received; -long ktoenal_packets_launched; -long ktoenal_packets_transmitted; - -/* - * LIB functions follow - * - */ -int -ktoenal_read(nal_cb_t *nal, void *private, void *dst_addr, - user_ptr src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": reading %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ktoenal_write(nal_cb_t *nal, void *private, user_ptr dst_addr, - void *src_addr, size_t len) -{ - CDEBUG(D_NET, LPX64": writing %ld bytes from %p -> %p\n", - nal->ni.nid, (long)len, src_addr, dst_addr); - - memcpy( dst_addr, src_addr, len ); - return 0; -} - -int -ktoenal_callback (nal_cb_t * nal, void *private, lib_eq_t *eq, - ptl_event_t *ev) -{ - CDEBUG(D_NET, LPX64": callback eq %p ev %p\n", - nal->ni.nid, eq, ev); - - if (eq->event_callback != NULL) - eq->event_callback(ev); - - return 0; -} - -void * -ktoenal_malloc(nal_cb_t *nal, size_t len) -{ - void *buf; - - PORTAL_ALLOC(buf, len); - - if (buf != NULL) - memset(buf, 0, len); - - return (buf); -} - -void -ktoenal_free(nal_cb_t *nal, void *buf, size_t len) -{ - PORTAL_FREE(buf, len); -} - -void -ktoenal_printf(nal_cb_t *nal, const char *fmt, ...) 
-{ - va_list ap; - char msg[256]; - - va_start (ap, fmt); - vsnprintf (msg, sizeof (msg), fmt, ap); /* sprint safely */ - va_end (ap); - - msg[sizeof (msg) - 1] = 0; /* ensure terminated */ - - CDEBUG (D_NET, "%s", msg); -} - -void -ktoenal_cli(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data = nal->nal_data; - - spin_lock(&data->ksnd_nal_cb_lock); -} - -void -ktoenal_sti(nal_cb_t *nal, unsigned long *flags) -{ - ksock_nal_data_t *data; - data = nal->nal_data; - - spin_unlock(&data->ksnd_nal_cb_lock); -} - -int -ktoenal_dist(nal_cb_t *nal, ptl_nid_t nid, unsigned long *dist) -{ - /* I would guess that if ktoenal_get_conn(nid) == NULL, - and we're not routing, then 'nid' is very distant :) */ - if ( nal->ni.nid == nid ) { - *dist = 0; - } else { - *dist = 1; - } - - return 0; -} - -ksock_ltx_t * -ktoenal_get_ltx (int may_block) -{ - long flags; - ksock_ltx_t *ltx = NULL; - - for (;;) - { - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - if (!list_empty (&ktoenal_data.ksnd_idle_ltx_list)) - { - ltx = list_entry (ktoenal_data.ksnd_idle_ltx_list.next, ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - break; - } - - if (!may_block) - { - if (!list_empty (&ktoenal_data.ksnd_idle_nblk_ltx_list)) - { - ltx = list_entry (ktoenal_data.ksnd_idle_nblk_ltx_list.next, - ksock_ltx_t, ltx_tx.tx_list); - list_del (<x->ltx_tx.tx_list); - } - break; - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - - wait_event (ktoenal_data.ksnd_idle_ltx_waitq, - !list_empty (&ktoenal_data.ksnd_idle_ltx_list)); - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - - return (ltx); -} - -int -ktoenal_sendmsg (struct file *sock, struct iovec *iov, int niov, int nob, int flags) -{ - /* NB This procedure "consumes" iov (actually we do, tcp_sendmsg doesn't) - */ - mm_segment_t oldmm; - int rc; - - LASSERT (niov > 0); - LASSERT (nob > 0); - - oldmm = get_fs(); - set_fs (KERNEL_DS); - -#ifdef PORTAL_DEBUG - { - int 
total_nob; - int i; - - for (i = total_nob = 0; i < niov; i++) - total_nob += iov[i].iov_len; - - LASSERT (nob == total_nob); - } -#endif - LASSERT (!in_interrupt()); - - rc = sock->f_op->writev(sock, iov, niov, NULL); - - set_fs (oldmm); - - if (rc > 0) /* sent something? */ - { - nob = rc; /* consume iov */ - for (;;) - { - LASSERT (niov > 0); - - if (iov->iov_len >= nob) - { - iov->iov_len -= nob; - iov->iov_base = (void *)(((unsigned long)iov->iov_base) + nob); - break; - } - nob -= iov->iov_len; - iov->iov_len = 0; - iov++; - niov--; - } - } - - return (rc); -} - -int -ktoenal_recvmsg(struct file *sock, struct iovec *iov, int niov, int toread) -{ - /* NB This procedure "consumes" iov (actually tcp_recvmsg does) - */ - mm_segment_t oldmm; - int ret, i, len = 0, origlen = 0; - - PROF_START(our_recvmsg); - for(i = 0; i < niov; i++) { - len += iov[i].iov_len; - if(len >= toread) - break; - } - - if(len >= toread) { - origlen = iov[i].iov_len; - iov[i].iov_len -= (len - toread); - } - else { /* i == niov */ - i = niov - 1; - } - - oldmm = get_fs(); - set_fs(KERNEL_DS); - - ret = sock->f_op->readv(sock, iov, i + 1, NULL); - - set_fs(oldmm); - - if(origlen) - iov[i].iov_len = origlen; - - PROF_FINISH(our_recvmsg); - return ret; -} - -void -ktoenal_process_transmit (ksock_conn_t *conn, long *irq_flags) -{ - ksock_tx_t *tx = list_entry (conn->ksnc_tx_queue.next, ksock_tx_t, tx_list); - int rc; - - LASSERT (conn->ksnc_tx_scheduled); - LASSERT (conn->ksnc_tx_ready); - LASSERT (!list_empty (&conn->ksnc_tx_queue)); - - /* assume transmit will complete now, so dequeue while I've got the lock */ - list_del (&tx->tx_list); - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - LASSERT (tx->tx_nob > 0); - - conn->ksnc_tx_ready = 0; /* write_space may race with me and set ready */ - mb(); /* => clear BEFORE trying to write */ - - rc = ktoenal_sendmsg (conn->ksnc_file, - tx->tx_iov, tx->tx_niov, tx->tx_nob, - list_empty (&conn->ksnc_tx_queue) ? 
- MSG_DONTWAIT : (MSG_DONTWAIT | MSG_MORE)); - - CDEBUG (D_NET, "send(%d) %d\n", tx->tx_nob, rc); - - if (rc < 0) /* error */ - { - if (rc == -EAGAIN) /* socket full => */ - rc = 0; /* nothing sent */ - else - { -#warning FIXME: handle socket errors properly - CERROR ("Error socknal send(%d) %p: %d\n", tx->tx_nob, conn, rc); - rc = tx->tx_nob; /* kid on for now whole packet went */ - } - } - - if (rc == tx->tx_nob) /* everything went */ - { - conn->ksnc_tx_ready = 1; /* assume more can go (ASAP) */ - ktoenal_put_conn (conn); /* release packet's ref */ - - if (tx->tx_isfwd) /* was a forwarded packet? */ - { - kpr_fwd_done (&ktoenal_data.ksnd_router, - KSOCK_TX_2_KPR_FWD_DESC (tx), 0); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - } - else /* local send */ - { - ksock_ltx_t *ltx = KSOCK_TX_2_KSOCK_LTX (tx); - - lib_finalize (&ktoenal_lib, ltx->ltx_private, ltx->ltx_cookie); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - list_add (<x->ltx_tx.tx_list, ltx->ltx_idle); - - /* normal tx desc => wakeup anyone blocking for one */ - if (ltx->ltx_idle == &ktoenal_data.ksnd_idle_ltx_list && - waitqueue_active (&ktoenal_data.ksnd_idle_ltx_waitq)) - wake_up (&ktoenal_data.ksnd_idle_ltx_waitq); - } - ktoenal_packets_transmitted++; - } - else - { - tx->tx_nob -= rc; - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - /* back onto HEAD of tx_queue */ - list_add (&tx->tx_list, &conn->ksnc_tx_queue); - } - - if (!conn->ksnc_tx_ready || /* no space to write now */ - list_empty (&conn->ksnc_tx_queue)) /* nothing to write */ - { - conn->ksnc_tx_scheduled = 0; /* not being scheduled */ - ktoenal_put_conn (conn); /* release scheduler's ref */ - } - else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns); -} - -void -ktoenal_launch_packet (ksock_conn_t *conn, ksock_tx_t *tx) -{ - long flags; - int nob = tx->tx_nob; - struct iovec *iov = tx->tx_iov; - int niov = 1; - - LASSERT 
(nob >= sizeof (ptl_hdr_t)); - - /* Truncate iov to exactly match total packet length - * since socket sendmsg pays no attention to requested length. - */ - for (;;) - { - LASSERT (niov <= tx->tx_niov); - LASSERT (iov->iov_len >= 0); - - if (iov->iov_len >= nob) - { - iov->iov_len = nob; - break; - } - nob -= iov->iov_len; - iov++; - niov++; - } - tx->tx_niov = niov; - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - list_add_tail (&tx->tx_list, &conn->ksnc_tx_queue); - - if (conn->ksnc_tx_ready && /* able to send */ - !conn->ksnc_tx_scheduled) /* not scheduled to send */ - { - list_add_tail (&conn->ksnc_tx_list, &ktoenal_data.ksnd_tx_conns); - conn->ksnc_tx_scheduled = 1; - atomic_inc (&conn->ksnc_refcount); /* extra ref for scheduler */ - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - - ktoenal_packets_launched++; - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); -} - -int -ktoenal_send(nal_cb_t *nal, void *private, lib_msg_t *cookie, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - unsigned int payload_niov, struct iovec *payload_iov, size_t payload_len) -{ - ptl_nid_t gatewaynid; - ksock_conn_t *conn; - ksock_ltx_t *ltx; - int rc; - int i; - - /* By this point, as it happens, we have absolutely no idea what - * 'private' is. It might be ksock_nal_data or it might be ksock_conn. - * Ha ha, isn't that a funny joke? - * - * FIXME: this is not the right way to fix this; the right way is to - * always pass in the same kind of structure. This is hard right now. - * To revisit this issue, set a breakpoint in here and watch for when - * it's called from lib_finalize. I think this occurs when we send a - * packet as a side-effect of another packet, such as when an ACK has - * been requested. -phil */ - - CDEBUG(D_NET, "sending %d bytes from [%d](%p,%d)... to nid: " - LPX64" pid %d\n", (int)payload_len, payload_niov, - payload_niov > 0 ? 
payload_iov[0].iov_base : NULL, - (int)(payload_niov > 0 ? payload_iov[0].iov_len : 0), nid, pid); - - if ((conn = ktoenal_get_conn (nid)) == NULL) - { - /* It's not a peer; try to find a gateway */ - rc = kpr_lookup (&ktoenal_data.ksnd_router, nid, &gatewaynid); - if (rc != 0) - { - CERROR ("Can't route to "LPX64": router error %d\n", nid, rc); - return (-1); - } - - if ((conn = ktoenal_get_conn (gatewaynid)) == NULL) - { - CERROR ("Can't route to "LPX64": gateway "LPX64" is not a peer\n", - nid, gatewaynid); - return (-1); - } - } - - /* This transmit has now got a ref on conn */ - - /* I may not block for a transmit descriptor if I might block the - * receiver, or an interrupt handler. */ - ltx = ktoenal_get_ltx (!(type == PTL_MSG_ACK || - type == PTL_MSG_REPLY || - in_interrupt ())); - if (ltx == NULL) - { - CERROR ("Can't allocate tx desc\n"); - ktoenal_put_conn (conn); - return (-1); - } - - /* Init common (to sends and forwards) packet part */ - ltx->ltx_tx.tx_isfwd = 0; - ltx->ltx_tx.tx_nob = sizeof (*hdr) + payload_len; - ltx->ltx_tx.tx_niov = 1 + payload_niov; - ltx->ltx_tx.tx_iov = ltx->ltx_iov; - - /* Init local send packet (storage for hdr, finalize() args, iov) */ - ltx->ltx_hdr = *hdr; - ltx->ltx_private = private; - ltx->ltx_cookie = cookie; - - ltx->ltx_iov[0].iov_base = &ltx->ltx_hdr; - ltx->ltx_iov[0].iov_len = sizeof (ltx->ltx_hdr); - - LASSERT (payload_niov <= PTL_MD_MAX_IOV); - - for (i = 0; i < payload_niov; i++) - { - ltx->ltx_iov[1 + i].iov_base = payload_iov[i].iov_base; - ltx->ltx_iov[1 + i].iov_len = payload_iov[i].iov_len; - } - - ktoenal_launch_packet (conn, &ltx->ltx_tx); - return (0); -} - -void -ktoenal_fwd_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - ksock_conn_t *conn; - ptl_nid_t nid = fwd->kprfd_gateway_nid; - ksock_tx_t *tx = (ksock_tx_t *)&fwd->kprfd_scratch; - - CDEBUG (D_NET, "Forwarding [%p] -> "LPX64" ("LPX64"))\n", fwd, - fwd->kprfd_gateway_nid, fwd->kprfd_target_nid); - - if (nid == ktoenal_lib.ni.nid) /* I'm the gateway;
must be the last hop */ - nid = fwd->kprfd_target_nid; - - conn = ktoenal_get_conn (nid); - if (conn == NULL) - { - CERROR ("[%p] fwd to "LPX64" isn't a peer\n", fwd, nid); - kpr_fwd_done (&ktoenal_data.ksnd_router, fwd, -EHOSTUNREACH); - return; - } - - /* This forward has now got a ref on conn */ - - tx->tx_isfwd = 1; /* This is a forwarding packet */ - tx->tx_nob = fwd->kprfd_nob; - tx->tx_niov = fwd->kprfd_niov; - tx->tx_iov = fwd->kprfd_iov; - - ktoenal_launch_packet (conn, tx); -} - -int -ktoenal_thread_start (int (*fn)(void *arg), void *arg) -{ - long pid = kernel_thread (fn, arg, 0); - - if (pid < 0) - return ((int)pid); - - atomic_inc (&ktoenal_data.ksnd_nthreads); - return (0); -} - -void -ktoenal_thread_fini (void) -{ - atomic_dec (&ktoenal_data.ksnd_nthreads); -} - -void -ktoenal_fmb_callback (void *arg, int error) -{ - ksock_fmb_t *fmb = (ksock_fmb_t *)arg; - ptl_hdr_t *hdr = (ptl_hdr_t *) page_address(fmb->fmb_pages[0]); - ksock_conn_t *conn; - long flags; - - CDEBUG (D_NET, "routed packet from "LPX64" to "LPX64": %d\n", - hdr->src_nid, hdr->dest_nid, error); - - if (error != 0) - CERROR ("Failed to route packet from "LPX64" to "LPX64": %d\n", - hdr->src_nid, hdr->dest_nid, error); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - list_add (&fmb->fmb_list, &fmb->fmb_pool->fmp_idle_fmbs); - - if (!list_empty (&fmb->fmb_pool->fmp_blocked_conns)) - { - conn = list_entry (fmb->fmb_pool->fmp_blocked_conns.next, ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - - CDEBUG (D_NET, "Scheduling conn %p\n", conn); - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_FMB_SLEEP); - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; - list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns); - - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); -} - -ksock_fmb_t * -ktoenal_get_idle_fmb 
(ksock_conn_t *conn) -{ - /* NB called with sched lock held */ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - ksock_fmb_pool_t *pool; - ksock_fmb_t *fmb; - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - - if (packet_nob <= SOCKNAL_SMALL_FWD_PAGES * PAGE_SIZE) - pool = &ktoenal_data.ksnd_small_fmp; - else - pool = &ktoenal_data.ksnd_large_fmp; - - if (!list_empty (&pool->fmp_idle_fmbs)) - { - fmb = list_entry (pool->fmp_idle_fmbs.next, ksock_fmb_t, fmb_list); - list_del (&fmb->fmb_list); - return (fmb); - } - - /* deschedule until fmb free */ - - conn->ksnc_rx_state = SOCKNAL_RX_FMB_SLEEP; - - list_add_tail (&conn->ksnc_rx_list, - &pool->fmp_blocked_conns); - return (NULL); -} - - -int -ktoenal_init_fmb (ksock_conn_t *conn, ksock_fmb_t *fmb) -{ - int payload_nob = conn->ksnc_rx_nob_left; - int packet_nob = sizeof (ptl_hdr_t) + payload_nob; - int niov; /* at least the header */ - int nob; - - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_GET_FMB); - LASSERT (conn->ksnc_rx_nob_wanted == conn->ksnc_rx_nob_left); - LASSERT (payload_nob >= 0); - LASSERT (packet_nob <= fmb->fmb_npages * PAGE_SIZE); - LASSERT (sizeof (ptl_hdr_t) < PAGE_SIZE); - - /* Got a forwarding buffer; copy the header we just read into the - * forwarding buffer. If there's payload start reading reading it - * into the buffer, otherwise the forwarding buffer can be kicked - * off immediately. - * - * NB fmb->fmb_iov spans the WHOLE packet. - * conn->ksnc_rx_iov spans just the payload. 
- */ - - fmb->fmb_iov[0].iov_base = page_address (fmb->fmb_pages[0]); - - memcpy (fmb->fmb_iov[0].iov_base, &conn->ksnc_hdr, sizeof (ptl_hdr_t)); /* copy header */ - - if (payload_nob == 0) /* got complete packet already */ - { - atomic_inc (&ktoenal_packets_received); - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (immediate)\n", conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, packet_nob); - - fmb->fmb_iov[0].iov_len = sizeof (ptl_hdr_t); - - kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid, - packet_nob, 1, fmb->fmb_iov, - ktoenal_fmb_callback, fmb); - - kpr_fwd_start (&ktoenal_data.ksnd_router, &fmb->fmb_fwd); /* forward it now */ - - ktoenal_new_packet (conn, 0); /* on to next packet */ - return (1); - } - - niov = 1; - if (packet_nob <= PAGE_SIZE) /* whole packet fits in first page */ - fmb->fmb_iov[0].iov_len = packet_nob; - else - { - fmb->fmb_iov[0].iov_len = PAGE_SIZE; - nob = packet_nob - PAGE_SIZE; - - do - { - LASSERT (niov < fmb->fmb_npages); - fmb->fmb_iov[niov].iov_base = page_address (fmb->fmb_pages[niov]); - fmb->fmb_iov[niov].iov_len = MIN (PAGE_SIZE, nob); - nob -= PAGE_SIZE; - niov++; - } while (nob > 0); - } - - kpr_fwd_init (&fmb->fmb_fwd, conn->ksnc_hdr.dest_nid, - packet_nob, niov, fmb->fmb_iov, - ktoenal_fmb_callback, fmb); - - /* stash router's descriptor ready for call to kpr_fwd_start */ - conn->ksnc_cookie = &fmb->fmb_fwd; - - conn->ksnc_rx_state = SOCKNAL_RX_BODY_FWD; /* read in the payload */ - - /* payload is desc's iov-ed buffer, but skipping the hdr */ - LASSERT (niov <= sizeof (conn->ksnc_rx_iov) / sizeof (conn->ksnc_rx_iov[0])); - - conn->ksnc_rx_iov[0].iov_base = (void *)(((unsigned long)fmb->fmb_iov[0].iov_base) + sizeof (ptl_hdr_t)); - conn->ksnc_rx_iov[0].iov_len = fmb->fmb_iov[0].iov_len - sizeof (ptl_hdr_t); - - if (niov > 1) - memcpy (&conn->ksnc_rx_iov[1], &fmb->fmb_iov[1], (niov - 1) * sizeof (struct iovec)); - - conn->ksnc_rx_niov = niov; - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d reading body\n", 
conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, payload_nob); - return (0); -} - -void -ktoenal_fwd_parse (ksock_conn_t *conn) -{ - ksock_conn_t *conn2; - int body_len; - - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d parsing header\n", conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left); - - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER); - LASSERT (conn->ksnc_rx_scheduled); - - switch (conn->ksnc_hdr.type) - { - case PTL_MSG_GET: - case PTL_MSG_ACK: - body_len = 0; - break; - case PTL_MSG_PUT: - body_len = conn->ksnc_hdr.msg.put.length; - break; - case PTL_MSG_REPLY: - body_len = conn->ksnc_hdr.msg.reply.length; - break; - default: - /* Unrecognised packet type */ - CERROR ("Unrecognised packet type %d from "LPX64" for "LPX64"\n", - conn->ksnc_hdr.type, conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid); - /* Ignore this header and go back to reading a new packet. */ - ktoenal_new_packet (conn, 0); - return; - } - - if (body_len < 0) /* length corrupt */ - { - CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d illegal\n", - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len); - ktoenal_new_packet (conn, 0); /* on to new packet */ - return; - } - - if (body_len > SOCKNAL_MAX_FWD_PAYLOAD) /* too big to forward */ - { - CERROR ("dropping packet from "LPX64" for "LPX64": packet size %d too big\n", - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, body_len); - ktoenal_new_packet (conn, body_len); /* on to new packet (skip this one's body) */ - return; - } - - conn2 = ktoenal_get_conn (conn->ksnc_hdr.dest_nid); /* should have gone direct */ - if (conn2 != NULL) - { - CERROR ("dropping packet from "LPX64" for "LPX64": target is a peer\n", - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid); - ktoenal_put_conn (conn2); /* drop ref from get above */ - - ktoenal_new_packet (conn, body_len); /* on to next packet (skip this one's body) */ - return; - } - - conn->ksnc_rx_state = SOCKNAL_RX_GET_FMB; /* Getting FMB now */ 
- conn->ksnc_rx_nob_left = body_len; /* stash packet size */ - conn->ksnc_rx_nob_wanted = body_len; /* (no slop) */ -} - -int -ktoenal_new_packet (ksock_conn_t *conn, int nob_to_skip) -{ - static char ktoenal_slop_buffer[4096]; - - int nob; - int niov; - int skipped; - - if (nob_to_skip == 0) /* right at next packet boundary now */ - { - conn->ksnc_rx_state = SOCKNAL_RX_HEADER; - conn->ksnc_rx_nob_wanted = sizeof (ptl_hdr_t); - conn->ksnc_rx_nob_left = sizeof (ptl_hdr_t); - - conn->ksnc_rx_iov[0].iov_base = (char *)&conn->ksnc_hdr; - conn->ksnc_rx_iov[0].iov_len = sizeof (ptl_hdr_t); - conn->ksnc_rx_niov = 1; - return (1); - } - - /* set up to skip as much a possible now */ - /* if there's more left (ran out of iov entries) we'll get called again */ - - conn->ksnc_rx_state = SOCKNAL_RX_SLOP; - conn->ksnc_rx_nob_left = nob_to_skip; - skipped = 0; - niov = 0; - - do - { - nob = MIN (nob_to_skip, sizeof (ktoenal_slop_buffer)); - - conn->ksnc_rx_iov[niov].iov_base = ktoenal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; - niov++; - skipped += nob; - nob_to_skip -=nob; - - } while (nob_to_skip != 0 && /* mustn't overflow conn's rx iov */ - niov < sizeof (conn->ksnc_rx_iov)/sizeof (conn->ksnc_rx_iov[0])); - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_nob_wanted = skipped; - return (0); -} - -void -ktoenal_process_receive (ksock_conn_t *conn, long *irq_flags) -{ - ksock_fmb_t *fmb; - int len; - LASSERT (atomic_read (&conn->ksnc_refcount) > 0); - LASSERT (conn->ksnc_rx_scheduled); - LASSERT (conn->ksnc_rx_ready); - - /* NB: sched lock held */ - CDEBUG(D_NET, "conn %p\n", conn); - - if (conn->ksnc_rx_state != SOCKNAL_RX_GET_FMB) /* doesn't need a forwarding buffer */ - { - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, *irq_flags); - goto try_read; - } - - get_fmb: - /* NB: sched lock held */ - fmb = ktoenal_get_idle_fmb (conn); - if (fmb == NULL) /* conn descheduled waiting for idle fmb */ - return; - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, 
*irq_flags); - - if (ktoenal_init_fmb (conn, fmb)) /* packet forwarded ? */ - goto out; /* come back later for next packet */ - - try_read: - /* NB: sched lock NOT held */ - LASSERT (conn->ksnc_rx_state == SOCKNAL_RX_HEADER || - conn->ksnc_rx_state == SOCKNAL_RX_BODY || - conn->ksnc_rx_state == SOCKNAL_RX_BODY_FWD || - conn->ksnc_rx_state == SOCKNAL_RX_SLOP); - - LASSERT (conn->ksnc_rx_niov > 0); - LASSERT (conn->ksnc_rx_nob_wanted > 0); - - conn->ksnc_rx_ready = 0; /* data ready may race with me and set ready */ - mb(); /* => clear BEFORE trying to read */ - - /* NB ktoenal_recvmsg "consumes" the iov passed to it */ - len = ktoenal_recvmsg(conn->ksnc_file, - conn->ksnc_rx_iov, conn->ksnc_rx_niov, - conn->ksnc_rx_nob_wanted); - CDEBUG (D_NET, "%p read(%d) %d\n", conn, conn->ksnc_rx_nob_wanted, len); - - if (len <= 0) /* nothing ready (EAGAIN) or EOF or error */ - { - if (len != -EAGAIN && /* ! nothing to read now */ - len != 0) /* ! nothing to read ever */ - { -#warning FIXME: handle socket errors properly - CERROR ("Error socknal read(%d) %p: %d\n", - conn->ksnc_rx_nob_wanted, conn, len); - } - goto out; /* come back when there's data ready */ - } - - LASSERT (len <= conn->ksnc_rx_nob_wanted); - conn->ksnc_rx_nob_wanted -= len; - conn->ksnc_rx_nob_left -= len; - - if (conn->ksnc_rx_nob_wanted != 0) /* short read */ - goto out; /* try again later */ - - conn->ksnc_rx_ready = 1; /* assume there's more to be had */ - - switch (conn->ksnc_rx_state) - { - case SOCKNAL_RX_HEADER: - if (conn->ksnc_hdr.dest_nid != ktoenal_lib.ni.nid) /* It's not for me */ - { - ktoenal_fwd_parse (conn); - switch (conn->ksnc_rx_state) - { - case SOCKNAL_RX_HEADER: /* skipped this packet (zero payload) */ - goto out; /* => come back later */ - case SOCKNAL_RX_SLOP: /* skipping this packet's body */ - goto try_read; /* => go read it */ - case SOCKNAL_RX_GET_FMB: /* forwarding */ - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - goto get_fmb; /* => go get a fwd msg buffer */ 
- default: - } - /* Not Reached */ - LBUG (); - } - - PROF_START(lib_parse); - lib_parse(&ktoenal_lib, &conn->ksnc_hdr, conn); /* sets wanted_len, iovs etc */ - PROF_FINISH(lib_parse); - - if (conn->ksnc_rx_nob_wanted != 0) /* need to get some payload? */ - { - conn->ksnc_rx_state = SOCKNAL_RX_BODY; - goto try_read; /* go read the payload */ - } - /* Fall through (completed packet for me) */ - - case SOCKNAL_RX_BODY: - atomic_inc (&ktoenal_packets_received); - lib_finalize(&ktoenal_lib, NULL, conn->ksnc_cookie); /* packet is done now */ - /* Fall through */ - - case SOCKNAL_RX_SLOP: - if (ktoenal_new_packet (conn, conn->ksnc_rx_nob_left)) /* starting new packet? */ - goto out; /* come back later */ - goto try_read; /* try to finish reading slop now */ - - case SOCKNAL_RX_BODY_FWD: - CDEBUG (D_NET, "%p "LPX64"->"LPX64" %d fwd_start (got body)\n", conn, - conn->ksnc_hdr.src_nid, conn->ksnc_hdr.dest_nid, conn->ksnc_rx_nob_left); - - atomic_inc (&ktoenal_packets_received); - - /* ktoenal_init_fmb() stashed router descriptor in conn->ksnc_cookie */ - kpr_fwd_start (&ktoenal_data.ksnd_router, (kpr_fwd_desc_t *)conn->ksnc_cookie); - - LASSERT (conn->ksnc_rx_nob_left == 0); /* no slop in forwarded packets */ - - ktoenal_new_packet (conn, 0); /* on to next packet */ - goto out; /* (later) */ - - default: - } - - /* Not Reached */ - LBUG (); - - out: - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, *irq_flags); - - if (!conn->ksnc_rx_ready) /* no data there to read? 
*/ - { - conn->ksnc_rx_scheduled = 0; /* let socket callback schedule again */ - ktoenal_put_conn (conn); /* release scheduler's ref */ - } - else /* let scheduler call me again */ - list_add_tail (&conn->ksnc_rx_list, &ktoenal_data.ksnd_rx_conns); -} - -int -ktoenal_recv(nal_cb_t *nal, void *private, lib_msg_t *msg, - unsigned int niov, struct iovec *iov, size_t mlen, size_t rlen) -{ - ksock_conn_t *conn = (ksock_conn_t *)private; - int i; - - conn->ksnc_cookie = msg; - - LASSERT (niov <= PTL_MD_MAX_IOV); - for (i = 0; i < niov; i++) - { - conn->ksnc_rx_iov[i].iov_len = iov[i].iov_len; - conn->ksnc_rx_iov[i].iov_base = iov[i].iov_base; - } - - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_nob_wanted = mlen; - conn->ksnc_rx_nob_left = rlen; - - return (rlen); -} - -int -ktoenal_scheduler (void *arg) -{ - unsigned long flags; - ksock_conn_t *conn; - int rc; - int nloops = 0; - - kportal_daemonize ("ktoenal_sched"); - kportal_blockallsigs (); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - while (!ktoenal_data.ksnd_shuttingdown) - { - int did_something = 0; - - /* Ensure I progress everything semi-fairly */ - - if (!list_empty (&ktoenal_data.ksnd_rx_conns)) - { - did_something = 1; - conn = list_entry (ktoenal_data.ksnd_rx_conns.next, - ksock_conn_t, ksnc_rx_list); - list_del (&conn->ksnc_rx_list); - - ktoenal_process_receive (conn, &flags); /* drops & regains ksnd_sched_lock */ - } - - if (!list_empty (&ktoenal_data.ksnd_tx_conns)) - { - did_something = 1; - conn = list_entry (ktoenal_data.ksnd_tx_conns.next, - ksock_conn_t, ksnc_tx_list); - - list_del (&conn->ksnc_tx_list); - ktoenal_process_transmit (conn, &flags); /* drops and regains ksnd_sched_lock */ - } - - if (!did_something || /* nothing to do */ - ++nloops == SOCKNAL_RESCHED) /* hogging CPU? 
*/ - { - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - - nloops = 0; - - if (!did_something) { /* wait for something to do */ - rc = wait_event_interruptible (ktoenal_data.ksnd_sched_waitq, - ktoenal_data.ksnd_shuttingdown || - !list_empty (&ktoenal_data.ksnd_rx_conns) || - !list_empty (&ktoenal_data.ksnd_tx_conns)); - LASSERT (rc == 0); - } else - our_cond_resched(); - - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - } - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - ktoenal_thread_fini (); - return (0); -} - - -int -ktoenal_reaper (void *arg) -{ - unsigned long flags; - ksock_conn_t *conn; - int rc; - - kportal_daemonize ("ktoenal_reaper"); - kportal_blockallsigs (); - - while (!ktoenal_data.ksnd_shuttingdown) - { - spin_lock_irqsave (&ktoenal_data.ksnd_reaper_lock, flags); - - if (list_empty (&ktoenal_data.ksnd_reaper_list)) - conn = NULL; - else - { - conn = list_entry (ktoenal_data.ksnd_reaper_list.next, - ksock_conn_t, ksnc_list); - list_del (&conn->ksnc_list); - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_reaper_lock, flags); - - if (conn != NULL) - ktoenal_close_conn (conn); - else { - rc = wait_event_interruptible (ktoenal_data.ksnd_reaper_waitq, - ktoenal_data.ksnd_shuttingdown || - !list_empty(&ktoenal_data.ksnd_reaper_list)); - LASSERT (rc == 0); - } - } - - ktoenal_thread_fini (); - return (0); -} - -#define POLLREAD (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI) -#define POLLWRITE (POLLOUT | POLLWRNORM | POLLWRBAND) - -int -ktoenal_pollthread(void *arg) -{ - unsigned int mask; - struct list_head *tmp; - ksock_conn_t *conn; - - /* Save the task struct for waking it up */ - ktoenal_data.ksnd_pollthread_tsk = current; - - kportal_daemonize ("ktoenal_pollthread"); - kportal_blockallsigs (); - - poll_initwait(&ktoenal_data.ksnd_pwait); - - while(!ktoenal_data.ksnd_shuttingdown) { - - set_current_state(TASK_INTERRUPTIBLE); - - read_lock (&ktoenal_data.ksnd_socklist_lock); - list_for_each(tmp, 
&ktoenal_data.ksnd_socklist) { - - conn = list_entry(tmp, ksock_conn_t, ksnc_list); - atomic_inc(&conn->ksnc_refcount); - read_unlock (&ktoenal_data.ksnd_socklist_lock); - - mask = conn->ksnc_file->f_op->poll(conn->ksnc_file, - ktoenal_data.ksnd_slistchange ? - &ktoenal_data.ksnd_pwait : NULL); - - if(mask & POLLREAD) { - ktoenal_data_ready(conn); - - } - if (mask & POLLWRITE) { - ktoenal_write_space(conn); - - } - if (mask & (POLLERR | POLLHUP)) { - /* Do error processing */ - } - - read_lock (&ktoenal_data.ksnd_socklist_lock); - if(atomic_dec_and_test(&conn->ksnc_refcount)) - _ktoenal_put_conn(conn); - } - ktoenal_data.ksnd_slistchange = 0; - read_unlock (&ktoenal_data.ksnd_socklist_lock); - - schedule_timeout(MAX_SCHEDULE_TIMEOUT); - if(ktoenal_data.ksnd_slistchange) { - poll_freewait(&ktoenal_data.ksnd_pwait); - poll_initwait(&ktoenal_data.ksnd_pwait); - } - } - poll_freewait(&ktoenal_data.ksnd_pwait); - ktoenal_thread_fini(); - return (0); -} - -void -ktoenal_data_ready (ksock_conn_t *conn) -{ - unsigned long flags; - ENTRY; - - if (!test_and_set_bit (0, &conn->ksnc_rx_ready)) { - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - if (!conn->ksnc_rx_scheduled) { /* not being progressed */ - list_add_tail (&conn->ksnc_rx_list, - &ktoenal_data.ksnd_rx_conns); - conn->ksnc_rx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - /* This is done to avoid the effects of a sequence - * of events in which the rx_ready is lost - */ - conn->ksnc_rx_ready=1; - - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - } - - EXIT; -} - -void -ktoenal_write_space (ksock_conn_t *conn) -{ - unsigned long flags; - - CDEBUG (D_NET, "conn %p%s%s%s\n", - conn, - (conn == NULL) ? "" : (test_bit (0, &conn->ksnc_tx_ready) ? " ready" : " blocked"), - (conn == NULL) ? "" : (conn->ksnc_tx_scheduled ? 
" scheduled" : " idle"), - (conn == NULL) ? "" : (list_empty (&conn->ksnc_tx_queue) ? " empty" : " queued")); - - - if (!test_and_set_bit (0, &conn->ksnc_tx_ready)) { - spin_lock_irqsave (&ktoenal_data.ksnd_sched_lock, flags); - - if (!list_empty (&conn->ksnc_tx_queue) && /* packets to send */ - !conn->ksnc_tx_scheduled) { /* not being progressed */ - - list_add_tail (&conn->ksnc_tx_list, - &ktoenal_data.ksnd_tx_conns); - conn->ksnc_tx_scheduled = 1; - /* extra ref for scheduler */ - atomic_inc (&conn->ksnc_refcount); - - if (waitqueue_active (&ktoenal_data.ksnd_sched_waitq)) - wake_up (&ktoenal_data.ksnd_sched_waitq); - } - spin_unlock_irqrestore (&ktoenal_data.ksnd_sched_lock, flags); - } -} - -nal_cb_t ktoenal_lib = { - nal_data: &ktoenal_data, /* NAL private data */ - cb_send: ktoenal_send, - cb_recv: ktoenal_recv, - cb_read: ktoenal_read, - cb_write: ktoenal_write, - cb_callback: ktoenal_callback, - cb_malloc: ktoenal_malloc, - cb_free: ktoenal_free, - cb_printf: ktoenal_printf, - cb_cli: ktoenal_cli, - cb_sti: ktoenal_sti, - cb_dist: ktoenal_dist -}; diff --git a/lustre/portals/libcfs/.cvsignore b/lustre/portals/libcfs/.cvsignore deleted file mode 100644 index 67d1a3d..0000000 --- a/lustre/portals/libcfs/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -.deps -Makefile -Makefile.in -link-stamp diff --git a/lustre/portals/libcfs/Makefile.am b/lustre/portals/libcfs/Makefile.am deleted file mode 100644 index 20d7fbd..0000000 --- a/lustre/portals/libcfs/Makefile.am +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) 2001, 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - - -MODULE = portals -modulenet_DATA = portals.o -EXTRA_PROGRAMS = portals - -LIBLINKS := lib-dispatch.c lib-eq.c lib-init.c lib-md.c lib-me.c lib-move.c lib-msg.c lib-ni.c lib-pid.c -APILINKS := api-eq.c api-errno.c api-init.c api-me.c api-ni.c api-wrap.c -LINKS = $(APILINKS) $(LIBLINKS) -DISTCLEANFILES = $(LINKS) link-stamp *.orig *.rej - -$(LINKS): link-stamp -link-stamp: - -list='$(LIBLINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done - -list='$(APILINKS)'; for f in $$list; do echo $$f ; ln -sf $(srcdir)/../portals/$$f .; done - echo timestamp > link-stamp - -DEFS = -portals_SOURCES = $(LINKS) module.c proc.c debug.c - -# Don't distribute any patched files. -dist-hook: - list='$(EXT2C)'; for f in $$list; do rm -f $(distdir)/$$f; done - -include ../Rules.linux diff --git a/lustre/portals/libcfs/Makefile.mk b/lustre/portals/libcfs/Makefile.mk deleted file mode 100644 index 3196ea2..0000000 --- a/lustre/portals/libcfs/Makefile.mk +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include fs/lustre/portals/Kernelenv - -obj-y += libcfs.o -licfs-objs := module.o proc.o debug.o \ No newline at end of file diff --git a/lustre/portals/libcfs/debug.c b/lustre/portals/libcfs/debug.c deleted file mode 100644 index 8d26dbb..0000000 --- a/lustre/portals/libcfs/debug.c +++ /dev/null @@ -1,830 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define EXPORT_SYMTAB - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/notifier.h> -#include <linux/kernel.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <linux/interrupt.h> -#include <asm/system.h> -#include <asm/uaccess.h> -#include <linux/completion.h> - -#include <linux/fs.h> -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <linux/miscdevice.h> - -# define DEBUG_SUBSYSTEM S_PORTALS - -#include <linux/kp30.h> - -#define DEBUG_OVERFLOW 1024 -static char *debug_buf = NULL; -static unsigned long debug_size = 0; -static atomic_t debug_off_a = ATOMIC_INIT(0); -static int debug_wrapped; -wait_queue_head_t debug_ctlwq; -#define DAEMON_SND_SIZE (64 << 10) - -/* - * used by the daemon to keep track the offset into debug_buffer for the next - * write to the file. 
Usually, the daemon is to write out buffer - * from debug_daemon_next_write upto debug_off - * variable usage - * Reader - portals_debug_msg() - * Writer - portals_debug_daemon() - * portals_debug_daemon_start() during daemon init time - * portals_debug_daemon_continue() to reset to debug_off - * portals_debug_clear_buffer() reset to debug_off for clear - * Note that *_start(), *_continue() & *clear_buffer() should serialized; - */ -static atomic_t debug_daemon_next_write; - -/* - * A debug_daemon can be in following states - * stopped - stopped state means there is no debug_daemon running. - * accordingly, it must be in paused state - * a daemon is in !stopped && !paused state after - * "lctl debug_daemon start" creates debug_daemon successfully - * Variable Usage - * Reader - portals_debug_daemon() - * portals_debug_set_daemon() routines - * Writer - portals_debug_set_daemon() routines - * portals_debug_daemon() on IO error - * paused - a debug_daemon state is changed from !paused into paused - * when "lctl debug_daemon paused" is issued - * "lctl debug_daemon continue" gets a daemon into !paused mode - * Reader - portals_debug_set_daemon() routines - * portals_debug_msg() - * Writer - portals_debug_set_daemon() on init - * portals_debug_daemon() - * - * Daemon state diagram. - * (stopped, paused) - * | <-- debug_daemon start - * V - * (!stopped, !paused) - * | <-- debug_daemon pause - * V - * (!stopped, paused) - * | <-- debug_daemon continue - * V - * (!stopped, !paused) - * | <-- debug_daemon stop - * V - * (stopped, paused) - * Overlapped - this is a state when CDEBUG is too fast for the daemon to - * write out the debug_bufferr. That is, debug_off is to - * overlap debug_daemon_next_write; - * Reader - portals_debug_msg() - * Writer - portals_debug_msg() - */ - -/* - * Description on Trace Daemon Synchronization - * - * Three categories of code are synchronizing between each other - * 1. 
lctl, portals_debug_set_daemon(), the user debug control code, - * as well as portals_debug_clear_buffer() - * 2. CDEBUG, portals_debug_msg(), the debug put messages routine - * 3. Daemon, portals_debug_daemon(), to write out debug log file - * - * - * Three different controls for synchronizations - * - * 1. debug_daemon_semaphore - * The usage of this semaphore is to serialize multiple lctl controls - * in manipulating debug daemon state. The semaphore serves as the - * gatekeeper to allow only one user control thread, at any giving time, - * to access debug daemon state and keeps the other user control requests - * in wait state until the current control request is serviced. - * - * 2. wait_queue_head_t lctl (paired with lctl_event flag) - * Lctl event is the event between portals_debug_set_daemon() and - * portals_debug_daemon(). Lctl is an indicator for portals_debug_daemon() - * to flush data out to file. portals_debug_daemon() is to use lctl event - * as signal channel to wakeup portals_debug_set_daemon() upon flush - * operation is done. - * - * Producer : - * portals_debug_daemon() uses to wake up - * portals_debug_set_daemon(), pause and stop, routines - * Consumer : - * portals_debug_set_daemon(), stop and pause operations, - * wait and sleep on the event - * - * 3. wait_queue_head_t daemon (paired with daemon_event flag) - * This is an event channel to wakeup portals_debug_daemon. Daemon - * wakes up to run whenever there is an event posted. Daemon handles - * 2 types of operations . 1. Writes data out to debug file, 2. Flushes - * file and terminates base on lctl event. - * File operation - - * Daemon is normally in a sleep state. - * Daemon is woken up through daemon event whenever CDEBUG is - * putting data over any 64K boundary. - * File flush and termination - - * On portals_debug_daemon_stop/pause() operations, lctl control - * is to wake up daemon through daemon event. 
- * - * We can't use sleep_on() and wake_up() to replace daemon event because - * portals_debug_daemon() must catch the wakeup operation posted by - * portals_debug_daemon_stop/pause(). Otherwise, stop and pause may - * stuck in lctl wait event. - * - * Producer : - * a. portals_debug_daemon_pause() and portals_debug_daemon_stop() - * uses the event to wake up portals_debug_daemon() - * b. portals_debug_msg() uses the event to wake up - * portals_debug_daemon() whenever the data output is acrossing - * a 64K bytes boundary. - * Consumer : - * portals_debug_daemon() wakes up upon daemon event. - * - * Sequence for portals_debug_daemon_stop() operation - * - * _Portals_debug_daemon_stop()_ _Daemon_ - * Wait_event(daemon) or running - * Paused = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Set force_flush flag if lctlevnt - * Flush data - * Wakeup_event (lctl) - * Wait_event(daemon) - * Stopped = 1; - * Wakeup_event (daemon) - * Wait_event(lctl) - * Exit daemon loop if (Stopped) - * Wakeup_event (lctl) - * Exit - * Return to user application - * - * - * _Portals_debug_msg()_ _Daemon_ - * Wait_event(daemon) or running - * If (WriteStart<64K<WriteEnd) - * Wakeup_event(daemon) - * Do file IO - * Wait_event(daemon) - */ -struct debug_daemon_state { - unsigned long overlapped; - unsigned long stopped; - atomic_t paused; - unsigned long lctl_event; /* event for lctl */ - wait_queue_head_t lctl; - unsigned long daemon_event; /* event for daemon */ - wait_queue_head_t daemon; -}; -static struct debug_daemon_state debug_daemon_state; -static DECLARE_MUTEX(debug_daemon_semaphore); - -static loff_t daemon_file_size_limit; -char debug_daemon_file_path[1024] = ""; - -spinlock_t portals_debug_lock = SPIN_LOCK_UNLOCKED; -char debug_file_path[1024] = "/tmp/lustre-log"; -char debug_file_name[1024]; -int handled_panic; /* to avoid recursive calls to notifiers */ -char portals_upcall[1024] = "/usr/lib/lustre/portals_upcall"; - - -int portals_do_debug_dumplog(void *arg) -{ - 
struct file *file; - void *journal_info; - int rc; - mm_segment_t oldfs; - unsigned long debug_off; - - kportal_daemonize(""); - - reparent_to_init(); - journal_info = current->journal_info; - current->journal_info = NULL; - sprintf(debug_file_name, "%s.%ld", debug_file_path, CURRENT_TIME); - file = filp_open(debug_file_name, O_CREAT|O_TRUNC|O_RDWR, 0644); - - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for dumping: %ld\n", debug_file_name, - PTR_ERR(file)); - GOTO(out, PTR_ERR(file)); - } else { - printk(KERN_ALERT "dumping log to %s ... writing ...\n", - debug_file_name); - } - - debug_off = atomic_read(&debug_off_a); - oldfs = get_fs(); - set_fs(get_ds()); - if (debug_wrapped) { - rc = file->f_op->write(file, debug_buf + debug_off + 1, - debug_size-debug_off-1, &file->f_pos); - rc += file->f_op->write(file, debug_buf, debug_off + 1, - &file->f_pos); - } else { - rc = file->f_op->write(file, debug_buf, debug_off,&file->f_pos); - } - printk("wrote %d bytes\n", rc); - set_fs(oldfs); - - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc) - CERROR("sync returns %d\n", rc); - filp_close(file, 0); -out: - current->journal_info = journal_info; - wake_up(&debug_ctlwq); - return 0; -} - -int portals_debug_daemon(void *arg) -{ - struct file *file; - void *journal_info; - mm_segment_t oldfs; - unsigned long force_flush = 0; - unsigned long size, off, flags; - int rc; - - kportal_daemonize("ldebug_daemon"); - reparent_to_init(); - journal_info = current->journal_info; - current->journal_info = NULL; - - file = filp_open(debug_daemon_file_path, - O_CREAT|O_TRUNC|O_RDWR|O_LARGEFILE, 0644); - - if (!file || IS_ERR(file)) { - CERROR("cannot open %s for logging", debug_daemon_file_path); - GOTO(out1, PTR_ERR(file)); - } else { - printk(KERN_ALERT "daemon dumping log to %s ... 
writing ...\n", - debug_daemon_file_path); - } - - debug_daemon_state.overlapped = 0; - debug_daemon_state.stopped = 0; - - spin_lock_irqsave(&portals_debug_lock, flags); - off = atomic_read(&debug_off_a) + 1; - if (debug_wrapped) - off = (off >= debug_size)? 0 : off; - else - off = 0; - atomic_set(&debug_daemon_next_write, off); - atomic_set(&debug_daemon_state.paused, 0); - spin_unlock_irqrestore(&portals_debug_lock, flags); - - oldfs = get_fs(); - set_fs(KERNEL_DS); - while (1) { - unsigned long ending; - unsigned long start, tail; - long delta; - - debug_daemon_state.daemon_event = 0; - - ending = atomic_read(&debug_off_a); - start = atomic_read(&debug_daemon_next_write); - - /* check if paused is imposed by lctl ? */ - force_flush = !debug_daemon_state.lctl_event; - - delta = ending - start; - tail = debug_size - start; - size = (delta >= 0) ? delta : tail; - while (size && (force_flush || (delta < 0) || - (size >= DAEMON_SND_SIZE))) { - if (daemon_file_size_limit) { - int ssize = daemon_file_size_limit - file->f_pos; - if (size > ssize) - size = ssize; - } - - rc = file->f_op->write(file, debug_buf+start, - size, &file->f_pos); - if (rc < 0) { - printk(KERN_ALERT - "Debug_daemon write error %d\n", rc); - goto out; - } - start += rc; - delta = ending - start; - tail = debug_size - start; - if (tail == 0) - start = 0; - if (delta >= 0) - size = delta; - else - size = (tail == 0) ? 
ending : tail; - if (daemon_file_size_limit == file->f_pos) { - // file wrapped around - file->f_pos = 0; - } - } - atomic_set(&debug_daemon_next_write, start); - if (force_flush) { - rc = file->f_op->fsync(file, file->f_dentry, 1); - if (rc < 0) { - printk(KERN_ALERT - "Debug_daemon sync error %d\n", rc); - goto out; - } - if (debug_daemon_state.stopped) - break; - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); - } - wait_event(debug_daemon_state.daemon, - debug_daemon_state.daemon_event); - } -out: - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - set_fs(oldfs); - filp_close(file, 0); - current->journal_info = journal_info; -out1: - debug_daemon_state.lctl_event = 1; - wake_up(&debug_daemon_state.lctl); - return 0; -} - -void portals_debug_print(void) -{ - unsigned long dumplen = 64 * 1024; - char *start1, *start2; - char *end1, *end2; - unsigned long debug_off = atomic_read(&debug_off_a); - - start1 = debug_buf + debug_off - dumplen; - if (start1 < debug_buf) { - start1 += debug_size; - end1 = debug_buf + debug_size - 1; - start2 = debug_buf; - end2 = debug_buf + debug_off; - } else { - end1 = debug_buf + debug_off; - start2 = debug_buf + debug_off; - end2 = debug_buf + debug_off; - } - - while (start1 < end1) { - int count = MIN(1024, end1 - start1); - printk("%*s", count, start1); - start1 += 1024; - } - while (start2 < end2) { - int count = MIN(1024, end2 - start2); - printk("%*s", count, start2); - start2 += 1024; - } -} - -void portals_debug_dumplog(void) -{ - int rc; - ENTRY; - - init_waitqueue_head(&debug_ctlwq); - - rc = kernel_thread(portals_do_debug_dumplog, - NULL, CLONE_VM | CLONE_FS | CLONE_FILES); - if (rc < 0) { - printk(KERN_ERR "cannot start dump thread\n"); - return; - } - sleep_on(&debug_ctlwq); -} - -int portals_debug_daemon_start(char *file, unsigned int size) -{ - int rc; - - if (!debug_daemon_state.stopped) - return -EALREADY; - - if (file != NULL) - strncpy(debug_daemon_file_path, 
file, 1024); - - init_waitqueue_head(&debug_daemon_state.lctl); - init_waitqueue_head(&debug_daemon_state.daemon); - - daemon_file_size_limit = size << 20; - - debug_daemon_state.lctl_event = 0; - rc = kernel_thread(portals_debug_daemon, NULL, 0); - if (rc < 0) { - printk(KERN_ERR "cannot start debug daemon thread\n"); - strncpy(debug_daemon_file_path, "\0", 1); - return rc; - } - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_pause(void) -{ - if (atomic_read(&debug_daemon_state.paused)) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.lctl_event = 0; - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - return 0; -} - -int portals_debug_daemon_continue(void) -{ - if (!atomic_read(&debug_daemon_state.paused)) - return -EINVAL; - if (debug_daemon_state.stopped) - return -EINVAL; - - debug_daemon_state.overlapped = 0; - atomic_set(&debug_daemon_next_write, atomic_read(&debug_off_a)); - atomic_set(&debug_daemon_state.paused, 0); - return 0; -} - -int portals_debug_daemon_stop(void) -{ - if (debug_daemon_state.stopped) - return -EALREADY; - - if (!atomic_read(&debug_daemon_state.paused)) - portals_debug_daemon_pause(); - - debug_daemon_state.lctl_event = 0; - debug_daemon_state.stopped = 1; - - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - wait_event(debug_daemon_state.lctl, debug_daemon_state.lctl_event); - - debug_daemon_file_path[0] = '\0'; - return 0; -} - -int portals_debug_set_daemon(unsigned int cmd, unsigned int length, - char *filename, unsigned int size) -{ - int rc = -EINVAL; - - down(&debug_daemon_semaphore); - switch (cmd) { - case DEBUG_DAEMON_START: - if (length && (filename[length -1] != '\0')) { - CERROR("Invalid filename for debug_daemon\n"); - rc = -EINVAL; - break; - } - rc = portals_debug_daemon_start(filename, size); - 
break; - case DEBUG_DAEMON_STOP: - rc = portals_debug_daemon_stop(); - break; - case DEBUG_DAEMON_PAUSE: - rc = portals_debug_daemon_pause(); - break; - case DEBUG_DAEMON_CONTINUE: - rc = portals_debug_daemon_continue(); - break; - default: - CERROR("unknown set_daemon cmd\n"); - } - up(&debug_daemon_semaphore); - return rc; -} - -static int panic_dumplog(struct notifier_block *self, unsigned long unused1, - void *unused2) -{ - if (handled_panic) - return 0; - else - handled_panic = 1; - - if (in_interrupt()) { - portals_debug_print(); - return 0; - } - - while (current->lock_depth >= 0) - unlock_kernel(); - portals_debug_dumplog(); - return 0; -} - -static struct notifier_block lustre_panic_notifier = { - notifier_call : panic_dumplog, - next : NULL, - priority : 10000 -}; - -int portals_debug_init(unsigned long bufsize) -{ - unsigned long debug_off = atomic_read(&debug_off_a); - if (debug_buf != NULL) - return -EALREADY; - - atomic_set(&debug_daemon_state.paused, 1); - debug_daemon_state.stopped = 1; - - debug_buf = vmalloc(bufsize + DEBUG_OVERFLOW); - if (debug_buf == NULL) - return -ENOMEM; - memset(debug_buf, 0, debug_size); - debug_wrapped = 0; - - printk(KERN_INFO "Portals: allocated %lu byte debug buffer at %p.\n", - bufsize, debug_buf); - atomic_set(&debug_off_a, debug_off); - notifier_chain_register(&panic_notifier_list, &lustre_panic_notifier); - debug_size = bufsize; - - return 0; -} - -int portals_debug_cleanup(void) -{ - notifier_chain_unregister(&panic_notifier_list, &lustre_panic_notifier); - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - portals_debug_daemon_stop(); - - vfree(debug_buf); - atomic_set(&debug_off_a, 0); - up(&debug_daemon_semaphore); - - return 0; -} - -int portals_debug_clear_buffer(void) -{ - unsigned long flags; - unsigned long state; - - if (debug_buf == NULL) - return -EINVAL; - - down(&debug_daemon_semaphore); - state = atomic_read(&debug_daemon_state.paused); - if (!state) - 
portals_debug_daemon_pause(); - spin_lock_irqsave(&portals_debug_lock, flags); - atomic_set(&debug_off_a, 0); - debug_wrapped = 0; - atomic_set(&debug_daemon_next_write, 0); - debug_daemon_state.overlapped = 0; - spin_unlock_irqrestore(&portals_debug_lock, flags); - - if (!state) - atomic_set(&debug_daemon_state.paused, 0); - up(&debug_daemon_semaphore); - - return 0; -} - -/* Debug markers, although printed by S_PORTALS - * should not be be marked as such. - */ -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_UNDEFINED -int portals_debug_mark_buffer(char *text) -{ - if (debug_buf == NULL) - return -EINVAL; - - CDEBUG(0, "*******************************************************************************\n"); - CDEBUG(0, "DEBUG MARKER: %s\n", text); - CDEBUG(0, "*******************************************************************************\n"); - - return 0; -} -#undef DEBUG_SUBSYSTEM -#define DEBUG_SUBSYSTEM S_PORTALS - -__s32 portals_debug_copy_to_user(char *buf, unsigned long len) -{ - int rc; - unsigned long debug_off; - unsigned long flags; - - if (len < debug_size) - return -ENOSPC; - - debug_off = atomic_read(&debug_off_a); - spin_lock_irqsave(&portals_debug_lock, flags); - if (debug_wrapped) { - /* All of this juggling with the 1s is to keep the trailing nul - * (which falls at debug_buf + debug_off) at the end of what we - * copy into user space */ - copy_to_user(buf, debug_buf + debug_off + 1, - debug_size - debug_off - 1); - copy_to_user(buf + debug_size - debug_off - 1, - debug_buf, debug_off + 1); - rc = debug_size; - } else { - copy_to_user(buf, debug_buf, debug_off); - rc = debug_off; - } - spin_unlock_irqrestore(&portals_debug_lock, flags); - - return rc; -} - -/* FIXME: I'm not very smart; someone smarter should make this better. */ -void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - unsigned long stack, const char *format, ...) 
-{ - va_list ap; - unsigned long flags; - int max_nob; - int prefix_nob; - int msg_nob; - struct timeval tv; - unsigned long base_offset; - unsigned long debug_off; - - if (debug_buf == NULL) { - printk("portals_debug_msg: debug_buf is NULL!\n"); - return; - } - - spin_lock_irqsave(&portals_debug_lock, flags); - debug_off = atomic_read(&debug_off_a); - if (!atomic_read(&debug_daemon_state.paused)) { - unsigned long available; - long delta; - long v = atomic_read(&debug_daemon_next_write); - - delta = debug_off - v; - available = (delta>=0) ? debug_size-delta : -delta; - // Check if we still have enough debug buffer for CDEBUG - if (available < DAEMON_SND_SIZE) { - /* Drop CDEBUG packets until enough debug_buffer is - * available */ - if (debug_daemon_state.overlapped) - goto out; - /* If this is the first time, leave a marker in the - * output */ - debug_daemon_state.overlapped = 1; - ap = NULL; - format = "DEBUG MARKER: Debug buffer overlapped\n"; - } else /* More space just became available */ - debug_daemon_state.overlapped = 0; - } - - max_nob = debug_size - debug_off + DEBUG_OVERFLOW; - if (max_nob <= 0) { - spin_unlock_irqrestore(&portals_debug_lock, flags); - printk("logic error in portals_debug_msg: <0 bytes to write\n"); - return; - } - - /* NB since we pass a non-zero sized buffer (at least) on the first - * print, we can be assured that by the end of all the snprinting, - * we _do_ have a terminated buffer, even if our message got truncated. 
- */ - - do_gettimeofday(&tv); - - prefix_nob = snprintf(debug_buf + debug_off, max_nob, - "%02x:%06x:%d:%lu.%06lu ", - subsys >> 24, mask, smp_processor_id(), - tv.tv_sec, tv.tv_usec); - max_nob -= prefix_nob; - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "(%s:%d:%s() %d | %d+%lu): ", - file, line, fn, current->pid, - current->thread.extern_pid, stack); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "(%s:%d:%s() %d | %d+%lu): ", - file, line, fn, current->pid, - current->thread.mode.tt.extern_pid, stack); -#else - msg_nob = snprintf(debug_buf + debug_off + prefix_nob, max_nob, - "(%s:%d:%s() %d+%lu): ", - file, line, fn, current->pid, stack); -#endif - max_nob -= msg_nob; - - va_start(ap, format); - msg_nob += vsnprintf(debug_buf + debug_off + prefix_nob + msg_nob, - max_nob, format, ap); - max_nob -= msg_nob; - va_end(ap); - - /* Print to console, while msg is contiguous in debug_buf */ - /* NB safely terminated see above */ - if ((mask & D_EMERG) != 0) - printk(KERN_EMERG "%s", debug_buf + debug_off + prefix_nob); - if ((mask & D_ERROR) != 0) - printk(KERN_ERR "%s", debug_buf + debug_off + prefix_nob); - else if (portal_printk) - printk("<%d>%s", portal_printk, debug_buf+debug_off+prefix_nob); - base_offset = debug_off & 0xFFFF; - - debug_off += prefix_nob + msg_nob; - if (debug_off > debug_size) { - memcpy(debug_buf, debug_buf + debug_size, - debug_off - debug_size + 1); - debug_off -= debug_size; - debug_wrapped = 1; - } - - atomic_set(&debug_off_a, debug_off); - if (!atomic_read(&debug_daemon_state.paused) && - ((base_offset+prefix_nob+msg_nob) >= DAEMON_SND_SIZE)) { - debug_daemon_state.daemon_event = 1; - wake_up(&debug_daemon_state.daemon); - } -out: - spin_unlock_irqrestore(&portals_debug_lock, flags); -} - -void portals_debug_set_level(unsigned int debug_level) 
-{ - printk("Setting portals debug level to %08x\n", debug_level); - portal_debug = debug_level; -} - -void portals_run_lbug_upcall(char * file, char *fn, int line) -{ - char *argv[6]; - char *envp[3]; - char buf[32]; - int rc; - - ENTRY; - snprintf (buf, sizeof buf, "%d", line); - - argv[0] = portals_upcall; - argv[1] = "LBUG"; - argv[2] = file; - argv[3] = fn; - argv[4] = buf; - argv[5] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - rc = call_usermodehelper(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error invoking lbug upcall %s %s %s %s %s: %d; check " - "/proc/sys/portals/upcall\n", - argv[0], argv[1], argv[2], argv[3], argv[4], rc); - - } else { - CERROR("Invoked upcall %s %s %s %s %s\n", - argv[0], argv[1], argv[2], argv[3], argv[4]); - } -} - - -EXPORT_SYMBOL(portals_debug_dumplog); -EXPORT_SYMBOL(portals_debug_msg); -EXPORT_SYMBOL(portals_debug_set_level); -EXPORT_SYMBOL(portals_run_lbug_upcall); diff --git a/lustre/portals/libcfs/module.c b/lustre/portals/libcfs/module.c deleted file mode 100644 index 14cc325..0000000 --- a/lustre/portals/libcfs/module.c +++ /dev/null @@ -1,575 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef EXPORT_SYMTAB -# define EXPORT_SYMTAB -#endif -#define DEBUG_SUBSYSTEM S_PORTALS - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/stat.h> -#include <asm/uaccess.h> -#include <asm/segment.h> -#include <linux/miscdevice.h> - -#include <portals/lib-p30.h> -#include <portals/p30.h> -#include <linux/kp30.h> -#include <linux/portals_compat25.h> - -#define PORTAL_MINOR 240 - -extern void (kping_client)(struct portal_ioctl_data *); - -struct nal_cmd_handler { - nal_cmd_handler_t nch_handler; - void * nch_private; -}; - -static struct nal_cmd_handler nal_cmd[NAL_MAX_NR + 1]; -struct semaphore nal_cmd_sem; - -#ifdef PORTAL_DEBUG -void -kportal_assertion_failed (char *expr, char *file, char *func, int line) -{ - portals_debug_msg(0, D_EMERG, file, func, line, CDEBUG_STACK(), - "ASSERTION(%s) failed\n", expr); - LBUG_WITH_LOC(file, func, line); -} -#endif - -void -kportal_daemonize (char *str) -{ -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,63)) - daemonize(str); -#else - daemonize(); - snprintf (current->comm, sizeof (current->comm), "%s", str); -#endif -} - -void -kportal_blockallsigs () -{ - unsigned long flags; - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); -} - -/* called when opening /dev/device */ -static int kportal_psdev_open(struct inode * inode, struct file * file) -{ - ENTRY; - - if (!inode) - RETURN(-EINVAL); - PORTAL_MODULE_USE; - RETURN(0); -} - -/* called when 
closing /dev/device */ -static int kportal_psdev_release(struct inode * inode, struct file * file) -{ - ENTRY; - - if (!inode) - RETURN(-EINVAL); - - PORTAL_MODULE_UNUSE; - RETURN(0); -} - -static inline void freedata(void *data, int len) -{ - PORTAL_FREE(data, len); -} - -static int -kportal_add_route(int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, - ptl_nid_t hi_nid) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET (kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_add_route (gateway_nalid, gateway_nid, lo_nid, hi_nid); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_del_route(ptl_nid_t target) -{ - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *)PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_del_route (target); - - PORTAL_SYMBOL_PUT(kpr_control_interface); - return (rc); -} - -static int -kportal_get_route(int index, __u32 *gateway_nalidp, ptl_nid_t *gateway_nidp, - ptl_nid_t *lo_nidp, ptl_nid_t *hi_nidp) -{ - int gateway_nalid; - ptl_nid_t gateway_nid; - ptl_nid_t lo_nid; - ptl_nid_t hi_nid; - int rc; - kpr_control_interface_t *ci; - - ci = (kpr_control_interface_t *) PORTAL_SYMBOL_GET(kpr_control_interface); - if (ci == NULL) - return (-ENODEV); - - rc = ci->kprci_get_route(index, &gateway_nalid, &gateway_nid, &lo_nid, - &hi_nid); - - if (rc == 0) { - CDEBUG(D_IOCTL, "got route [%d] %d "LPX64":"LPX64" - "LPX64"\n", - index, gateway_nalid, gateway_nid, lo_nid, hi_nid); - - *gateway_nalidp = (__u32)gateway_nalid; - *gateway_nidp = (__u32)gateway_nid; - *lo_nidp = (__u32)lo_nid; - *hi_nidp = (__u32)hi_nid; - } - - PORTAL_SYMBOL_PUT (kpr_control_interface); - return (rc); -} - -static int -kportal_nal_cmd(int nal, struct portal_ioctl_data *data) -{ - int rc = -EINVAL; - - ENTRY; - - down(&nal_cmd_sem); - if (nal > 0 && nal <= NAL_MAX_NR && 
nal_cmd[nal].nch_handler) { - CDEBUG(D_IOCTL, "calling handler nal: %d, cmd: %d\n", nal, data->ioc_nal_cmd); - rc = nal_cmd[nal].nch_handler(data, nal_cmd[nal].nch_private); - } - up(&nal_cmd_sem); - RETURN(rc); -} - -ptl_handle_ni_t * -kportal_get_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - return (PORTAL_SYMBOL_GET(kqswnal_ni)); - case SOCKNAL: - return (PORTAL_SYMBOL_GET(ksocknal_ni)); - case TOENAL: - return (PORTAL_SYMBOL_GET(ktoenal_ni)); - case GMNAL: - return (PORTAL_SYMBOL_GET(kgmnal_ni)); - case TCPNAL: - /* userspace NAL */ - return (NULL); - case SCIMACNAL: - return (PORTAL_SYMBOL_GET(kscimacnal_ni)); - default: - /* A warning to a naive caller */ - CERROR ("unknown nal: %d\n", nal); - return (NULL); - } -} - -void -kportal_put_ni (int nal) -{ - - switch (nal) - { - case QSWNAL: - PORTAL_SYMBOL_PUT(kqswnal_ni); - break; - case SOCKNAL: - PORTAL_SYMBOL_PUT(ksocknal_ni); - break; - case TOENAL: - PORTAL_SYMBOL_PUT(ktoenal_ni); - break; - case GMNAL: - PORTAL_SYMBOL_PUT(kgmnal_ni); - break; - case TCPNAL: - /* A lesson to a malicious caller */ - LBUG (); - case SCIMACNAL: - PORTAL_SYMBOL_PUT(kscimacnal_ni); - break; - default: - CERROR ("unknown nal: %d\n", nal); - } -} - -int -kportal_nal_register(int nal, nal_cmd_handler_t handler, void * private) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Register NAL %d, handler: %p\n", nal, handler); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - if (nal_cmd[nal].nch_handler != NULL) - rc = -EBUSY; - else { - nal_cmd[nal].nch_handler = handler; - nal_cmd[nal].nch_private = private; - } - up(&nal_cmd_sem); - } - return rc; -} - -int -kportal_nal_unregister(int nal) -{ - int rc = 0; - - CDEBUG(D_IOCTL, "Unregister NAL %d\n", nal); - - if (nal > 0 && nal <= NAL_MAX_NR) { - down(&nal_cmd_sem); - nal_cmd[nal].nch_handler = NULL; - nal_cmd[nal].nch_private = NULL; - up(&nal_cmd_sem); - } - return rc; -} - - -static int kportal_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned 
long arg) -{ - int err = 0; - char buf[1024]; - struct portal_ioctl_data *data; - - ENTRY; - - if ( _IOC_TYPE(cmd) != IOC_PORTAL_TYPE || - _IOC_NR(cmd) < IOC_PORTAL_MIN_NR || - _IOC_NR(cmd) > IOC_PORTAL_MAX_NR ) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - RETURN(-EINVAL); - } - - if (portal_ioctl_getdata(buf, buf + 800, (void *)arg)) { - CERROR("PORTALS ioctl: data error\n"); - RETURN(-EINVAL); - } - - data = (struct portal_ioctl_data *)buf; - - switch (cmd) { - case IOC_PORTAL_SET_DAEMON: - RETURN (portals_debug_set_daemon ( - (unsigned int) data->ioc_count, - (unsigned int) data->ioc_inllen1, - (char *) data->ioc_inlbuf1, - (unsigned int) data->ioc_misc)); - case IOC_PORTAL_GET_DEBUG: { - __s32 size = portals_debug_copy_to_user(data->ioc_pbuf1, - data->ioc_plen1); - - if (size < 0) - RETURN(size); - - data->ioc_size = size; - err = copy_to_user((char *)arg, data, sizeof(*data)); - RETURN(err); - } - case IOC_PORTAL_CLEAR_DEBUG: - portals_debug_clear_buffer(); - RETURN(0); - case IOC_PORTAL_PANIC: - if (!capable (CAP_SYS_BOOT)) - RETURN (-EPERM); - panic("debugctl-invoked panic"); - RETURN(0); - case IOC_PORTAL_MARK_DEBUG: - if (data->ioc_inlbuf1 == NULL || - data->ioc_inlbuf1[data->ioc_inllen1 - 1] != '\0') - RETURN(-EINVAL); - portals_debug_mark_buffer(data->ioc_inlbuf1); - RETURN(0); - case IOC_PORTAL_PING: { - void (*ping)(struct portal_ioctl_data *); - - CDEBUG(D_IOCTL, "doing %d pings to nid "LPU64"\n", - data->ioc_count, data->ioc_nid); - ping = PORTAL_SYMBOL_GET(kping_client); - if (!ping) - CERROR("PORTAL_SYMBOL_GET failed\n"); - else { - ping(data); - PORTAL_SYMBOL_PUT(kping_client); - } - RETURN(0); - } - - case IOC_PORTAL_ADD_ROUTE: - CDEBUG(D_IOCTL, "Adding route: [%d] "LPU64" : "LPU64" - "LPU64"\n", - data->ioc_nal, data->ioc_nid, data->ioc_nid2, - data->ioc_nid3); - err = kportal_add_route(data->ioc_nal, data->ioc_nid, - MIN (data->ioc_nid2, data->ioc_nid3), - MAX 
(data->ioc_nid2, data->ioc_nid3)); - break; - - case IOC_PORTAL_DEL_ROUTE: - CDEBUG (D_IOCTL, "Removing route to "LPU64"\n", data->ioc_nid); - err = kportal_del_route (data->ioc_nid); - break; - - case IOC_PORTAL_GET_ROUTE: - CDEBUG (D_IOCTL, "Getting route [%d]\n", data->ioc_count); - err = kportal_get_route(data->ioc_count, &data->ioc_nal, - &data->ioc_nid, &data->ioc_nid2, - &data->ioc_nid3); - if (err == 0) - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; - - case IOC_PORTAL_GET_NID: { - const ptl_handle_ni_t *nip; - ptl_process_id_t pid; - - CDEBUG (D_IOCTL, "Getting nid [%d]\n", data->ioc_nal); - - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) - RETURN (-EINVAL); - - err = PtlGetId (*nip, &pid); - LASSERT (err == PTL_OK); - kportal_put_ni (data->ioc_nal); - - data->ioc_nid = pid.nid; - if (copy_to_user ((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; - } - - case IOC_PORTAL_NAL_CMD: - CDEBUG (D_IOCTL, "nal command nal %d cmd %d\n", data->ioc_nal, - data->ioc_nal_cmd); - err = kportal_nal_cmd(data->ioc_nal, data); - if (err == 0) - if (copy_to_user((char *)arg, data, sizeof (*data))) - err = -EFAULT; - break; - - case IOC_PORTAL_FAIL_NID: { - const ptl_handle_ni_t *nip; - - CDEBUG (D_IOCTL, "fail nid: [%d] "LPU64" count %d\n", - data->ioc_nal, data->ioc_nid, data->ioc_count); - - nip = kportal_get_ni (data->ioc_nal); - if (nip == NULL) - return (-EINVAL); - - err = PtlFailNid (*nip, data->ioc_nid, data->ioc_count); - kportal_put_ni (data->ioc_nal); - break; - } - - default: - err = -EINVAL; - break; - } - - RETURN(err); -} - - -static struct file_operations portalsdev_fops = { - ioctl: kportal_ioctl, - open: kportal_psdev_open, - release: kportal_psdev_release -}; - - -static struct miscdevice portal_dev = { - PORTAL_MINOR, - "portals", - &portalsdev_fops -}; - -extern int insert_proc(void); -extern void remove_proc(void); -MODULE_AUTHOR("Peter J. 
Braam <braam@clusterfs.com>"); -MODULE_DESCRIPTION("Portals v3.1"); -MODULE_LICENSE("GPL"); - -static int init_kportals_module(void) -{ - int rc; - - rc = portals_debug_init(5 * 1024 * 1024); - if (rc < 0) { - printk(KERN_ERR "portals_debug_init: %d\n", rc); - return (rc); - } - - sema_init(&nal_cmd_sem, 1); - - rc = misc_register(&portal_dev); - if (rc) { - CERROR("misc_register: error %d\n", rc); - goto cleanup_debug; - } - - rc = PtlInit(); - if (rc) { - CERROR("PtlInit: error %d\n", rc); - goto cleanup_deregister; - } - - rc = insert_proc(); - if (rc) { - CERROR("insert_proc: error %d\n", rc); - goto cleanup_fini; - } - - CDEBUG (D_OTHER, "portals setup OK\n"); - return (0); - - cleanup_fini: - PtlFini(); - cleanup_deregister: - misc_deregister(&portal_dev); - cleanup_debug: - portals_debug_cleanup(); - return rc; -} - -static void exit_kportals_module(void) -{ - int rc; - - remove_proc(); - PtlFini(); - - CDEBUG(D_MALLOC, "before Portals cleanup: kmem %d\n", - atomic_read(&portal_kmemory)); - - - rc = misc_deregister(&portal_dev); - if (rc) - CERROR("misc_deregister error %d\n", rc); - - if (atomic_read(&portal_kmemory) != 0) - CERROR("Portals memory leaked: %d bytes\n", - atomic_read(&portal_kmemory)); - - rc = portals_debug_cleanup(); - if (rc) - printk(KERN_ERR "portals_debug_cleanup: %d\n", rc); -} - -EXPORT_SYMBOL(lib_dispatch); -EXPORT_SYMBOL(PtlMEAttach); -EXPORT_SYMBOL(PtlMEInsert); -EXPORT_SYMBOL(PtlMEUnlink); -EXPORT_SYMBOL(PtlEQAlloc); -EXPORT_SYMBOL(PtlMDAttach); -EXPORT_SYMBOL(PtlMDUnlink); -EXPORT_SYMBOL(PtlNIInit); -EXPORT_SYMBOL(PtlNIFini); -EXPORT_SYMBOL(PtlNIDebug); -EXPORT_SYMBOL(PtlInit); -EXPORT_SYMBOL(PtlFini); -EXPORT_SYMBOL(PtlPut); -EXPORT_SYMBOL(PtlGet); -EXPORT_SYMBOL(ptl_err_str); -EXPORT_SYMBOL(portal_subsystem_debug); -EXPORT_SYMBOL(portal_debug); -EXPORT_SYMBOL(portal_stack); -EXPORT_SYMBOL(portal_printk); -EXPORT_SYMBOL(PtlEQWait); -EXPORT_SYMBOL(PtlEQFree); -EXPORT_SYMBOL(PtlEQGet); -EXPORT_SYMBOL(PtlGetId); 
-EXPORT_SYMBOL(PtlMDBind); -EXPORT_SYMBOL(lib_iov_nob); -EXPORT_SYMBOL(lib_copy_iov2buf); -EXPORT_SYMBOL(lib_copy_buf2iov); -EXPORT_SYMBOL(lib_kiov_nob); -EXPORT_SYMBOL(lib_copy_kiov2buf); -EXPORT_SYMBOL(lib_copy_buf2kiov); -EXPORT_SYMBOL(lib_finalize); -EXPORT_SYMBOL(lib_parse); -EXPORT_SYMBOL(lib_init); -EXPORT_SYMBOL(lib_fini); -EXPORT_SYMBOL(portal_kmemory); -EXPORT_SYMBOL(kportal_daemonize); -EXPORT_SYMBOL(kportal_blockallsigs); -EXPORT_SYMBOL(kportal_nal_register); -EXPORT_SYMBOL(kportal_nal_unregister); -EXPORT_SYMBOL(kportal_assertion_failed); -EXPORT_SYMBOL(dispatch_name); -EXPORT_SYMBOL(kportal_get_ni); -EXPORT_SYMBOL(kportal_put_ni); - -module_init(init_kportals_module); -module_exit (exit_kportals_module); diff --git a/lustre/portals/libcfs/proc.c b/lustre/portals/libcfs/proc.c deleted file mode 100644 index 2fa739a..0000000 --- a/lustre/portals/libcfs/proc.c +++ /dev/null @@ -1,290 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * Author: Zach Brown <zab@zabbo.net> - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/errno.h> -#include <linux/smp_lock.h> -#include <linux/unistd.h> -#include <net/sock.h> -#include <linux/uio.h> - -#include <asm/system.h> -#include <asm/uaccess.h> - -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/stat.h> -#include <linux/list.h> -#include <asm/uaccess.h> -#include <asm/segment.h> - -#include <linux/proc_fs.h> -#include <linux/sysctl.h> - -# define DEBUG_SUBSYSTEM S_PORTALS - -#include <linux/kp30.h> -#include <asm/div64.h> - -static struct ctl_table_header *portals_table_header = NULL; -extern char debug_file_path[1024]; -extern char debug_daemon_file_path[1024]; -extern char portals_upcall[1024]; - -#define PSDEV_PORTALS (0x100) -#define PSDEV_DEBUG 1 /* control debugging */ -#define PSDEV_SUBSYSTEM_DEBUG 2 /* control debugging */ -#define PSDEV_PRINTK 3 /* force all errors to console */ -#define PSDEV_DEBUG_PATH 4 /* crashdump log location */ -#define PSDEV_DEBUG_DUMP_PATH 5 /* crashdump tracelog location */ -#define PSDEV_PORTALS_UPCALL 6 /* User mode upcall script */ - -#define PORTALS_PRIMARY_CTLCNT 6 -static struct ctl_table portals_table[PORTALS_PRIMARY_CTLCNT + 1] = { - {PSDEV_DEBUG, "debug", &portal_debug, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_SUBSYSTEM_DEBUG, "subsystem_debug", &portal_subsystem_debug, - sizeof(int), 0644, NULL, &proc_dointvec}, - {PSDEV_PRINTK, "printk", &portal_printk, sizeof(int), 0644, NULL, - &proc_dointvec}, - {PSDEV_DEBUG_PATH, "debug_path", debug_file_path, - sizeof(debug_file_path), 0644, NULL, &proc_dostring, &sysctl_string}, - {PSDEV_DEBUG_DUMP_PATH, "debug_daemon_path", debug_daemon_file_path, - sizeof(debug_daemon_file_path), 0644, NULL, &proc_dostring, - &sysctl_string}, - {PSDEV_PORTALS_UPCALL, "upcall", portals_upcall, - sizeof(portals_upcall), 0644, NULL, &proc_dostring, 
- &sysctl_string}, - {0} -}; - -static struct ctl_table top_table[2] = { - {PSDEV_PORTALS, "portals", NULL, 0, 0555, portals_table}, - {0} -}; - - -#ifdef PORTALS_PROFILING -/* - * profiling stuff. we do this statically for now 'cause its simple, - * but we could do some tricks with elf sections to have this array - * automatically built. - */ -#define def_prof(FOO) [PROF__##FOO] = {#FOO, 0, } - -struct prof_ent prof_ents[] = { - def_prof(our_recvmsg), - def_prof(our_sendmsg), - def_prof(socknal_recv), - def_prof(lib_parse), - def_prof(conn_list_walk), - def_prof(memcpy), - def_prof(lib_finalize), - def_prof(pingcli_time), - def_prof(gmnal_send), - def_prof(gmnal_recv), -}; - -EXPORT_SYMBOL(prof_ents); - -/* - * this function is as crazy as the proc filling api - * requires. - * - * buffer: page allocated for us to scribble in. the - * data returned to the user will be taken from here. - * *start: address of the pointer that will tell the - * caller where in buffer the data the user wants is. - * ppos: offset in the entire /proc file that the user - * currently wants. - * wanted: the amount of data the user wants. - * - * while going, 'curpos' is the offset in the entire - * file where we currently are. We only actually - * start filling buffer when we get to a place in - * the file that the user cares about. - * - * we take care to only sprintf when the user cares because - * we're holding a lock while we do this. - * - * we're smart and know that we generate fixed size lines. - * we only start writing to the buffer when the user cares. - * This is unpredictable because we don't snapshot the - * list between calls that are filling in a file from - * the list. The list could change mid read and the - * output will look very weird indeed. oh well. 
- */ - -static int prof_read_proc(char *buffer, char **start, off_t ppos, int wanted, - int *eof, void *data) -{ - int len = 0, i; - int curpos; - char *header = "Interval Cycles_per (Starts Finishes Total)\n"; - int header_len = strlen(header); - char *format = "%-15s %.12Ld (%.12d %.12d %.12Ld)"; - int line_len = (15 + 1 + 12 + 2 + 12 + 1 + 12 + 1 + 12 + 1); - - *start = buffer; - - if (ppos < header_len) { - int diff = MIN(header_len, wanted); - memcpy(buffer, header + ppos, diff); - len += diff; - ppos += diff; - } - - if (len >= wanted) - goto out; - - curpos = header_len; - - for ( i = 0; i < MAX_PROFS ; i++) { - int copied; - struct prof_ent *pe = &prof_ents[i]; - long long cycles_per; - /* - * find the part of the array that the buffer wants - */ - if (ppos >= (curpos + line_len)) { - curpos += line_len; - continue; - } - /* the clever caller split a line */ - if (ppos > curpos) { - *start = buffer + (ppos - curpos); - } - - if (pe->finishes == 0) - cycles_per = 0; - else - { - cycles_per = pe->total_cycles; - do_div (cycles_per, pe->finishes); - } - - copied = sprintf(buffer + len, format, pe->str, cycles_per, - pe->starts, pe->finishes, pe->total_cycles); - - len += copied; - - /* pad to line len, -1 for \n */ - if ((copied < line_len-1)) { - int diff = (line_len-1) - copied; - memset(buffer + len, ' ', diff); - len += diff; - copied += diff; - } - - buffer[len++]= '\n'; - - /* bail if we have enough */ - if (((buffer + len) - *start) >= wanted) - break; - - curpos += line_len; - } - - /* lameness */ - if (i == MAX_PROFS) - *eof = 1; - out: - - return MIN(((buffer + len) - *start), wanted); -} - -/* - * all kids love /proc :/ - */ -static unsigned char basedir[]="net/portals"; -#endif /* PORTALS_PROFILING */ - -int insert_proc(void) -{ -#if PORTALS_PROFILING - unsigned char dir[128]; - struct proc_dir_entry *ent; - - if (ARRAY_SIZE(prof_ents) != MAX_PROFS) { - CERROR("profiling enum and array are out of sync.\n"); - return -1; - } - - /* - * This is 
pretty lame. assuming that failure just - * means that they already existed. - */ - strcat(dir, basedir); - create_proc_entry(dir, S_IFDIR, 0); - - strcat(dir, "/cycles"); - ent = create_proc_entry(dir, 0, 0); - if (!ent) { - CERROR("couldn't register %s?\n", dir); - return -1; - } - - ent->data = NULL; - ent->read_proc = prof_read_proc; -#endif /* PORTALS_PROFILING */ - -#ifdef CONFIG_SYSCTL - if (!portals_table_header) - portals_table_header = register_sysctl_table(top_table, 0); -#endif - - return 0; -} - -void remove_proc(void) -{ -#if PORTALS_PROFILING - unsigned char dir[128]; - int end; - - dir[0]='\0'; - strcat(dir, basedir); - - end = strlen(dir); - - strcat(dir, "/cycles"); - remove_proc_entry(dir,0); - - dir[end] = '\0'; - remove_proc_entry(dir,0); -#endif /* PORTALS_PROFILING */ - -#ifdef CONFIG_SYSCTL - if (portals_table_header) - unregister_sysctl_table(portals_table_header); - portals_table_header = NULL; -#endif -} diff --git a/lustre/portals/packaging/.cvsignore b/lustre/portals/packaging/.cvsignore deleted file mode 100644 index fd1d56a..0000000 --- a/lustre/portals/packaging/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -Makefile -Makefile.in -aclocal.m4 -config.log -config.status -config.cache -configure -portals.spec diff --git a/lustre/portals/packaging/Makefile.am b/lustre/portals/packaging/Makefile.am deleted file mode 100644 index 126bc69..0000000 --- a/lustre/portals/packaging/Makefile.am +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (C) 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -EXTRA_DIST = portals.spec \ No newline at end of file diff --git a/lustre/portals/packaging/portals.spec.in b/lustre/portals/packaging/portals.spec.in deleted file mode 100644 index e196b3f..0000000 --- a/lustre/portals/packaging/portals.spec.in +++ /dev/null @@ -1,116 +0,0 @@ -%define kversion @RELEASE@ -%define linuxdir @LINUX@ -%define version HEAD - -Summary: Sandia Portals Message Passing - utilities -Name: portals -Version: %{version} -Release: 0210101748uml -Copyright: LGPL -Group: Utilities/System -BuildRoot: /var/tmp/portals-%{version}-root -Source: http://sandiaportals.org/portals-%{version}.tar.gz - -%description -Sandia Portals message passing package. Contains kernel modules, libraries and utilities. - -%package -n portals-modules -Summary: Kernel modules and NAL's for portals -Group: Development/Kernel - -%description -n portals-modules -Object-Based Disk storage drivers for Linux %{kversion}. - -%package -n portals-source -Summary: Portals kernel source for rebuilding with other kernels -Group: Development/Kernel - -%description -n portals-source -Portals kernel source for rebuilding with other kernels - -%prep -%setup -n portals-%{version} - -%build -rm -rf $RPM_BUILD_ROOT - -# Create the pristine source directory. -srcdir=$RPM_BUILD_ROOT/usr/src/portals-%{version} -mkdir -p $srcdir -find . -name CVS -prune -o -print | cpio -ap $srcdir - -# Set an explicit path to our Linux tree, if we can. 
-conf_flag= -linuxdir=%{linuxdir} -test -d $linuxdir && conf_flag=--with-linux=$linuxdir -./configure $conf_flag -make - -%install -make install prefix=$RPM_BUILD_ROOT - -%ifarch alpha -# this hurts me - conf_flag= - linuxdir=%{linuxdir} - test -d $linuxdir && conf_flag=--with-linux=$linuxdir - make clean - ./configure --enable-rtscts-myrinet $conf_flag - make - cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/portals/rtscts_myrinet.o - cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload -%endif - - -%files -%attr(-, root, root) %doc COPYING -%attr(-, root, root) /usr/sbin/acceptor -%attr(-, root, root) /usr/sbin/ptlctl -%attr(-, root, root) /usr/sbin/debugctl -%ifarch alpha -%attr(-, root, root) /usr/sbin/mcpload -%endif -%attr(-, root, root) /lib/libmyrnal.a -%attr(-, root, root) /lib/libptlapi.a -%attr(-, root, root) /lib/libptlctl.a -%attr(-, root, root) /lib/libprocbridge.a -%attr(-, root, root) /lib/libptllib.a -%attr(-, root, root) /lib/libtcpnal.a -%attr(-, root, root) /lib/libtcpnalutil.a -%attr(-, root, root) /usr/include/portals/*.h -%attr(-, root, root) /usr/include/portals/base/*.h -%attr(-, root, root) /usr/include/linux/*.h - -%files -n portals-modules -%attr(-, root, root) %doc COPYING -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/portals.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptlrouter.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/kptrxtx.o -%ifarch alpha -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/p3mod.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/rtscts.o -%endif -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/portals/*nal.o - -%files -n portals-source -%attr(-, root, root) /usr/src/portals-%{version} - -%post -if [ ! 
-e /dev/portals ]; then - mknod /dev/portals c 10 240 -fi -depmod -ae || exit 0 - -grep -q portals /etc/modules.conf || \ - echo 'alias char-major-10-240 portals' >> /etc/modules.conf - -grep -q '/dev/portals' /etc/modules.conf || \ - echo 'alias /dev/portals portals' >> /etc/modules.conf - -%postun -depmod -ae || exit 0 - -%clean -#rm -rf $RPM_BUILD_ROOT - -# end of file diff --git a/lustre/portals/portals/.cvsignore b/lustre/portals/portals/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/portals/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/portals/portals/Makefile.am b/lustre/portals/portals/Makefile.am deleted file mode 100644 index 8c03749..0000000 --- a/lustre/portals/portals/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - - -CPPFLAGS= -INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -lib_LIBRARIES= libportals.a -libportals_a_SOURCES= api-eq.c api-init.c api-me.c api-errno.c api-ni.c api-wrap.c lib-dispatch.c lib-init.c lib-me.c lib-msg.c lib-eq.c lib-md.c lib-move.c lib-ni.c lib-pid.c diff --git a/lustre/portals/portals/Makefile.mk b/lustre/portals/portals/Makefile.mk deleted file mode 100644 index 5627ef7..0000000 --- a/lustre/portals/portals/Makefile.mk +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../Kernelenv - -obj-y += portals.o -portals-objs := lib-dispatch.o lib-eq.o lib-init.o lib-md.o lib-me.o lib-move.o lib-msg.o lib-ni.o lib-not-impl.o lib-pid.o api-eq.o api-errno.o api-init.o api-md.o api-me.o api-ni.o api-wrap.o diff --git a/lustre/portals/portals/api-eq.c b/lustre/portals/portals/api-eq.c deleted file mode 100644 index e066619..0000000 --- a/lustre/portals/portals/api-eq.c +++ /dev/null @@ -1,158 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-eq.c - * User-level event queue management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <portals/api-support.h> - -int ptl_eq_init(void) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_fini(void) -{ - /* Nothing to do anymore... */ -} - -int ptl_eq_ni_init(nal_t * nal) -{ - /* Nothing to do anymore... */ - return PTL_OK; -} - -void ptl_eq_ni_fini(nal_t * nal) -{ - /* Nothing to do anymore... 
*/ -} - -int PtlEQGet(ptl_handle_eq_t eventq, ptl_event_t * ev) -{ - ptl_eq_t *eq; - int rc, new_index; - unsigned long flags; - ptl_event_t *new_event; - nal_t *nal; - ENTRY; - - if (!ptl_init) - RETURN(PTL_NOINIT); - - nal = ptl_hndl2nal(&eventq); - if (!nal) - RETURN(PTL_INV_EQ); - - eq = ptl_handle2usereq(&eventq); - nal->lock(nal, &flags); - - /* size must be a power of 2 to handle a wrapped sequence # */ - LASSERT (eq->size != 0 && - eq->size == LOWEST_BIT_SET (eq->size)); - - new_index = eq->sequence & (eq->size - 1); - new_event = &eq->base[new_index]; - CDEBUG(D_INFO, "new_event: %p, sequence: %lu, eq->size: %u\n", - new_event, eq->sequence, eq->size); - if (PTL_SEQ_GT (eq->sequence, new_event->sequence)) { - nal->unlock(nal, &flags); - RETURN(PTL_EQ_EMPTY); - } - - *ev = *new_event; - - /* Set the unlinked_me interface number if there is one to pass - * back, since the NAL hasn't a clue what it is and therefore can't - * set it. */ - if (!PtlHandleEqual (ev->unlinked_me, PTL_HANDLE_NONE)) - ev->unlinked_me.nal_idx = eventq.nal_idx; - - /* ensure event is delivered correctly despite possible - races with lib_finalize */ - if (eq->sequence != new_event->sequence) { - CERROR("DROPPING EVENT: eq seq %lu ev seq %lu\n", - eq->sequence, new_event->sequence); - rc = PTL_EQ_DROPPED; - } else { - rc = PTL_OK; - } - - eq->sequence = new_event->sequence + 1; - nal->unlock(nal, &flags); - RETURN(rc); -} - - -int PtlEQWait(ptl_handle_eq_t eventq_in, ptl_event_t *event_out) -{ - int rc; - - /* PtlEQGet does the handle checking */ - while ((rc = PtlEQGet(eventq_in, event_out)) == PTL_EQ_EMPTY) { - nal_t *nal = ptl_hndl2nal(&eventq_in); - - if (nal->yield) - nal->yield(nal); - } - - return rc; -} - -#ifndef __KERNEL__ -static jmp_buf eq_jumpbuf; - -static void eq_timeout(int signal) -{ - longjmp(eq_jumpbuf, -1); -} - -int PtlEQWait_timeout(ptl_handle_eq_t eventq_in, ptl_event_t * event_out, - int timeout) -{ - static void (*prev) (int); - static int left_over; - time_t 
time_at_start; - int rc; - - if (setjmp(eq_jumpbuf)) { - signal(SIGALRM, prev); - alarm(left_over - timeout); - return PTL_EQ_EMPTY; - } - - left_over = alarm(timeout); - prev = signal(SIGALRM, eq_timeout); - time_at_start = time(NULL); - if (left_over < timeout) - alarm(left_over); - - rc = PtlEQWait(eventq_in, event_out); - - signal(SIGALRM, prev); - alarm(left_over); /* Should compute how long we waited */ - - return rc; -} - -#endif - diff --git a/lustre/portals/portals/api-errno.c b/lustre/portals/portals/api-errno.c deleted file mode 100644 index 026c93b..0000000 --- a/lustre/portals/portals/api-errno.c +++ /dev/null @@ -1,55 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-errno.c - * Instantiate the string table of errors - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - */ - -/* If you change these, you must update the number table in portals/errno.h */ -const char *ptl_err_str[] = { - "PTL_OK", - "PTL_SEGV", - - "PTL_NOSPACE", - "PTL_INUSE", - "PTL_VAL_FAILED", - - "PTL_NAL_FAILED", - "PTL_NOINIT", - "PTL_INIT_DUP", - "PTL_INIT_INV", - "PTL_AC_INV_INDEX", - - "PTL_INV_ASIZE", - "PTL_INV_HANDLE", - "PTL_INV_MD", - "PTL_INV_ME", - "PTL_INV_NI", -/* If you change these, you must update the number table in portals/errno.h */ - "PTL_ILL_MD", - "PTL_INV_PROC", - "PTL_INV_PSIZE", - "PTL_INV_PTINDEX", - "PTL_INV_REG", - - "PTL_INV_SR_INDX", - "PTL_ML_TOOLONG", - "PTL_ADDR_UNKNOWN", - "PTL_INV_EQ", - "PTL_EQ_DROPPED", - - "PTL_EQ_EMPTY", - "PTL_NOUPDATE", - "PTL_FAIL", - "PTL_NOT_IMPLEMENTED", - "PTL_NO_ACK", - - "PTL_IOV_TOO_MANY", - "PTL_IOV_TOO_SMALL", - - "PTL_EQ_INUSE", - "PTL_MD_INUSE" -}; -/* If you change these, you must update the number table in portals/errno.h */ diff --git a/lustre/portals/portals/api-init.c b/lustre/portals/portals/api-init.c deleted file mode 100644 index e59c922..0000000 --- a/lustre/portals/portals/api-init.c +++ /dev/null @@ 
-1,71 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-init.c - * Initialization and global data for the p30 user side library - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <portals/api-support.h> - -int ptl_init; -unsigned int portal_subsystem_debug = 0xfff7e3ff; -unsigned int portal_debug = ~0; -unsigned int portal_printk; -unsigned int portal_stack; - -#ifdef __KERNEL__ -atomic_t portal_kmemory = ATOMIC_INIT(0); -#endif - -int __p30_initialized; -int __p30_myr_initialized; -int __p30_ip_initialized; -ptl_handle_ni_t __myr_ni_handle; -ptl_handle_ni_t __ip_ni_handle; - -int __p30_myr_timeout = 10; -int __p30_ip_timeout; - -int PtlInit(void) -{ - - if (ptl_init) - return PTL_OK; - - ptl_ni_init(); - ptl_me_init(); - ptl_eq_init(); - ptl_init = 1; - __p30_initialized = 1; - - return PTL_OK; -} - - -void PtlFini(void) -{ - - /* Reverse order of initialization */ - ptl_eq_fini(); - ptl_me_fini(); - ptl_ni_fini(); - ptl_init = 0; -} diff --git a/lustre/portals/portals/api-me.c b/lustre/portals/portals/api-me.c deleted file mode 100644 index e724e58..0000000 --- a/lustre/portals/portals/api-me.c +++ /dev/null @@ -1,42 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-me.c - * Match Entry local operations. - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <portals/api-support.h> - -int ptl_me_init(void) -{ - return PTL_OK; -} -void ptl_me_fini(void) -{ /* Nothing to do */ -} -int ptl_me_ni_init(nal_t * nal) -{ - return PTL_OK; -} - -void ptl_me_ni_fini(nal_t * nal) -{ /* Nothing to do... */ -} diff --git a/lustre/portals/portals/api-ni.c b/lustre/portals/portals/api-ni.c deleted file mode 100644 index b2e069e..0000000 --- a/lustre/portals/portals/api-ni.c +++ /dev/null @@ -1,197 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-ni.c - * Network Interface code - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <portals/api-support.h> - -/* Put some magic in the NI handle so uninitialised/zeroed handles are easy - * to spot */ -#define NI_HANDLE_MAGIC 0xebc0de00 -#define NI_HANDLE_MASK 0x000000ff -#define MAX_NIS 8 -static nal_t *ptl_interfaces[MAX_NIS]; -int ptl_num_interfaces = 0; - -nal_t *ptl_hndl2nal(ptl_handle_any_t *handle) -{ - unsigned int idx = handle->nal_idx; - - /* XXX we really rely on the caller NOT racing with interface - * setup/teardown. 
That ensures her NI handle can't get - * invalidated out from under her (or worse, swapped for a - * completely different interface!) */ - - if (((idx ^ NI_HANDLE_MAGIC) & ~NI_HANDLE_MASK) != 0) - return NULL; - - idx &= NI_HANDLE_MASK; - if (idx < MAX_NIS) - return ptl_interfaces[idx]; - - return NULL; -} - -int ptl_ni_init(void) -{ - int i; - - LASSERT (MAX_NIS <= (NI_HANDLE_MASK + 1)); - - for (i = 0; i < MAX_NIS; i++) - ptl_interfaces[i] = NULL; - - return PTL_OK; -} - -void ptl_ni_fini(void) -{ - int i; - - for (i = 0; i < MAX_NIS; i++) { - nal_t *nal = ptl_interfaces[i]; - if (!nal) - continue; - - if (nal->shutdown) - nal->shutdown(nal, i); - } -} - -#ifdef __KERNEL__ -DECLARE_MUTEX(ptl_ni_init_mutex); - -static void ptl_ni_init_mutex_enter (void) -{ - down (&ptl_ni_init_mutex); -} - -static void ptl_ni_init_mutex_exit (void) -{ - up (&ptl_ni_init_mutex); -} - -#else -static void ptl_ni_init_mutex_enter (void) -{ -} - -static void ptl_ni_init_mutex_exit (void) -{ -} - -#endif - -int PtlNIInit(ptl_interface_t interface, ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, ptl_pid_t requested_pid, - ptl_handle_ni_t * handle) -{ - nal_t *nal; - int i; - - if (!ptl_init) - return PTL_NOINIT; - - ptl_ni_init_mutex_enter (); - - nal = interface(ptl_num_interfaces, ptl_size, acl_size, requested_pid); - - if (!nal) { - ptl_ni_init_mutex_exit (); - return PTL_NAL_FAILED; - } - - for (i = 0; i < ptl_num_interfaces; i++) { - if (ptl_interfaces[i] == nal) { - nal->refct++; - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | i; - fprintf(stderr, "Returning existing NAL (%d)\n", i); - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - } - nal->refct = 1; - - if (ptl_num_interfaces >= MAX_NIS) { - if (nal->shutdown) - nal->shutdown (nal, ptl_num_interfaces); - ptl_ni_init_mutex_exit (); - return PTL_NOSPACE; - } - - handle->nal_idx = (NI_HANDLE_MAGIC & ~NI_HANDLE_MASK) | ptl_num_interfaces; - ptl_interfaces[ptl_num_interfaces++] = nal; - - ptl_eq_ni_init(nal); - 
ptl_me_ni_init(nal); - - ptl_ni_init_mutex_exit (); - return PTL_OK; -} - - -int PtlNIFini(ptl_handle_ni_t ni) -{ - nal_t *nal; - int idx; - int rc; - - if (!ptl_init) - return PTL_NOINIT; - - ptl_ni_init_mutex_enter (); - - nal = ptl_hndl2nal (&ni); - if (nal == NULL) { - ptl_ni_init_mutex_exit (); - return PTL_INV_HANDLE; - } - - idx = ni.nal_idx & NI_HANDLE_MASK; - - nal->refct--; - if (nal->refct > 0) { - ptl_ni_init_mutex_exit (); - return PTL_OK; - } - - ptl_me_ni_fini(nal); - ptl_eq_ni_fini(nal); - - rc = PTL_OK; - if (nal->shutdown) - rc = nal->shutdown(nal, idx); - - ptl_interfaces[idx] = NULL; - ptl_num_interfaces--; - - ptl_ni_init_mutex_exit (); - return rc; -} - -int PtlNIHandle(ptl_handle_any_t handle_in, ptl_handle_ni_t * ni_out) -{ - *ni_out = handle_in; - - return PTL_OK; -} diff --git a/lustre/portals/portals/api-wrap.c b/lustre/portals/portals/api-wrap.c deleted file mode 100644 index e54707f..0000000 --- a/lustre/portals/portals/api-wrap.c +++ /dev/null @@ -1,599 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * api/api-wrap.c - * User-level wrappers that dispatch across the protection boundaries - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -# define DEBUG_SUBSYSTEM S_PORTALS -#include <portals/api-support.h> - -static int do_forward(ptl_handle_any_t any_h, int cmd, void *argbuf, - int argsize, void *retbuf, int retsize) -{ - nal_t *nal; - - if (!ptl_init) { - fprintf(stderr, "PtlGetId: Not initialized\n"); - return PTL_NOINIT; - } - - nal = ptl_hndl2nal(&any_h); - if (!nal) - return PTL_INV_HANDLE; - - nal->forward(nal, cmd, argbuf, argsize, retbuf, retsize); - - return PTL_OK; -} - -int PtlGetId(ptl_handle_ni_t ni_handle, ptl_process_id_t *id) -{ - PtlGetId_in args; - PtlGetId_out ret; - int rc; - - args.handle_in = ni_handle; - - rc = do_forward(ni_handle, PTL_GETID, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return rc; - - if (id) - *id = ret.id_out; - - return ret.rc; -} - -int PtlFailNid (ptl_handle_ni_t interface, ptl_nid_t nid, unsigned int threshold) -{ - PtlFailNid_in args; - PtlFailNid_out ret; - int rc; - - args.interface = interface; - args.nid = nid; - args.threshold = threshold; - - rc = do_forward (interface, PTL_FAILNID, - &args, sizeof(args), &ret, sizeof (ret)); - - return ((rc != PTL_OK) ? 
rc : ret.rc); -} - -int PtlNIStatus(ptl_handle_ni_t interface_in, ptl_sr_index_t register_in, - ptl_sr_value_t * status_out) -{ - PtlNIStatus_in args; - PtlNIStatus_out ret; - int rc; - - args.interface_in = interface_in; - args.register_in = register_in; - - rc = do_forward(interface_in, PTL_NISTATUS, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (status_out) - *status_out = ret.status_out; - - return ret.rc; -} - -int PtlNIDist(ptl_handle_ni_t interface_in, ptl_process_id_t process_in, - unsigned long *distance_out) -{ - PtlNIDist_in args; - PtlNIDist_out ret; - int rc; - - args.interface_in = interface_in; - args.process_in = process_in; - - rc = do_forward(interface_in, PTL_NIDIST, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (distance_out) - *distance_out = ret.distance_out; - - return ret.rc; -} - - - -unsigned int PtlNIDebug(ptl_handle_ni_t ni, unsigned int mask_in) -{ - PtlNIDebug_in args; - PtlNIDebug_out ret; - int rc; - - args.mask_in = mask_in; - - rc = do_forward(ni, PTL_NIDEBUG, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; -} - -int PtlMEAttach(ptl_handle_ni_t interface_in, ptl_pt_index_t index_in, - ptl_process_id_t match_id_in, ptl_match_bits_t match_bits_in, - ptl_match_bits_t ignore_bits_in, ptl_unlink_t unlink_in, - ptl_ins_pos_t pos_in, ptl_handle_me_t * handle_out) -{ - PtlMEAttach_in args; - PtlMEAttach_out ret; - int rc; - - args.interface_in = interface_in; - args.index_in = index_in; - args.match_id_in = match_id_in; - args.match_bits_in = match_bits_in; - args.ignore_bits_in = ignore_bits_in; - args.unlink_in = unlink_in; - args.position_in = pos_in; - - rc = do_forward(interface_in, PTL_MEATTACH, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (handle_out) { - handle_out->nal_idx = interface_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - - return ret.rc; -} 
- -int PtlMEInsert(ptl_handle_me_t current_in, ptl_process_id_t match_id_in, - ptl_match_bits_t match_bits_in, ptl_match_bits_t ignore_bits_in, - ptl_unlink_t unlink_in, ptl_ins_pos_t position_in, - ptl_handle_me_t * handle_out) -{ - PtlMEInsert_in args; - PtlMEInsert_out ret; - int rc; - - args.current_in = current_in; - args.match_id_in = match_id_in; - args.match_bits_in = match_bits_in; - args.ignore_bits_in = ignore_bits_in; - args.unlink_in = unlink_in; - args.position_in = position_in; - - rc = do_forward(current_in, PTL_MEINSERT, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc; - - if (handle_out) { - handle_out->nal_idx = current_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; -} - -int PtlMEUnlink(ptl_handle_me_t current_in) -{ - PtlMEUnlink_in args; - PtlMEUnlink_out ret; - int rc; - - args.current_in = current_in; - args.unlink_in = PTL_RETAIN; - - rc = do_forward(current_in, PTL_MEUNLINK, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_ME : rc; - - return ret.rc; -} - -int PtlTblDump(ptl_handle_ni_t ni, int index_in) -{ - PtlTblDump_in args; - PtlTblDump_out ret; - int rc; - - args.index_in = index_in; - - rc = do_forward(ni, PTL_TBLDUMP, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - return ret.rc; -} - -int PtlMEDump(ptl_handle_me_t current_in) -{ - PtlMEDump_in args; - PtlMEDump_out ret; - int rc; - - args.current_in = current_in; - - rc = do_forward(current_in, PTL_MEDUMP, &args, sizeof(args), &ret, - sizeof(ret)); - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_ME : rc; - - return ret.rc; -} - -static int validate_md(ptl_handle_any_t current_in, ptl_md_t md_in) -{ - nal_t *nal; - int rc; - int i; - - if (!ptl_init) { - fprintf(stderr, "PtlMDAttach/Bind/Update: Not initialized\n"); - return PTL_NOINIT; - } - - nal = ptl_hndl2nal(¤t_in); - if (!nal) - return PTL_INV_HANDLE; - - if (nal->validate != NULL) /* nal->validate not a NOOP */ - { - if ((md_in.options & PTL_MD_IOV) == 0) /* contiguous */ - { - rc = nal->validate (nal, md_in.start, md_in.length); - if (rc) - return (PTL_SEGV); - } - else - { - struct iovec *iov = (struct iovec *)md_in.start; - - for (i = 0; i < md_in.niov; i++, iov++) - { - rc = nal->validate (nal, iov->iov_base, iov->iov_len); - if (rc) - return (PTL_SEGV); - } - } - } - - return 0; -} - -static ptl_handle_eq_t md2eq (ptl_md_t *md) -{ - if (PtlHandleEqual (md->eventq, PTL_EQ_NONE)) - return (PTL_EQ_NONE); - - return (ptl_handle2usereq (&md->eventq)->cb_eq_handle); -} - - -int PtlMDAttach(ptl_handle_me_t me_in, ptl_md_t md_in, - ptl_unlink_t unlink_in, ptl_handle_md_t * handle_out) -{ - PtlMDAttach_in args; - PtlMDAttach_out ret; - int rc; - - rc = validate_md(me_in, md_in); - if (rc == PTL_OK) { - args.eq_in = md2eq(&md_in); - args.me_in = me_in; - args.md_in = md_in; - args.unlink_in = unlink_in; - - rc = do_forward(me_in, PTL_MDATTACH, - &args, sizeof(args), &ret, sizeof(ret)); - } - - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_ME : rc; - - if (handle_out) { - handle_out->nal_idx = me_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; -} - - - -int PtlMDBind(ptl_handle_ni_t ni_in, ptl_md_t md_in, - ptl_handle_md_t * handle_out) -{ - PtlMDBind_in args; - PtlMDBind_out ret; - int rc; - - rc = validate_md(ni_in, md_in); - if (rc != PTL_OK) - return rc; - - args.eq_in = md2eq(&md_in); - args.ni_in = ni_in; - args.md_in = md_in; - - rc = do_forward(ni_in, PTL_MDBIND, - &args, sizeof(args), &ret, sizeof(ret)); - - if (rc != PTL_OK) - return rc; - - if (handle_out) { - handle_out->nal_idx = ni_in.nal_idx; - handle_out->cookie = ret.handle_out.cookie; - } - return ret.rc; -} - -int PtlMDUpdate(ptl_handle_md_t md_in, ptl_md_t *old_inout, - ptl_md_t *new_inout, ptl_handle_eq_t testq_in) -{ - PtlMDUpdate_internal_in args; - PtlMDUpdate_internal_out ret; - int rc; - - args.md_in = md_in; - - if (old_inout) { - args.old_inout = *old_inout; - args.old_inout_valid = 1; - } else - args.old_inout_valid = 0; - - if (new_inout) { - rc = validate_md (md_in, *new_inout); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc; - args.new_inout = *new_inout; - args.new_inout_valid = 1; - } else - args.new_inout_valid = 0; - - if (PtlHandleEqual (testq_in, PTL_EQ_NONE)) { - args.testq_in = PTL_EQ_NONE; - args.sequence_in = -1; - } else { - ptl_eq_t *eq = ptl_handle2usereq (&testq_in); - - args.testq_in = eq->cb_eq_handle; - args.sequence_in = eq->sequence; - } - - rc = do_forward(md_in, PTL_MDUPDATE, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? PTL_INV_MD : rc; - - if (old_inout) - *old_inout = ret.old_inout; - - return ret.rc; -} - -int PtlMDUnlink(ptl_handle_md_t md_in) -{ - PtlMDUnlink_in args; - PtlMDUnlink_out ret; - int rc; - - args.md_in = md_in; - rc = do_forward(md_in, PTL_MDUNLINK, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - return (rc == PTL_INV_HANDLE) ? 
PTL_INV_MD : rc; - - return ret.rc; -} - -int PtlEQAlloc(ptl_handle_ni_t interface, ptl_size_t count, - int (*callback) (ptl_event_t * event), - ptl_handle_eq_t * handle_out) -{ - ptl_eq_t *eq = NULL; - ptl_event_t *ev = NULL; - PtlEQAlloc_in args; - PtlEQAlloc_out ret; - int rc, i; - nal_t *nal; - - if (!ptl_init) - return PTL_NOINIT; - - nal = ptl_hndl2nal (&interface); - if (nal == NULL) - return PTL_INV_HANDLE; - - if (count != LOWEST_BIT_SET(count)) { /* not a power of 2 already */ - do { /* knock off all but the top bit... */ - count &= ~LOWEST_BIT_SET (count); - } while (count != LOWEST_BIT_SET(count)); - - count <<= 1; /* ...and round up */ - } - - if (count == 0) /* catch bad parameter / overflow on roundup */ - return (PTL_VAL_FAILED); - - PORTAL_ALLOC(ev, count * sizeof(ptl_event_t)); - if (!ev) - return PTL_NOSPACE; - - for (i = 0; i < count; i++) - ev[i].sequence = 0; - - if (nal->validate != NULL) { - rc = nal->validate(nal, ev, count * sizeof(ptl_event_t)); - if (rc != PTL_OK) - goto fail; - } - - args.ni_in = interface; - args.count_in = count; - args.base_in = ev; - args.len_in = count * sizeof(*ev); - args.callback_in = callback; - - rc = do_forward(interface, PTL_EQALLOC, &args, sizeof(args), &ret, - sizeof(ret)); - if (rc != PTL_OK) - goto fail; - if (ret.rc) - GOTO(fail, rc = ret.rc); - - PORTAL_ALLOC(eq, sizeof(*eq)); - if (!eq) { - rc = PTL_NOSPACE; - goto fail; - } - - eq->sequence = 1; - eq->size = count; - eq->base = ev; - - /* EQ handles are a little wierd. PtlEQGet() just looks at the - * queued events in shared memory. It doesn't want to do_forward() - * at all, so the cookie in the EQ handle we pass out of here is - * simply a pointer to the event queue we just set up. We stash - * the handle returned by do_forward(), so we can pass it back via - * do_forward() when we need to. 
*/ - - eq->cb_eq_handle.nal_idx = interface.nal_idx; - eq->cb_eq_handle.cookie = ret.handle_out.cookie; - - handle_out->nal_idx = interface.nal_idx; - handle_out->cookie = (__u64)((unsigned long)eq); - return PTL_OK; - -fail: - PORTAL_FREE(ev, count * sizeof(ptl_event_t)); - return rc; -} - -int PtlEQFree(ptl_handle_eq_t eventq) -{ - PtlEQFree_in args; - PtlEQFree_out ret; - ptl_eq_t *eq; - int rc; - - eq = ptl_handle2usereq (&eventq); - args.eventq_in = eq->cb_eq_handle; - - rc = do_forward(eq->cb_eq_handle, PTL_EQFREE, &args, - sizeof(args), &ret, sizeof(ret)); - - /* XXX we're betting rc == PTL_OK here */ - PORTAL_FREE(eq->base, eq->size * sizeof(ptl_event_t)); - PORTAL_FREE(eq, sizeof(*eq)); - - return rc; -} - -int PtlACEntry(ptl_handle_ni_t ni_in, ptl_ac_index_t index_in, - ptl_process_id_t match_id_in, ptl_pt_index_t portal_in) -{ - PtlACEntry_in args; - PtlACEntry_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.ni_in = ni_in; - args.index_in = index_in; - args.match_id_in = match_id_in; - args.portal_in = portal_in; - - rc = do_forward(ni_in, PTL_ACENTRY, &args, sizeof(args), &ret, - sizeof(ret)); - - return (rc != PTL_OK) ? rc : ret.rc; -} - -int PtlPut(ptl_handle_md_t md_in, ptl_ack_req_t ack_req_in, - ptl_process_id_t target_in, ptl_pt_index_t portal_in, - ptl_ac_index_t cookie_in, ptl_match_bits_t match_bits_in, - ptl_size_t offset_in, ptl_hdr_data_t hdr_data_in) -{ - PtlPut_in args; - PtlPut_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.md_in = md_in; - args.ack_req_in = ack_req_in; - args.target_in = target_in; - args.portal_in = portal_in; - args.cookie_in = cookie_in; - args.match_bits_in = match_bits_in; - args.offset_in = offset_in; - args.hdr_data_in = hdr_data_in; - - rc = do_forward(md_in, PTL_PUT, &args, sizeof(args), &ret, sizeof(ret)); - - return (rc != PTL_OK) ? 
rc : ret.rc; -} - -int PtlGet(ptl_handle_md_t md_in, ptl_process_id_t target_in, - ptl_pt_index_t portal_in, ptl_ac_index_t cookie_in, - ptl_match_bits_t match_bits_in, ptl_size_t offset_in) -{ - PtlGet_in args; - PtlGet_out ret; - int rc; - - /* - * Copy arguments into the argument block to - * hand to the forwarding object - */ - args.md_in = md_in; - args.target_in = target_in; - args.portal_in = portal_in; - args.cookie_in = cookie_in; - args.match_bits_in = match_bits_in; - args.offset_in = offset_in; - - rc = do_forward(md_in, PTL_GET, &args, sizeof(args), &ret, sizeof(ret)); - - return (rc != PTL_OK) ? rc : ret.rc; -} diff --git a/lustre/portals/portals/lib-dispatch.c b/lustre/portals/portals/lib-dispatch.c deleted file mode 100644 index 13036c7..0000000 --- a/lustre/portals/portals/lib-dispatch.c +++ /dev/null @@ -1,80 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-dispatch.c - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include <portals/lib-p30.h> -#include <portals/lib-dispatch.h> - -typedef struct { - int (*fun) (nal_cb_t * nal, void *private, void *in, void *out); - char *name; -} dispatch_table_t; - -static dispatch_table_t dispatch_table[] = { - [PTL_GETID] {do_PtlGetId, "PtlGetId"}, - [PTL_NISTATUS] {do_PtlNIStatus, "PtlNIStatus"}, - [PTL_NIDIST] {do_PtlNIDist, "PtlNIDist"}, - [PTL_NIDEBUG] {do_PtlNIDebug, "PtlNIDebug"}, - [PTL_MEATTACH] {do_PtlMEAttach, "PtlMEAttach"}, - [PTL_MEINSERT] {do_PtlMEInsert, "PtlMEInsert"}, - [PTL_MEUNLINK] {do_PtlMEUnlink, "PtlMEUnlink"}, - [PTL_TBLDUMP] {do_PtlTblDump, "PtlTblDump"}, - [PTL_MEDUMP] {do_PtlMEDump, "PtlMEDump"}, - [PTL_MDATTACH] {do_PtlMDAttach, "PtlMDAttach"}, - [PTL_MDBIND] {do_PtlMDBind, "PtlMDBind"}, - [PTL_MDUPDATE] {do_PtlMDUpdate_internal, "PtlMDUpdate_internal"}, - [PTL_MDUNLINK] {do_PtlMDUnlink, "PtlMDUnlink"}, - [PTL_EQALLOC] {do_PtlEQAlloc_internal, "PtlEQAlloc_internal"}, - [PTL_EQFREE] {do_PtlEQFree_internal, "PtlEQFree_internal"}, - [PTL_PUT] {do_PtlPut, "PtlPut"}, - [PTL_GET] {do_PtlGet, "PtlGet"}, - [PTL_FAILNID] {do_PtlFailNid, "PtlFailNid"}, - /* */ {0, ""} -}; - -/* - * This really should be elsewhere, but lib-p30/dispatch.c is - * an automatically generated file. 
- */ -void lib_dispatch(nal_cb_t * nal, void *private, int index, void *arg_block, - void *ret_block) -{ - lib_ni_t *ni = &nal->ni; - - if (index < 0 || index > LIB_MAX_DISPATCH || - !dispatch_table[index].fun) { - CDEBUG(D_NET, LPU64": Invalid API call %d\n", ni->nid, index); - return; - } - - CDEBUG(D_NET, LPU64": API call %s (%d)\n", ni->nid, - dispatch_table[index].name, index); - - dispatch_table[index].fun(nal, private, arg_block, ret_block); -} - -char *dispatch_name(int index) -{ - return dispatch_table[index].name; -} diff --git a/lustre/portals/portals/lib-eq.c b/lustre/portals/portals/lib-eq.c deleted file mode 100644 index ce343c1..0000000 --- a/lustre/portals/portals/lib-eq.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-eq.c - * Library level Event queue management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include <portals/lib-p30.h> -#include <portals/arg-blocks.h> - -int do_PtlEQAlloc_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t ni_in - * ptl_size_t count_in - * void * base_in - * - * Outgoing: - * ptl_handle_eq_t * handle_out - */ - - PtlEQAlloc_in *args = v_args; - PtlEQAlloc_out *ret = v_ret; - - lib_eq_t *eq; - unsigned long flags; - - /* api should have rounded up */ - if (args->count_in != LOWEST_BIT_SET (args->count_in)) - return ret->rc = PTL_VAL_FAILED; - - eq = lib_eq_alloc (nal); - if (eq == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - if (nal->cb_map != NULL) { - struct iovec iov = { - .iov_base = args->base_in, - .iov_len = args->count_in * sizeof (ptl_event_t) }; - - ret->rc = nal->cb_map (nal, 1, &iov, &eq->eq_addrkey); - if (ret->rc != PTL_OK) { - lib_eq_free (nal, eq); - - state_unlock (nal, &flags); - return (ret->rc); - } - } - - eq->sequence = 1; - eq->base = args->base_in; - eq->size = args->count_in; - eq->eq_refcount = 0; - eq->event_callback = args->callback_in; - - lib_initialise_handle (nal, &eq->eq_lh, PTL_COOKIE_TYPE_EQ); - list_add (&eq->eq_list, &nal->ni.ni_active_eqs); - - state_unlock(nal, &flags); - - ptl_eq2handle(&ret->handle_out, eq); - return (ret->rc = PTL_OK); -} - -int do_PtlEQFree_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -{ - /* - * Incoming: - * ptl_handle_eq_t eventq_in - * - * Outgoing: - */ - - PtlEQFree_in *args = v_args; - PtlEQFree_out *ret = v_ret; - lib_eq_t *eq; - long flags; - - state_lock (nal, &flags); - - eq = ptl_handle2eq(&args->eventq_in, nal); - if (eq == NULL) { - ret->rc = PTL_INV_EQ; - } else if (eq->eq_refcount != 0) { - ret->rc = PTL_EQ_INUSE; - } else { - if (nal->cb_unmap != NULL) { - struct iovec iov = { - .iov_base = eq->base, - .iov_len = eq->size * sizeof (ptl_event_t) }; - - nal->cb_unmap(nal, 1, &iov, &eq->eq_addrkey); - } - - 
lib_invalidate_handle (nal, &eq->eq_lh); - list_del (&eq->eq_list); - lib_eq_free (nal, eq); - ret->rc = PTL_OK; - } - - state_unlock (nal, &flags); - - return (ret->rc); -} diff --git a/lustre/portals/portals/lib-init.c b/lustre/portals/portals/lib-init.c deleted file mode 100644 index 99c4d32..0000000 --- a/lustre/portals/portals/lib-init.c +++ /dev/null @@ -1,474 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-init.c - * Start up the internal library and clear all structures - * Called by the NAL when it initializes. Safe to call multiple times. - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -# define DEBUG_SUBSYSTEM S_PORTALS -#include <portals/lib-p30.h> - -#ifdef __KERNEL__ -# include <linux/string.h> /* for memset() */ -# include <linux/kp30.h> -# ifdef KERNEL_ADDR_CACHE -# include <compute/OS/addrCache/cache.h> -# endif -#else -# include <string.h> -# include <sys/time.h> -#endif - -#ifdef PTL_USE_SLAB_CACHE -static int ptl_slab_users; - -kmem_cache_t *ptl_md_slab; -kmem_cache_t *ptl_msg_slab; -kmem_cache_t *ptl_me_slab; -kmem_cache_t *ptl_eq_slab; - -atomic_t md_in_use_count; -atomic_t msg_in_use_count; -atomic_t me_in_use_count; -atomic_t eq_in_use_count; - -/* NB zeroing in ctor and on freeing ensures items that - * kmem_cache_validate() OK, but haven't been initialised - * as an MD/ME/EQ can't have valid handles - */ -static void -ptl_md_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags) -{ - memset (obj, 0, sizeof (lib_md_t)); -} - -static void -ptl_me_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags) -{ - memset (obj, 0, sizeof (lib_me_t)); -} - -static void -ptl_eq_slab_ctor (void *obj, kmem_cache_t *slab, unsigned long flags) -{ - memset (obj, 0, sizeof (lib_eq_t)); -} - -int -kportal_descriptor_setup (nal_cb_t *nal) -{ - /* NB on failure caller must still call kportal_descriptor_cleanup */ - /* ****** */ - - /* We'll have 1 set of slabs for ALL the nals :) */ - - if (ptl_slab_users++) - return 0; - - ptl_md_slab = kmem_cache_create("portals_MD", - sizeof(lib_md_t), 0, - SLAB_HWCACHE_ALIGN, - ptl_md_slab_ctor, NULL); - if (!ptl_md_slab) { - CERROR("couldn't allocate ptl_md_t slab"); - RETURN (PTL_NOSPACE); - } - - /* NB no ctor for msgs; they don't need handle verification */ - ptl_msg_slab = kmem_cache_create("portals_MSG", - sizeof(lib_msg_t), 0, - SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!ptl_msg_slab) { - CERROR("couldn't allocate ptl_msg_t slab"); - RETURN (PTL_NOSPACE); - } - - ptl_me_slab = kmem_cache_create("portals_ME", - sizeof(lib_me_t), 0, - SLAB_HWCACHE_ALIGN, - ptl_me_slab_ctor, NULL); - if 
(!ptl_me_slab) { - CERROR("couldn't allocate ptl_me_t slab"); - RETURN (PTL_NOSPACE); - } - - ptl_eq_slab = kmem_cache_create("portals_EQ", - sizeof(lib_eq_t), 0, - SLAB_HWCACHE_ALIGN, - ptl_eq_slab_ctor, NULL); - if (!ptl_eq_slab) { - CERROR("couldn't allocate ptl_eq_t slab"); - RETURN (PTL_NOSPACE); - } - - RETURN(PTL_OK); -} - -void -kportal_descriptor_cleanup (nal_cb_t *nal) -{ - if (--ptl_slab_users != 0) - return; - - LASSERT (atomic_read (&md_in_use_count) == 0); - LASSERT (atomic_read (&me_in_use_count) == 0); - LASSERT (atomic_read (&eq_in_use_count) == 0); - LASSERT (atomic_read (&msg_in_use_count) == 0); - - if (ptl_md_slab != NULL) - kmem_cache_destroy(ptl_md_slab); - if (ptl_msg_slab != NULL) - kmem_cache_destroy(ptl_msg_slab); - if (ptl_me_slab != NULL) - kmem_cache_destroy(ptl_me_slab); - if (ptl_eq_slab != NULL) - kmem_cache_destroy(ptl_eq_slab); -} -#else - -int -lib_freelist_init (nal_cb_t *nal, lib_freelist_t *fl, int n, int size) -{ - char *space; - - LASSERT (n > 0); - - size += offsetof (lib_freeobj_t, fo_contents); - - space = nal->cb_malloc (nal, n * size); - if (space == NULL) - return (PTL_NOSPACE); - - INIT_LIST_HEAD (&fl->fl_list); - fl->fl_objs = space; - fl->fl_nobjs = n; - fl->fl_objsize = size; - - do - { - memset (space, 0, size); - list_add ((struct list_head *)space, &fl->fl_list); - space += size; - } while (--n != 0); - - return (PTL_OK); -} - -void -lib_freelist_fini (nal_cb_t *nal, lib_freelist_t *fl) -{ - struct list_head *el; - int count; - - if (fl->fl_nobjs == 0) - return; - - count = 0; - for (el = fl->fl_list.next; el != &fl->fl_list; el = el->next) - count++; - - LASSERT (count == fl->fl_nobjs); - - nal->cb_free (nal, fl->fl_objs, fl->fl_nobjs * fl->fl_objsize); - memset (fl, 0, sizeof (fl)); -} - -int -kportal_descriptor_setup (nal_cb_t *nal) -{ - /* NB on failure caller must still call kportal_descriptor_cleanup */ - /* ****** */ - int rc; - - memset (&nal->ni.ni_free_mes, 0, sizeof (nal->ni.ni_free_mes)); - memset 
(&nal->ni.ni_free_msgs, 0, sizeof (nal->ni.ni_free_msgs)); - memset (&nal->ni.ni_free_mds, 0, sizeof (nal->ni.ni_free_mds)); - memset (&nal->ni.ni_free_eqs, 0, sizeof (nal->ni.ni_free_eqs)); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_mes, - MAX_MES, sizeof (lib_me_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_msgs, - MAX_MSGS, sizeof (lib_msg_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_mds, - MAX_MDS, sizeof (lib_md_t)); - if (rc != PTL_OK) - return (rc); - - rc = lib_freelist_init (nal, &nal->ni.ni_free_eqs, - MAX_EQS, sizeof (lib_eq_t)); - return (rc); -} - -void -kportal_descriptor_cleanup (nal_cb_t *nal) -{ - lib_freelist_fini (nal, &nal->ni.ni_free_mes); - lib_freelist_fini (nal, &nal->ni.ni_free_msgs); - lib_freelist_fini (nal, &nal->ni.ni_free_mds); - lib_freelist_fini (nal, &nal->ni.ni_free_eqs); -} - -#endif - -__u64 -lib_create_interface_cookie (nal_cb_t *nal) -{ - /* NB the interface cookie in wire handles guards against delayed - * replies and ACKs appearing valid in a new instance of the same - * interface. Initialisation time, even if it's only implemented - * to millisecond resolution is probably easily good enough. 
*/ - struct timeval tv; - __u64 cookie; -#ifndef __KERNEL__ - int rc = gettimeofday (&tv, NULL); - LASSERT (rc == 0); -#else - do_gettimeofday(&tv); -#endif - cookie = tv.tv_sec; - cookie *= 1000000; - cookie += tv.tv_usec; - return (cookie); -} - -int -lib_setup_handle_hash (nal_cb_t *nal) -{ - lib_ni_t *ni = &nal->ni; - int i; - - /* Arbitrary choice of hash table size */ -#ifdef __KERNEL__ - ni->ni_lh_hash_size = PAGE_SIZE / sizeof (struct list_head); -#else - ni->ni_lh_hash_size = (MAX_MES + MAX_MDS + MAX_EQS)/4; -#endif - ni->ni_lh_hash_table = - (struct list_head *)nal->cb_malloc (nal, ni->ni_lh_hash_size - * sizeof (struct list_head)); - if (ni->ni_lh_hash_table == NULL) - return (PTL_NOSPACE); - - for (i = 0; i < ni->ni_lh_hash_size; i++) - INIT_LIST_HEAD (&ni->ni_lh_hash_table[i]); - - ni->ni_next_object_cookie = PTL_COOKIE_TYPES; - - return (PTL_OK); -} - -void -lib_cleanup_handle_hash (nal_cb_t *nal) -{ - lib_ni_t *ni = &nal->ni; - - if (ni->ni_lh_hash_table == NULL) - return; - - nal->cb_free (nal, ni->ni_lh_hash_table, - ni->ni_lh_hash_size * sizeof (struct list_head)); -} - -lib_handle_t * -lib_lookup_cookie (nal_cb_t *nal, __u64 cookie, int type) -{ - /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->ni; - struct list_head *list; - struct list_head *el; - unsigned int hash; - - if ((cookie & (PTL_COOKIE_TYPES - 1)) != type) - return (NULL); - - hash = ((unsigned int)cookie) % ni->ni_lh_hash_size; - list = &ni->ni_lh_hash_table[hash]; - - list_for_each (el, list) { - lib_handle_t *lh = list_entry (el, lib_handle_t, lh_hash_chain); - - if (lh->lh_cookie == cookie) - return (lh); - } - - return (NULL); -} - -void -lib_initialise_handle (nal_cb_t *nal, lib_handle_t *lh, int type) -{ - /* ALWAYS called with statelock held */ - lib_ni_t *ni = &nal->ni; - unsigned int hash; - - LASSERT (type >= 0 && type < PTL_COOKIE_TYPES); - lh->lh_cookie = ni->ni_next_object_cookie | type; - ni->ni_next_object_cookie += PTL_COOKIE_TYPES; - - hash = 
((unsigned int)lh->lh_cookie) % ni->ni_lh_hash_size; - list_add (&lh->lh_hash_chain, &ni->ni_lh_hash_table[hash]); -} - -void -lib_invalidate_handle (nal_cb_t *nal, lib_handle_t *lh) -{ - list_del (&lh->lh_hash_chain); -} - -int -lib_init(nal_cb_t * nal, ptl_nid_t nid, ptl_pid_t pid, int gsize, - ptl_pt_index_t ptl_size, ptl_ac_index_t acl_size) -{ - int rc = PTL_OK; - lib_ni_t *ni = &nal->ni; - int i; - ENTRY; - - /* NB serialised in PtlNIInit() */ - - if (ni->refcnt != 0) { /* already initialised */ - ni->refcnt++; - goto out; - } - - lib_assert_wire_constants (); - - /* - * Allocate the portal table for this interface - * and all per-interface objects. - */ - memset(&ni->counters, 0, sizeof(lib_counters_t)); - - rc = kportal_descriptor_setup (nal); - if (rc != PTL_OK) - goto out; - - INIT_LIST_HEAD (&ni->ni_active_msgs); - INIT_LIST_HEAD (&ni->ni_active_mds); - INIT_LIST_HEAD (&ni->ni_active_eqs); - - INIT_LIST_HEAD (&ni->ni_test_peers); - - ni->ni_interface_cookie = lib_create_interface_cookie (nal); - ni->ni_next_object_cookie = 0; - rc = lib_setup_handle_hash (nal); - if (rc != PTL_OK) - goto out; - - ni->nid = nid; - ni->pid = pid; - - ni->num_nodes = gsize; - ni->tbl.size = ptl_size; - - ni->tbl.tbl = nal->cb_malloc(nal, sizeof(struct list_head) * ptl_size); - if (ni->tbl.tbl == NULL) { - rc = PTL_NOSPACE; - goto out; - } - - for (i = 0; i < ptl_size; i++) - INIT_LIST_HEAD(&(ni->tbl.tbl[i])); - - ni->debug = PTL_DEBUG_NONE; - ni->up = 1; - ni->refcnt++; - - out: - if (rc != PTL_OK) { - lib_cleanup_handle_hash (nal); - kportal_descriptor_cleanup (nal); - } - - RETURN (rc); -} - -int -lib_fini(nal_cb_t * nal) -{ - lib_ni_t *ni = &nal->ni; - int idx; - - ni->refcnt--; - - if (ni->refcnt != 0) - goto out; - - /* NB no stat_lock() since this is the last reference. 
The NAL - * should have shut down already, so it should be safe to unlink - * and free all descriptors, even those that appear committed to a - * network op (eg MD with non-zero pending count) - */ - - for (idx = 0; idx < ni->tbl.size; idx++) - while (!list_empty (&ni->tbl.tbl[idx])) { - lib_me_t *me = list_entry (ni->tbl.tbl[idx].next, - lib_me_t, me_list); - - CERROR ("Active me %p on exit\n", me); - list_del (&me->me_list); - lib_me_free (nal, me); - } - - while (!list_empty (&ni->ni_active_mds)) { - lib_md_t *md = list_entry (ni->ni_active_mds.next, - lib_md_t, md_list); - - CERROR ("Active md %p on exit\n", md); - list_del (&md->md_list); - lib_md_free (nal, md); - } - - while (!list_empty (&ni->ni_active_eqs)) { - lib_eq_t *eq = list_entry (ni->ni_active_eqs.next, - lib_eq_t, eq_list); - - CERROR ("Active eq %p on exit\n", eq); - list_del (&eq->eq_list); - lib_eq_free (nal, eq); - } - - while (!list_empty (&ni->ni_active_msgs)) { - lib_msg_t *msg = list_entry (ni->ni_active_msgs.next, - lib_msg_t, msg_list); - - CERROR ("Active msg %p on exit\n", msg); - list_del (&msg->msg_list); - lib_msg_free (nal, msg); - } - - nal->cb_free(nal, ni->tbl.tbl, sizeof(struct list_head) * ni->tbl.size); - ni->up = 0; - - lib_cleanup_handle_hash (nal); - kportal_descriptor_cleanup (nal); - - out: - return (PTL_OK); -} diff --git a/lustre/portals/portals/lib-md.c b/lustre/portals/portals/lib-md.c deleted file mode 100644 index a79e2be..0000000 --- a/lustre/portals/portals/lib-md.c +++ /dev/null @@ -1,412 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-md.c - * Memory Descriptor management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. 
- * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __KERNEL__ -# include <stdio.h> -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include <linux/kp30.h> -#endif - -#include <portals/lib-p30.h> -#include <portals/arg-blocks.h> - -/* - * must be called with state lock held - */ -void lib_md_unlink(nal_cb_t * nal, lib_md_t * md) -{ - lib_me_t *me = md->me; - - if (md->pending != 0) { - CDEBUG(D_NET, "Queueing unlink of md %p\n", md); - md->md_flags |= PTL_MD_FLAG_UNLINK; - return; - } - - CDEBUG(D_NET, "Unlinking md %p\n", md); - - if ((md->options & PTL_MD_KIOV) != 0) { - if (nal->cb_unmap_pages != NULL) - nal->cb_unmap_pages (nal, md->md_niov, md->md_iov.kiov, - &md->md_addrkey); - } else if (nal->cb_unmap != NULL) - nal->cb_unmap (nal, md->md_niov, md->md_iov.iov, - &md->md_addrkey); - - if (me) { - me->md = NULL; - if (me->unlink == PTL_UNLINK) - lib_me_unlink(nal, me); - } - - if (md->eq != NULL) - { - md->eq->eq_refcount--; - LASSERT (md->eq->eq_refcount >= 0); - } - - lib_invalidate_handle (nal, &md->md_lh); - list_del (&md->md_list); - lib_md_free(nal, md); -} - -/* must be called with state lock held */ -static int lib_md_build(nal_cb_t *nal, lib_md_t *new, void *private, - ptl_md_t *md, ptl_handle_eq_t *eqh, int unlink) -{ - const int 
max_size_opts = PTL_MD_AUTO_UNLINK | - PTL_MD_MAX_SIZE; - lib_eq_t *eq = NULL; - int rc; - int i; - - /* NB we are passes an allocated, but uninitialised/active md. - * if we return success, caller may lib_md_unlink() it. - * otherwise caller may only lib_md_free() it. - */ - - if (!PtlHandleEqual (*eqh, PTL_EQ_NONE)) { - eq = ptl_handle2eq(eqh, nal); - if (eq == NULL) - return PTL_INV_EQ; - } - - if ((md->options & PTL_MD_IOV) != 0 && /* discontiguous MD */ - md->niov > PTL_MD_MAX_IOV) /* too many fragments */ - return PTL_IOV_TOO_MANY; - - if ((md->options & max_size_opts) != 0 && /* max size used */ - (md->max_size < 0 || md->max_size > md->length)) // illegal max_size - return PTL_INV_MD; - - new->me = NULL; - new->start = md->start; - new->length = md->length; - new->offset = 0; - new->max_size = md->max_size; - new->unlink = unlink; - new->options = md->options; - new->user_ptr = md->user_ptr; - new->eq = eq; - new->threshold = md->threshold; - new->pending = 0; - new->md_flags = 0; - - if ((md->options & PTL_MD_IOV) != 0) { - int total_length = 0; - - if ((md->options & PTL_MD_KIOV) != 0) /* Can't specify both */ - return PTL_INV_MD; - - new->md_niov = md->niov; - - if (nal->cb_read (nal, private, new->md_iov.iov, md->start, - md->niov * sizeof (new->md_iov.iov[0]))) - return PTL_SEGV; - - for (i = 0; i < new->md_niov; i++) { - /* We take the base address on trust */ - if (new->md_iov.iov[i].iov_len <= 0) /* invalid length */ - return PTL_VAL_FAILED; - - total_length += new->md_iov.iov[i].iov_len; - } - - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - - if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, - &new->md_addrkey); - if (rc != PTL_OK) - return (rc); - } - } else if ((md->options & PTL_MD_KIOV) != 0) { -#ifndef __KERNEL__ - return PTL_INV_MD; -#else - int total_length = 0; - - /* Trap attempt to use paged I/O if unsupported early. 
*/ - if (nal->cb_send_pages == NULL || - nal->cb_recv_pages == NULL) - return PTL_INV_MD; - - new->md_niov = md->niov; - - if (nal->cb_read (nal, private, new->md_iov.kiov, md->start, - md->niov * sizeof (new->md_iov.kiov[0]))) - return PTL_SEGV; - - for (i = 0; i < new->md_niov; i++) { - /* We take the page pointer on trust */ - if (new->md_iov.kiov[i].kiov_offset + - new->md_iov.kiov[i].kiov_len > PAGE_SIZE ) - return PTL_VAL_FAILED; /* invalid length */ - - total_length += new->md_iov.kiov[i].kiov_len; - } - - if (md->length > total_length) - return PTL_IOV_TOO_SMALL; - - if (nal->cb_map_pages != NULL) { - rc = nal->cb_map_pages (nal, new->md_niov, new->md_iov.kiov, - &new->md_addrkey); - if (rc != PTL_OK) - return (rc); - } -#endif - } else { /* contiguous */ - new->md_niov = 1; - new->md_iov.iov[0].iov_base = md->start; - new->md_iov.iov[0].iov_len = md->length; - - if (nal->cb_map != NULL) { - rc = nal->cb_map (nal, new->md_niov, new->md_iov.iov, - &new->md_addrkey); - if (rc != PTL_OK) - return (rc); - } - } - - if (eq != NULL) - eq->eq_refcount++; - - /* It's good; let handle2md succeed and add to active mds */ - lib_initialise_handle (nal, &new->md_lh, PTL_COOKIE_TYPE_MD); - list_add (&new->md_list, &nal->ni.ni_active_mds); - - return PTL_OK; -} - -/* must be called with state lock held */ -void lib_md_deconstruct(nal_cb_t * nal, lib_md_t * md, ptl_md_t * new) -{ - /* NB this doesn't copy out all the iov entries so when a - * discontiguous MD is copied out, the target gets to know the - * original iov pointer (in start) and the number of entries it had - * and that's all. - */ - new->start = md->start; - new->length = md->length; - new->threshold = md->threshold; - new->max_size = md->max_size; - new->options = md->options; - new->user_ptr = md->user_ptr; - ptl_eq2handle(&new->eventq, md->eq); - new->niov = ((md->options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0) ? 
0 : md->md_niov; -} - -int do_PtlMDAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_me_t current_in - * ptl_md_t md_in - * ptl_unlink_t unlink_in - * - * Outgoing: - * ptl_handle_md_t * handle_out - */ - - PtlMDAttach_in *args = v_args; - PtlMDAttach_out *ret = v_ret; - lib_me_t *me; - lib_md_t *md; - unsigned long flags; - - md = lib_md_alloc (nal); - if (md == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - me = ptl_handle2me(&args->me_in, nal); - if (me == NULL) { - ret->rc = PTL_INV_ME; - } else if (me->md != NULL) { - ret->rc = PTL_INUSE; - } else { - ret->rc = lib_md_build(nal, md, private, &args->md_in, - &args->eq_in, args->unlink_in); - - if (ret->rc == PTL_OK) { - me->md = md; - md->me = me; - - ptl_md2handle(&ret->handle_out, md); - - state_unlock (nal, &flags); - return (PTL_OK); - } - } - - lib_md_free (nal, md); - - state_unlock (nal, &flags); - return (ret->rc); -} - -int do_PtlMDBind(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t ni_in - * ptl_md_t md_in - * - * Outgoing: - * ptl_handle_md_t * handle_out - */ - - PtlMDBind_in *args = v_args; - PtlMDBind_out *ret = v_ret; - lib_md_t *md; - unsigned long flags; - - md = lib_md_alloc (nal); - if (md == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - ret->rc = lib_md_build(nal, md, private, - &args->md_in, &args->eq_in, PTL_UNLINK); - - if (ret->rc == PTL_OK) { - ptl_md2handle(&ret->handle_out, md); - - state_unlock(nal, &flags); - return (PTL_OK); - } - - lib_md_free (nal, md); - - state_unlock(nal, &flags); - return (ret->rc); -} - -int do_PtlMDUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMDUnlink_in *args = v_args; - PtlMDUnlink_out *ret = v_ret; - - lib_md_t *md; - unsigned long flags; - - state_lock(nal, &flags); - - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL) { - ret->rc = PTL_INV_MD; - } else if 
(md->pending != 0) { /* being filled/spilled */ - ret->rc = PTL_MD_INUSE; - } else { - /* Callers attempting to unlink a busy MD which will get - * unlinked once the net op completes should see INUSE, - * before completion and INV_MD thereafter. LASSERT we've - * got that right... */ - LASSERT ((md->md_flags & PTL_MD_FLAG_UNLINK) == 0); - - lib_md_deconstruct(nal, md, &ret->status_out); - lib_md_unlink(nal, md); - ret->rc = PTL_OK; - } - - state_unlock(nal, &flags); - - return (ret->rc); -} - -int do_PtlMDUpdate_internal(nal_cb_t * nal, void *private, void *v_args, - void *v_ret) -{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_md_t * old_inout - * ptl_md_t * new_inout - * ptl_handle_eq_t testq_in - * ptl_seq_t sequence_in - * - * Outgoing: - * ptl_md_t * old_inout - * ptl_md_t * new_inout - */ - PtlMDUpdate_internal_in *args = v_args; - PtlMDUpdate_internal_out *ret = v_ret; - lib_md_t *md; - lib_eq_t *test_eq = NULL; - ptl_md_t *new = &args->new_inout; - unsigned long flags; - - state_lock(nal, &flags); - - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL) { - ret->rc = PTL_INV_MD; - goto out; - } - - if (args->old_inout_valid) - lib_md_deconstruct(nal, md, &ret->old_inout); - - if (!args->new_inout_valid) { - ret->rc = PTL_OK; - goto out; - } - - if (!PtlHandleEqual (args->testq_in, PTL_EQ_NONE)) { - test_eq = ptl_handle2eq(&args->testq_in, nal); - if (test_eq == NULL) { - ret->rc = PTL_INV_EQ; - goto out; - } - } - - if (md->pending != 0) { - ret->rc = PTL_NOUPDATE; - goto out; - } - - if (test_eq == NULL || - test_eq->sequence == args->sequence_in) { - lib_me_t *me = md->me; - -#warning this does not track eq refcounts properly - - ret->rc = lib_md_build(nal, md, private, - new, &new->eventq, md->unlink); - - md->me = me; - } else { - ret->rc = PTL_NOUPDATE; - } - - out: - state_unlock(nal, &flags); - return (ret->rc); -} diff --git a/lustre/portals/portals/lib-me.c b/lustre/portals/portals/lib-me.c deleted file mode 100644 index bd1af5b..0000000 
--- a/lustre/portals/portals/lib-me.c +++ /dev/null @@ -1,227 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-me.c - * Match Entry management routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef __KERNEL__ -# include <stdio.h> -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include <linux/kp30.h> -#endif - -#include <portals/lib-p30.h> -#include <portals/arg-blocks.h> - -static void lib_me_dump(nal_cb_t * nal, lib_me_t * me); - -int do_PtlMEAttach(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEAttach_in *args = v_args; - PtlMEAttach_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_ptl_t *tbl = &ni->tbl; - unsigned long flags; - lib_me_t *me; - - if (args->index_in < 0 || args->index_in >= tbl->size) - return ret->rc = PTL_INV_PTINDEX; - - /* Should check for valid matchid, but not yet */ - if (0) - return ret->rc = PTL_INV_PROC; - - me = lib_me_alloc (nal); - if (me == NULL) - return (ret->rc = PTL_NOSPACE); - - state_lock(nal, &flags); - - me->match_id = args->match_id_in; - me->match_bits = args->match_bits_in; - me->ignore_bits = args->ignore_bits_in; - me->unlink = args->unlink_in; - me->md = NULL; - - lib_initialise_handle (nal, &me->me_lh, PTL_COOKIE_TYPE_ME); - - if (args->position_in == PTL_INS_AFTER) - list_add_tail(&me->me_list, &(tbl->tbl[args->index_in])); - else - list_add(&me->me_list, &(tbl->tbl[args->index_in])); - - ptl_me2handle(&ret->handle_out, me); - - state_unlock(nal, &flags); - - return ret->rc = PTL_OK; -} - -int do_PtlMEInsert(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEInsert_in *args = v_args; - PtlMEInsert_out *ret = v_ret; - unsigned long flags; - lib_me_t *me; - lib_me_t *new; - - new = lib_me_alloc (nal); - if (new == NULL) - return (ret->rc = PTL_NOSPACE); - - /* Should check for valid matchid, but not yet */ - - state_lock(nal, &flags); - - me = ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - lib_me_free (nal, new); - - state_unlock (nal, &flags); - return (ret->rc = PTL_INV_ME); - } - - new->match_id = args->match_id_in; - new->match_bits = args->match_bits_in; - new->ignore_bits = args->ignore_bits_in; - new->unlink = args->unlink_in; - new->md = 
NULL; - - lib_initialise_handle (nal, &new->me_lh, PTL_COOKIE_TYPE_ME); - - if (args->position_in == PTL_INS_AFTER) - list_add_tail(&new->me_list, &me->me_list); - else - list_add(&new->me_list, &me->me_list); - - ptl_me2handle(&ret->handle_out, new); - - state_unlock(nal, &flags); - - return ret->rc = PTL_OK; -} - -int do_PtlMEUnlink(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEUnlink_in *args = v_args; - PtlMEUnlink_out *ret = v_ret; - unsigned long flags; - lib_me_t *me; - - state_lock(nal, &flags); - - me = ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - ret->rc = PTL_INV_ME; - } else { - lib_me_unlink(nal, me); - ret->rc = PTL_OK; - } - - state_unlock(nal, &flags); - - return (ret->rc); -} - -/* call with state_lock please */ -void lib_me_unlink(nal_cb_t *nal, lib_me_t *me) -{ - lib_ni_t *ni = &nal->ni; - - if (ni->debug & PTL_DEBUG_UNLINK) { - ptl_handle_any_t handle; - ptl_me2handle(&handle, me); - } - - list_del (&me->me_list); - - if (me->md) { - me->md->me = NULL; - lib_md_unlink(nal, me->md); - } - - lib_invalidate_handle (nal, &me->me_lh); - lib_me_free(nal, me); -} - -int do_PtlTblDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlTblDump_in *args = v_args; - PtlTblDump_out *ret = v_ret; - lib_ptl_t *tbl = &nal->ni.tbl; - ptl_handle_any_t handle; - struct list_head *tmp; - unsigned long flags; - - if (args->index_in < 0 || args->index_in >= tbl->size) - return ret->rc = PTL_INV_PTINDEX; - - nal->cb_printf(nal, "Portal table index %d\n", args->index_in); - - state_lock(nal, &flags); - list_for_each(tmp, &(tbl->tbl[args->index_in])) { - lib_me_t *me = list_entry(tmp, lib_me_t, me_list); - ptl_me2handle(&handle, me); - lib_me_dump(nal, me); - } - state_unlock(nal, &flags); - - return ret->rc = PTL_OK; -} - -int do_PtlMEDump(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlMEDump_in *args = v_args; - PtlMEDump_out *ret = v_ret; - lib_me_t *me; - unsigned long flags; - - 
state_lock(nal, &flags); - - me = ptl_handle2me(&args->current_in, nal); - if (me == NULL) { - ret->rc = PTL_INV_ME; - } else { - lib_me_dump(nal, me); - ret->rc = PTL_OK; - } - - state_unlock(nal, &flags); - - return ret->rc; -} - -static void lib_me_dump(nal_cb_t * nal, lib_me_t * me) -{ - nal->cb_printf(nal, "Match Entry %p ("LPX64")\n", me, - me->me_lh.lh_cookie); - - nal->cb_printf(nal, "\tMatch/Ignore\t= %016lx / %016lx\n", - me->match_bits, me->ignore_bits); - - nal->cb_printf(nal, "\tMD\t= %p\n", me->md); - nal->cb_printf(nal, "\tprev\t= %p\n", - list_entry(me->me_list.prev, lib_me_t, me_list)); - nal->cb_printf(nal, "\tnext\t= %p\n", - list_entry(me->me_list.next, lib_me_t, me_list)); -} diff --git a/lustre/portals/portals/lib-move.c b/lustre/portals/portals/lib-move.c deleted file mode 100644 index fde4f16..0000000 --- a/lustre/portals/portals/lib-move.c +++ /dev/null @@ -1,1379 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-move.c - * Data movement routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#ifndef __KERNEL__ -# include <stdio.h> -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include <linux/kp30.h> -#endif -#include <portals/p30.h> -#include <portals/lib-p30.h> -#include <portals/arg-blocks.h> - -/* - * Right now it does not check access control lists. - * - * We only support one MD per ME, which is how the Portals 3.1 spec is written. - * All previous complication is removed. - */ - -static lib_me_t * -lib_find_me(nal_cb_t *nal, int index, int op_mask, ptl_nid_t src_nid, - ptl_pid_t src_pid, ptl_size_t rlength, ptl_size_t roffset, - ptl_match_bits_t match_bits, ptl_size_t *mlength_out, - ptl_size_t *offset_out, int *unlink_out) -{ - lib_ni_t *ni = &nal->ni; - struct list_head *match_list = &ni->tbl.tbl[index]; - struct list_head *tmp; - lib_me_t *me; - lib_md_t *md; - ptl_size_t mlength; - ptl_size_t offset; - - ENTRY; - - CDEBUG (D_NET, "Request from "LPU64".%d of length %d into portal %d " - "MB="LPX64"\n", src_nid, src_pid, rlength, index, match_bits); - - if (index < 0 || index >= ni->tbl.size) { - CERROR("Invalid portal %d not in [0-%d]\n", - index, ni->tbl.size); - goto failed; - } - - list_for_each (tmp, match_list) { - me = list_entry(tmp, lib_me_t, me_list); - md = me->md; - - /* ME attached but MD not attached yet */ - if (md == NULL) - continue; - - LASSERT (me == md->me); - - /* MD deactivated */ - if (md->threshold == 0) - continue; - - /* mismatched MD op */ - if ((md->options & op_mask) == 0) - continue; - - /* mismatched ME nid/pid? */ - if (me->match_id.nid != PTL_NID_ANY && - me->match_id.nid != src_nid) - continue; - - if (me->match_id.pid != PTL_PID_ANY && - me->match_id.pid != src_pid) - continue; - - /* mismatched ME matchbits? */ - if (((me->match_bits ^ match_bits) & ~me->ignore_bits) != 0) - continue; - - /* Hurrah! This _is_ a match; check it out... 
*/ - - if ((md->options & PTL_MD_MANAGE_REMOTE) == 0) - offset = md->offset; - else - offset = roffset; - - mlength = md->length - offset; - if ((md->options & PTL_MD_MAX_SIZE) != 0 && - mlength > md->max_size) - mlength = md->max_size; - - if (rlength <= mlength) { /* fits in allowed space */ - mlength = rlength; - } else if ((md->options & PTL_MD_TRUNCATE) == 0) { - /* this packet _really_ is too big */ - CERROR("Matching packet %d too big: %d left, " - "%d allowed\n", rlength, md->length - offset, - mlength); - goto failed; - } - - md->offset = offset + mlength; - - *offset_out = offset; - *mlength_out = mlength; - *unlink_out = ((md->options & PTL_MD_AUTO_UNLINK) != 0 && - md->offset >= (md->length - md->max_size)); - RETURN (me); - } - - failed: - CERROR (LPU64": Dropping %s from "LPU64".%d portal %d match "LPX64 - " offset %d length %d: no match\n", - ni->nid, (op_mask == PTL_MD_OP_GET) ? "GET" : "PUT", - src_nid, src_pid, index, match_bits, roffset, rlength); - RETURN(NULL); -} - -int do_PtlFailNid (nal_cb_t *nal, void *private, void *v_args, void *v_ret) -{ - PtlFailNid_in *args = v_args; - PtlFailNid_out *ret = v_ret; - lib_test_peer_t *tp; - unsigned long flags; - struct list_head *el; - struct list_head *next; - struct list_head cull; - - if (args->threshold != 0) { - /* Adding a new entry */ - tp = (lib_test_peer_t *)nal->cb_malloc (nal, sizeof (*tp)); - if (tp == NULL) - return (ret->rc = PTL_FAIL); - - tp->tp_nid = args->nid; - tp->tp_threshold = args->threshold; - - state_lock (nal, &flags); - list_add (&tp->tp_list, &nal->ni.ni_test_peers); - state_unlock (nal, &flags); - return (ret->rc = PTL_OK); - } - - /* removing entries */ - INIT_LIST_HEAD (&cull); - - state_lock (nal, &flags); - - list_for_each_safe (el, next, &nal->ni.ni_test_peers) { - tp = list_entry (el, lib_test_peer_t, tp_list); - - if (tp->tp_threshold == 0 || /* needs culling anyway */ - args->nid == PTL_NID_ANY || /* removing all entries */ - tp->tp_nid == args->nid) /* matched this 
one */ - { - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - - state_unlock (nal, &flags); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lib_test_peer_t, tp_list); - - list_del (&tp->tp_list); - nal->cb_free (nal, tp, sizeof (*tp)); - } - return (ret->rc = PTL_OK); -} - -static int -fail_peer (nal_cb_t *nal, ptl_nid_t nid, int outgoing) -{ - lib_test_peer_t *tp; - struct list_head *el; - struct list_head *next; - unsigned long flags; - struct list_head cull; - int fail = 0; - - INIT_LIST_HEAD (&cull); - - state_lock (nal, &flags); - - list_for_each_safe (el, next, &nal->ni.ni_test_peers) { - tp = list_entry (el, lib_test_peer_t, tp_list); - - if (tp->tp_threshold == 0) { - /* zombie entry */ - if (outgoing) { - /* only cull zombies on outgoing tests, - * since we may be at interrupt priority on - * incoming messages. */ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - continue; - } - - if (tp->tp_nid == PTL_NID_ANY || /* fail every peer */ - nid == tp->tp_nid) { /* fail this peer */ - fail = 1; - - if (tp->tp_threshold != PTL_MD_THRESH_INF) { - tp->tp_threshold--; - if (outgoing && - tp->tp_threshold == 0) { - /* see above */ - list_del (&tp->tp_list); - list_add (&tp->tp_list, &cull); - } - } - break; - } - } - - state_unlock (nal, &flags); - - while (!list_empty (&cull)) { - tp = list_entry (cull.next, lib_test_peer_t, tp_list); - list_del (&tp->tp_list); - - nal->cb_free (nal, tp, sizeof (*tp)); - } - - return (fail); -} - -ptl_size_t -lib_iov_nob (int niov, struct iovec *iov) -{ - ptl_size_t nob = 0; - - while (niov-- > 0) - nob += (iov++)->iov_len; - - return (nob); -} - -void -lib_copy_iov2buf (char *dest, int niov, struct iovec *iov, ptl_size_t len) -{ - ptl_size_t nob; - - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (iov->iov_len, len); - memcpy (dest, iov->iov_base, nob); - - len -= nob; - dest += nob; - niov--; - iov++; - } -} - -void -lib_copy_buf2iov (int niov, struct iovec *iov, char 
*src, ptl_size_t len) -{ - ptl_size_t nob; - - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (iov->iov_len, len); - memcpy (iov->iov_base, src, nob); - - len -= nob; - src += nob; - niov--; - iov++; - } -} - -static int -lib_extract_iov (struct iovec *dst, lib_md_t *md, - ptl_size_t offset, ptl_size_t len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - int src_niov = md->md_niov; - struct iovec *src = md->md_iov.iov; - ptl_size_t frag_len; - int dst_niov; - - LASSERT (len >= 0); - LASSERT (offset >= 0); - LASSERT (offset + len <= md->length); - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->iov_len) { /* skip initial frags */ - offset -= src->iov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - dst_niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (dst_niov <= PTL_MD_MAX_IOV); - - frag_len = src->iov_len - offset; - dst->iov_base = ((char *)src->iov_base) + offset; - - if (len <= frag_len) { - dst->iov_len = len; - return (dst_niov); - } - - dst->iov_len = frag_len; - - len -= frag_len; - dst++; - src++; - dst_niov++; - src_niov--; - offset = 0; - } -} - -#ifndef __KERNEL__ -ptl_size_t -lib_kiov_nob (int niov, ptl_kiov_t *kiov) -{ - LASSERT (0); - return (0); -} - -void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len) -{ - LASSERT (0); -} - -void -lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *dest, ptl_size_t len) -{ - LASSERT (0); -} - -static int -lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md, - ptl_size_t offset, ptl_size_t len) -{ - LASSERT (0); -} - -#else - -ptl_size_t -lib_kiov_nob (int niov, ptl_kiov_t *kiov) -{ - ptl_size_t nob = 0; - - while (niov-- > 0) - nob += (kiov++)->kiov_len; - - return (nob); -} - -void -lib_copy_kiov2buf (char *dest, int niov, ptl_kiov_t *kiov, ptl_size_t len) -{ - ptl_size_t 
nob; - char *addr; - - LASSERT (!in_interrupt ()); - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (kiov->kiov_len, len); - - addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - memcpy (dest, addr, nob); - kunmap (kiov->kiov_page); - - len -= nob; - dest += nob; - niov--; - kiov++; - } -} - -void -lib_copy_buf2kiov (int niov, ptl_kiov_t *kiov, char *src, ptl_size_t len) -{ - ptl_size_t nob; - char *addr; - - LASSERT (!in_interrupt ()); - while (len > 0) - { - LASSERT (niov > 0); - nob = MIN (kiov->kiov_len, len); - - addr = ((char *)kmap (kiov->kiov_page)) + kiov->kiov_offset; - memcpy (addr, src, nob); - kunmap (kiov->kiov_page); - - len -= nob; - src += nob; - niov--; - kiov++; - } -} - -static int -lib_extract_kiov (ptl_kiov_t *dst, lib_md_t *md, - ptl_size_t offset, ptl_size_t len) -{ - /* Initialise 'dst' to the subset of 'src' starting at 'offset', - * for exactly 'len' bytes, and return the number of entries. - * NB not destructive to 'src' */ - int src_niov = md->md_niov; - ptl_kiov_t *src = md->md_iov.kiov; - ptl_size_t frag_len; - int dst_niov; - - LASSERT (len >= 0); - LASSERT (offset >= 0); - LASSERT (offset + len <= md->length); - - if (len == 0) /* no data => */ - return (0); /* no frags */ - - LASSERT (src_niov > 0); - while (offset >= src->kiov_len) { /* skip initial frags */ - offset -= src->kiov_len; - src_niov--; - src++; - LASSERT (src_niov > 0); - } - - dst_niov = 1; - for (;;) { - LASSERT (src_niov > 0); - LASSERT (dst_niov <= PTL_MD_MAX_IOV); - - frag_len = src->kiov_len - offset; - dst->kiov_page = src->kiov_page; - dst->kiov_offset = src->kiov_offset + offset; - - if (len <= frag_len) { - dst->kiov_len = len; - LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); - return (dst_niov); - } - - dst->kiov_len = frag_len; - LASSERT (dst->kiov_offset + dst->kiov_len <= PAGE_SIZE); - - len -= frag_len; - dst++; - src++; - dst_niov++; - src_niov--; - offset = 0; - } -} -#endif - -void -lib_recv (nal_cb_t *nal, void *private, 
lib_msg_t *msg, lib_md_t *md, - ptl_size_t offset, ptl_size_t mlen, ptl_size_t rlen) -{ - int niov; - - if (mlen == 0) - nal->cb_recv (nal, private, msg, 0, NULL, 0, rlen); - else if ((md->options & PTL_MD_KIOV) == 0) { - niov = lib_extract_iov (msg->msg_iov.iov, md, offset, mlen); - nal->cb_recv (nal, private, msg, - niov, msg->msg_iov.iov, mlen, rlen); - } else { - niov = lib_extract_kiov (msg->msg_iov.kiov, md, offset, mlen); - nal->cb_recv_pages (nal, private, msg, - niov, msg->msg_iov.kiov, mlen, rlen); - } -} - -int -lib_send (nal_cb_t *nal, void *private, lib_msg_t *msg, - ptl_hdr_t *hdr, int type, ptl_nid_t nid, ptl_pid_t pid, - lib_md_t *md, ptl_size_t offset, ptl_size_t len) -{ - int niov; - - if (len == 0) - return (nal->cb_send (nal, private, msg, - hdr, type, nid, pid, - 0, NULL, 0)); - - if ((md->options & PTL_MD_KIOV) == 0) { - niov = lib_extract_iov (msg->msg_iov.iov, md, offset, len); - return (nal->cb_send (nal, private, msg, - hdr, type, nid, pid, - niov, msg->msg_iov.iov, len)); - } - - niov = lib_extract_kiov (msg->msg_iov.kiov, md, offset, len); - return (nal->cb_send_pages (nal, private, msg, - hdr, type, nid, pid, - niov, msg->msg_iov.kiov, len)); -} - -static lib_msg_t * -get_new_msg (nal_cb_t *nal, lib_md_t *md) -{ - /* ALWAYS called holding the state_lock */ - lib_counters_t *counters = &nal->ni.counters; - lib_msg_t *msg = lib_msg_alloc (nal); - - if (msg == NULL) - return (NULL); - - memset (msg, 0, sizeof (*msg)); - - msg->send_ack = 0; - - msg->md = md; - msg->ev.arrival_time = get_cycles(); - md->pending++; - if (md->threshold != PTL_MD_THRESH_INF) { - LASSERT (md->threshold > 0); - md->threshold--; - } - - counters->msgs_alloc++; - if (counters->msgs_alloc > counters->msgs_max) - counters->msgs_max = counters->msgs_alloc; - - list_add (&msg->msg_list, &nal->ni.ni_active_msgs); - - return (msg); -} - - -/* - * Incoming messages have a ptl_msg_t object associated with them - * by the library. 
This object encapsulates the state of the - * message and allows the NAL to do non-blocking receives or sends - * of long messages. - * - */ -static int parse_put(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - ptl_size_t mlength = 0; - ptl_size_t offset = 0; - int unlink = 0; - lib_me_t *me; - lib_md_t *md; - lib_msg_t *msg; - unsigned long flags; - - /* Convert put fields to host byte order */ - hdr->msg.put.match_bits = NTOH__u64 (hdr->msg.put.match_bits); - hdr->msg.put.ptl_index = NTOH__u32 (hdr->msg.put.ptl_index); - hdr->msg.put.offset = NTOH__u32 (hdr->msg.put.offset); - - state_lock(nal, &flags); - - me = lib_find_me(nal, hdr->msg.put.ptl_index, PTL_MD_OP_PUT, - hdr->src_nid, hdr->src_pid, - PTL_HDR_LENGTH (hdr), hdr->msg.put.offset, - hdr->msg.put.match_bits, - &mlength, &offset, &unlink); - if (me == NULL) - goto drop; - - md = me->md; - CDEBUG(D_NET, "Incoming put index %x from "LPU64"/%u of length %d/%d " - "into md "LPX64" [%d] + %d\n", hdr->msg.put.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), - md->md_lh.lh_cookie, md->md_niov, offset); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping PUT from "LPU64": can't allocate msg\n", - ni->nid, hdr->src_nid); - goto drop; - } - - if (!ptl_is_wire_handle_none(&hdr->msg.put.ack_wmd) && - !(md->options & PTL_MD_ACK_DISABLE)) { - msg->send_ack = 1; - msg->ack_wmd = hdr->msg.put.ack_wmd; - msg->nid = hdr->src_nid; - msg->pid = hdr->src_pid; - msg->ev.match_bits = hdr->msg.put.match_bits; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_PUT; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.portal = hdr->msg.put.ptl_index; - msg->ev.match_bits = hdr->msg.put.match_bits; - msg->ev.rlength = PTL_HDR_LENGTH(hdr); - msg->ev.mlength = mlength; - msg->ev.offset = offset; - msg->ev.hdr_data = hdr->msg.put.hdr_data; - - /* NB if this match has exhausted the MD, we can't be sure - * that this 
event will the the last one associated with - * this MD in the event queue (another message already - * matching this ME/MD could end up being last). So we - * remember the ME handle anyway and check again when we're - * allocating our slot in the event queue. - */ - ptl_me2handle (&msg->ev.unlinked_me, me); - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.recv_count++; - ni->counters.recv_length += mlength; - - /* only unlink after MD's pending count has been bumped - * in get_new_msg() otherwise lib_me_unlink() will nuke it */ - if (unlink) { - md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED; - lib_me_unlink (nal, me); - } - - state_unlock(nal, &flags); - - lib_recv (nal, private, msg, md, offset, mlength, PTL_HDR_LENGTH (hdr)); - return 0; - - drop: - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); - state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static int parse_get(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - ptl_size_t mlength = 0; - ptl_size_t offset = 0; - int unlink = 0; - lib_me_t *me; - lib_md_t *md; - lib_msg_t *msg; - ptl_hdr_t reply; - unsigned long flags; - int rc; - - /* Convert get fields to host byte order */ - hdr->msg.get.match_bits = NTOH__u64 (hdr->msg.get.match_bits); - hdr->msg.get.ptl_index = NTOH__u32 (hdr->msg.get.ptl_index); - hdr->msg.get.sink_length = NTOH__u32 (hdr->msg.get.sink_length); - hdr->msg.get.src_offset = NTOH__u32 (hdr->msg.get.src_offset); - - /* compatibility check until field is deleted */ - if (hdr->msg.get.return_offset != 0) - CERROR("Unexpected non-zero get.return_offset %x from " - LPU64"\n", hdr->msg.get.return_offset, hdr->src_nid); - - state_lock(nal, &flags); - - me = lib_find_me(nal, hdr->msg.get.ptl_index, PTL_MD_OP_GET, - hdr->src_nid, hdr->src_pid, - hdr->msg.get.sink_length, hdr->msg.get.src_offset, - hdr->msg.get.match_bits, - &mlength, &offset, 
&unlink); - if (me == NULL) - goto drop; - - md = me->md; - CDEBUG(D_NET, "Incoming get index %d from "LPU64".%u of length %d/%d " - "from md "LPX64" [%d] + %d\n", hdr->msg.get.ptl_index, - hdr->src_nid, hdr->src_pid, mlength, PTL_HDR_LENGTH(hdr), - md->md_lh.lh_cookie, md->md_niov, offset); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping GET from "LPU64": can't allocate msg\n", - ni->nid, hdr->src_nid); - goto drop; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_GET; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.portal = hdr->msg.get.ptl_index; - msg->ev.match_bits = hdr->msg.get.match_bits; - msg->ev.rlength = PTL_HDR_LENGTH(hdr); - msg->ev.mlength = mlength; - msg->ev.offset = offset; - msg->ev.hdr_data = 0; - - /* NB if this match has exhausted the MD, we can't be sure - * that this event will the the last one associated with - * this MD in the event queue (another message already - * matching this ME/MD could end up being last). So we - * remember the ME handle anyway and check again when we're - * allocating our slot in the event queue. 
- */ - ptl_me2handle (&msg->ev.unlinked_me, me); - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.send_count++; - ni->counters.send_length += mlength; - - /* only unlink after MD's refcount has been bumped - * in get_new_msg() otherwise lib_me_unlink() will nuke it */ - if (unlink) { - md->md_flags |= PTL_MD_FLAG_AUTO_UNLINKED; - lib_me_unlink (nal, me); - } - - state_unlock(nal, &flags); - - memset (&reply, 0, sizeof (reply)); - reply.type = HTON__u32 (PTL_MSG_REPLY); - reply.dest_nid = HTON__u64 (hdr->src_nid); - reply.src_nid = HTON__u64 (ni->nid); - reply.dest_pid = HTON__u32 (hdr->src_pid); - reply.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&reply) = HTON__u32 (mlength); - - reply.msg.reply.dst_wmd = hdr->msg.get.return_wmd; - - rc = lib_send (nal, private, msg, &reply, PTL_MSG_REPLY, - hdr->src_nid, hdr->src_pid, md, offset, mlength); - if (rc != 0) { - CERROR(LPU64": Dropping GET from "LPU64": send REPLY failed\n", - ni->nid, hdr->src_nid); - state_lock (nal, &flags); - goto drop; - } - - /* Complete the incoming message */ - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (rc); - drop: - ni->counters.drop_count++; - ni->counters.drop_length += hdr->msg.get.sink_length; - state_unlock(nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static int parse_reply(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - lib_md_t *md; - int rlength; - int length; - lib_msg_t *msg; - unsigned long flags; - - /* compatibility check until field is deleted */ - if (hdr->msg.reply.dst_offset != 0) - CERROR("Unexpected non-zero reply.dst_offset %x from "LPU64"\n", - hdr->msg.reply.dst_offset, hdr->src_nid); - - state_lock(nal, &flags); - - /* NB handles only looked up by creator (no flips) */ - md = ptl_wire_handle2md(&hdr->msg.reply.dst_wmd, nal); - if (md == NULL || md->threshold == 0) { - CERROR (LPU64": Dropping REPLY from "LPU64" for %s MD 
"LPX64"."LPX64"\n", - ni->nid, hdr->src_nid, - md == NULL ? "invalid" : "inactive", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie); - goto drop; - } - - LASSERT (md->offset == 0); - - length = rlength = PTL_HDR_LENGTH(hdr); - - if (length > md->length) { - if ((md->options & PTL_MD_TRUNCATE) == 0) { - CERROR (LPU64": Dropping REPLY from "LPU64 - " length %d for MD "LPX64" would overflow (%d)\n", - ni->nid, hdr->src_nid, length, - hdr->msg.reply.dst_wmd.wh_object_cookie, - md->length); - goto drop; - } - length = md->length; - } - - CDEBUG(D_NET, "Reply from "LPU64" of length %d/%d into md "LPX64"\n", - hdr->src_nid, length, rlength, - hdr->msg.reply.dst_wmd.wh_object_cookie); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping REPLY from "LPU64": can't " - "allocate msg\n", ni->nid, hdr->src_nid); - goto drop; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_REPLY; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.rlength = rlength; - msg->ev.mlength = length; - msg->ev.offset = 0; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.recv_count++; - ni->counters.recv_length += length; - - state_unlock(nal, &flags); - - lib_recv (nal, private, msg, md, 0, length, rlength); - return 0; - - drop: - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); - state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static int parse_ack(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - lib_ni_t *ni = &nal->ni; - lib_md_t *md; - lib_msg_t *msg = NULL; - unsigned long flags; - - /* Convert ack fields to host byte order */ - hdr->msg.ack.match_bits = NTOH__u64 (hdr->msg.ack.match_bits); - hdr->msg.ack.mlength = NTOH__u32 (hdr->msg.ack.mlength); - - state_lock(nal, &flags); - - /* NB handles only looked up by creator (no flips) */ - md = 
ptl_wire_handle2md(&hdr->msg.ack.dst_wmd, nal); - if (md == NULL || md->threshold == 0) { - CDEBUG(D_INFO, LPU64": Dropping ACK from "LPU64" to %s MD " - LPX64"."LPX64"\n", ni->nid, hdr->src_nid, - (md == NULL) ? "invalid" : "inactive", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie); - goto drop; - } - - CDEBUG(D_NET, LPU64": ACK from "LPU64" into md "LPX64"\n", - ni->nid, hdr->src_nid, - hdr->msg.ack.dst_wmd.wh_object_cookie); - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR(LPU64": Dropping ACK from "LPU64": can't allocate msg\n", - ni->nid, hdr->src_nid); - goto drop; - } - - if (md->eq) { - msg->ev.type = PTL_EVENT_ACK; - msg->ev.initiator.nid = hdr->src_nid; - msg->ev.initiator.pid = hdr->src_pid; - msg->ev.mlength = hdr->msg.ack.mlength; - msg->ev.match_bits = hdr->msg.ack.match_bits; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - ni->counters.recv_count++; - state_unlock(nal, &flags); - lib_recv (nal, private, msg, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return 0; - - drop: - nal->ni.counters.drop_count++; - state_unlock (nal, &flags); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return -1; -} - -static char * -hdr_type_string (ptl_hdr_t *hdr) -{ - switch (hdr->type) { - case PTL_MSG_ACK: - return ("ACK"); - case PTL_MSG_PUT: - return ("PUT"); - case PTL_MSG_GET: - return ("GET"); - case PTL_MSG_REPLY: - return ("REPLY"); - case PTL_MSG_HELLO: - return ("HELLO"); - default: - return ("<UNKNOWN>"); - } -} - -void print_hdr(nal_cb_t * nal, ptl_hdr_t * hdr) -{ - char *type_str = hdr_type_string (hdr); - - nal->cb_printf(nal, "P3 Header at %p of type %s\n", hdr, type_str); - nal->cb_printf(nal, " From nid/pid %Lu/%Lu", hdr->src_nid, - hdr->src_pid); - nal->cb_printf(nal, " To nid/pid %Lu/%Lu\n", hdr->dest_nid, - hdr->dest_pid); - - switch (hdr->type) { - default: - break; - - case PTL_MSG_PUT: - nal->cb_printf(nal, - " Ptl index %d, ack md "LPX64"."LPX64", " - "match bits 
"LPX64"\n", - hdr->msg.put.ptl_index, - hdr->msg.put.ack_wmd.wh_interface_cookie, - hdr->msg.put.ack_wmd.wh_object_cookie, - hdr->msg.put.match_bits); - nal->cb_printf(nal, - " Length %d, offset %d, hdr data "LPX64"\n", - PTL_HDR_LENGTH(hdr), hdr->msg.put.offset, - hdr->msg.put.hdr_data); - break; - - case PTL_MSG_GET: - nal->cb_printf(nal, - " Ptl index %d, return md "LPX64"."LPX64", " - "match bits "LPX64"\n", hdr->msg.get.ptl_index, - hdr->msg.get.return_wmd.wh_interface_cookie, - hdr->msg.get.return_wmd.wh_object_cookie, - hdr->msg.get.match_bits); - nal->cb_printf(nal, - " Length %d, src offset %d\n", - hdr->msg.get.sink_length, - hdr->msg.get.src_offset); - break; - - case PTL_MSG_ACK: - nal->cb_printf(nal, " dst md "LPX64"."LPX64", " - "manipulated length %d\n", - hdr->msg.ack.dst_wmd.wh_interface_cookie, - hdr->msg.ack.dst_wmd.wh_object_cookie, - hdr->msg.ack.mlength); - break; - - case PTL_MSG_REPLY: - nal->cb_printf(nal, " dst md "LPX64"."LPX64", " - "length %d\n", - hdr->msg.reply.dst_wmd.wh_interface_cookie, - hdr->msg.reply.dst_wmd.wh_object_cookie, - PTL_HDR_LENGTH(hdr)); - } - -} /* end of print_hdr() */ - - -int lib_parse(nal_cb_t * nal, ptl_hdr_t * hdr, void *private) -{ - unsigned long flags; - - /* NB static check; optimizer will elide this if it's right */ - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.put.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.get.length)); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == - offsetof (ptl_hdr_t, msg.reply.length)); - - /* convert common fields to host byte order */ - hdr->dest_nid = NTOH__u64 (hdr->dest_nid); - hdr->src_nid = NTOH__u64 (hdr->src_nid); - hdr->dest_pid = NTOH__u32 (hdr->dest_pid); - hdr->src_pid = NTOH__u32 (hdr->src_pid); - hdr->type = NTOH__u32 (hdr->type); - PTL_HDR_LENGTH(hdr) = NTOH__u32 (PTL_HDR_LENGTH(hdr)); -#if 0 - nal->cb_printf(nal, "%d: lib_parse: nal=%p hdr=%p type=%d\n", - nal->ni.nid, nal, hdr, 
hdr->type); - print_hdr(nal, hdr); -#endif - if (hdr->type == PTL_MSG_HELLO) { - /* dest_nid is really ptl_magicversion_t */ - ptl_magicversion_t *mv = (ptl_magicversion_t *)&hdr->dest_nid; - - CERROR (LPU64": Dropping unexpected HELLO message: " - "magic %d, version %d.%d from "LPD64"\n", - nal->ni.nid, mv->magic, - mv->version_major, mv->version_minor, - hdr->src_nid); - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (-1); - } - - if (hdr->dest_nid != nal->ni.nid) { - CERROR(LPU64": Dropping %s message from "LPU64" to "LPU64 - " (not me)\n", nal->ni.nid, hdr_type_string (hdr), - hdr->src_nid, hdr->dest_nid); - - state_lock (nal, &flags); - nal->ni.counters.drop_count++; - nal->ni.counters.drop_length += PTL_HDR_LENGTH(hdr); - state_unlock (nal, &flags); - - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (-1); - } - - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - fail_peer (nal, hdr->src_nid, 0)) /* shall we now? 
*/ - { - CERROR(LPU64": Dropping incoming %s from "LPU64 - ": simulated failure\n", - nal->ni.nid, hdr_type_string (hdr), - hdr->src_nid); - return (-1); - } - - switch (hdr->type) { - case PTL_MSG_ACK: - return (parse_ack(nal, hdr, private)); - case PTL_MSG_PUT: - return (parse_put(nal, hdr, private)); - break; - case PTL_MSG_GET: - return (parse_get(nal, hdr, private)); - break; - case PTL_MSG_REPLY: - return (parse_reply(nal, hdr, private)); - break; - default: - CERROR(LPU64": Dropping <unknown> message from "LPU64 - ": Bad type=0x%x\n", nal->ni.nid, hdr->src_nid, - hdr->type); - - lib_recv (nal, private, NULL, NULL, 0, 0, PTL_HDR_LENGTH (hdr)); - return (-1); - } -} - - -int do_PtlPut(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_ack_req_t ack_req_in - * ptl_process_id_t target_in - * ptl_pt_index_t portal_in - * ptl_ac_index_t cookie_in - * ptl_match_bits_t match_bits_in - * ptl_size_t offset_in - * - * Outgoing: - */ - - PtlPut_in *args = v_args; - PtlPut_out *ret = v_ret; - ptl_hdr_t hdr; - - lib_ni_t *ni = &nal->ni; - lib_md_t *md; - lib_msg_t *msg = NULL; - ptl_process_id_t *id = &args->target_in; - unsigned long flags; - - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - fail_peer (nal, id->nid, 1)) /* shall we now? 
*/ - { - CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n", - nal->ni.nid, id->nid); - return (ret->rc = PTL_INV_PROC); - } - - ret->rc = PTL_OK; - state_lock(nal, &flags); - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL || !md->threshold) { - state_unlock(nal, &flags); - return ret->rc = PTL_INV_MD; - } - - CDEBUG(D_NET, "PtlPut -> %Lu: %lu\n", (unsigned long long)id->nid, - (unsigned long)id->pid); - - memset (&hdr, 0, sizeof (hdr)); - hdr.type = HTON__u32 (PTL_MSG_PUT); - hdr.dest_nid = HTON__u64 (id->nid); - hdr.src_nid = HTON__u64 (ni->nid); - hdr.dest_pid = HTON__u32 (id->pid); - hdr.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&hdr) = HTON__u32 (md->length); - - /* NB handles only looked up by creator (no flips) */ - if (args->ack_req_in == PTL_ACK_REQ) { - hdr.msg.put.ack_wmd.wh_interface_cookie = ni->ni_interface_cookie; - hdr.msg.put.ack_wmd.wh_object_cookie = md->md_lh.lh_cookie; - } else { - hdr.msg.put.ack_wmd = PTL_WIRE_HANDLE_NONE; - } - - hdr.msg.put.match_bits = HTON__u64 (args->match_bits_in); - hdr.msg.put.ptl_index = HTON__u32 (args->portal_in); - hdr.msg.put.offset = HTON__u32 (args->offset_in); - hdr.msg.put.hdr_data = args->hdr_data_in; - - ni->counters.send_count++; - ni->counters.send_length += md->length; - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR("BAD: could not allocate msg!\n"); - state_unlock(nal, &flags); - return ret->rc = PTL_NOSPACE; - } - - /* - * If this memory descriptor has an event queue associated with - * it we need to allocate a message state object and record the - * information about this operation that will be recorded into - * event queue once the message has been completed. - * - * NB. We're now committed to the GET, since we just marked the MD - * busy. Callers who observe this (by getting PTL_MD_INUSE from - * PtlMDUnlink()) expect a completion event to tell them when the - * MD becomes idle. 
- */ - if (md->eq) { - msg->ev.type = PTL_EVENT_SENT; - msg->ev.initiator.nid = ni->nid; - msg->ev.initiator.pid = ni->pid; - msg->ev.portal = args->portal_in; - msg->ev.match_bits = args->match_bits_in; - msg->ev.rlength = md->length; - msg->ev.mlength = md->length; - msg->ev.offset = args->offset_in; - msg->ev.hdr_data = args->hdr_data_in; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - state_unlock(nal, &flags); - - lib_send (nal, private, msg, &hdr, PTL_MSG_PUT, - id->nid, id->pid, md, 0, md->length); - - return ret->rc = PTL_OK; -} - - -int do_PtlGet(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_md_t md_in - * ptl_process_id_t target_in - * ptl_pt_index_t portal_in - * ptl_ac_index_t cookie_in - * ptl_match_bits_t match_bits_in - * ptl_size_t offset_in - * - * Outgoing: - */ - - PtlGet_in *args = v_args; - PtlGet_out *ret = v_ret; - ptl_hdr_t hdr; - lib_msg_t *msg = NULL; - lib_ni_t *ni = &nal->ni; - ptl_process_id_t *id = &args->target_in; - lib_md_t *md; - unsigned long flags; - - if (!list_empty (&nal->ni.ni_test_peers) && /* normally we don't */ - fail_peer (nal, id->nid, 1)) /* shall we now? 
*/ - { - CERROR(LPU64": Dropping PUT to "LPU64": simulated failure\n", - nal->ni.nid, id->nid); - return (ret->rc = PTL_INV_PROC); - } - - state_lock(nal, &flags); - md = ptl_handle2md(&args->md_in, nal); - if (md == NULL || !md->threshold) { - state_unlock(nal, &flags); - return ret->rc = PTL_INV_MD; - } - - LASSERT (md->offset == 0); - - CDEBUG(D_NET, "PtlGet -> %Lu: %lu\n", (unsigned long long)id->nid, - (unsigned long)id->pid); - - memset (&hdr, 0, sizeof (hdr)); - hdr.type = HTON__u32 (PTL_MSG_GET); - hdr.dest_nid = HTON__u64 (id->nid); - hdr.src_nid = HTON__u64 (ni->nid); - hdr.dest_pid = HTON__u32 (id->pid); - hdr.src_pid = HTON__u32 (ni->pid); - PTL_HDR_LENGTH(&hdr) = 0; - - /* NB handles only looked up by creator (no flips) */ - hdr.msg.get.return_wmd.wh_interface_cookie = ni->ni_interface_cookie; - hdr.msg.get.return_wmd.wh_object_cookie = md->md_lh.lh_cookie; - - hdr.msg.get.match_bits = HTON__u64 (args->match_bits_in); - hdr.msg.get.ptl_index = HTON__u32 (args->portal_in); - hdr.msg.get.src_offset = HTON__u32 (args->offset_in); - hdr.msg.get.sink_length = HTON__u32 (md->length); - - ni->counters.send_count++; - - msg = get_new_msg (nal, md); - if (msg == NULL) { - CERROR("do_PtlGet: BAD - could not allocate cookie!\n"); - state_unlock(nal, &flags); - return ret->rc = PTL_NOSPACE; - } - - /* - * If this memory descriptor has an event queue associated with - * it we must allocate a message state object that will record - * the information to be filled in once the message has been - * completed. More information is in the do_PtlPut() comments. - * - * NB. We're now committed to the GET, since we just marked the MD - * busy. Callers who observe this (by getting PTL_MD_INUSE from - * PtlMDUnlink()) expect a completion event to tell them when the - * MD becomes idle. 
- */ - if (md->eq) { - msg->ev.type = PTL_EVENT_SENT; - msg->ev.initiator.nid = ni->nid; - msg->ev.initiator.pid = ni->pid; - msg->ev.portal = args->portal_in; - msg->ev.match_bits = args->match_bits_in; - msg->ev.rlength = md->length; - msg->ev.mlength = md->length; - msg->ev.offset = args->offset_in; - msg->ev.hdr_data = 0; - - lib_md_deconstruct(nal, md, &msg->ev.mem_desc); - } - - state_unlock(nal, &flags); - - lib_send (nal, private, msg, &hdr, PTL_MSG_GET, - id->nid, id->pid, NULL, 0, 0); - - return ret->rc = PTL_OK; -} - -void lib_assert_wire_constants (void) -{ - /* Wire protocol assertions generated by 'wirecheck' */ - - /* Constants... */ - LASSERT (PORTALS_PROTO_MAGIC == 0xeebc0ded); - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - LASSERT (PORTALS_PROTO_VERSION_MINOR == 1); - LASSERT (PTL_MSG_ACK == 0); - LASSERT (PTL_MSG_PUT == 1); - LASSERT (PTL_MSG_GET == 2); - LASSERT (PTL_MSG_REPLY == 3); - LASSERT (PTL_MSG_HELLO == 4); - - /* Checks for struct ptl_handle_wire_t */ - LASSERT (sizeof (ptl_handle_wire_t) == 16); - LASSERT (offsetof (ptl_handle_wire_t, wh_interface_cookie) == 0); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_interface_cookie) == 8); - LASSERT (offsetof (ptl_handle_wire_t, wh_object_cookie) == 8); - LASSERT (sizeof (((ptl_handle_wire_t *)0)->wh_object_cookie) == 8); - - /* Checks for struct ptl_magicversion_t */ - LASSERT (sizeof (ptl_magicversion_t) == 8); - LASSERT (offsetof (ptl_magicversion_t, magic) == 0); - LASSERT (sizeof (((ptl_magicversion_t *)0)->magic) == 4); - LASSERT (offsetof (ptl_magicversion_t, version_major) == 4); - LASSERT (sizeof (((ptl_magicversion_t *)0)->version_major) == 2); - LASSERT (offsetof (ptl_magicversion_t, version_minor) == 6); - LASSERT (sizeof (((ptl_magicversion_t *)0)->version_minor) == 2); - - /* Checks for struct ptl_hdr_t */ - LASSERT (sizeof (ptl_hdr_t) == 72); - LASSERT (offsetof (ptl_hdr_t, dest_nid) == 0); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, 
src_nid) == 8); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_nid) == 8); - LASSERT (offsetof (ptl_hdr_t, dest_pid) == 16); - LASSERT (sizeof (((ptl_hdr_t *)0)->dest_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, src_pid) == 20); - LASSERT (sizeof (((ptl_hdr_t *)0)->src_pid) == 4); - LASSERT (offsetof (ptl_hdr_t, type) == 24); - LASSERT (sizeof (((ptl_hdr_t *)0)->type) == 4); - - /* Ack */ - LASSERT (offsetof (ptl_hdr_t, msg.ack.mlength) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.mlength) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.ack.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.ack.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.ack.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.ack.length) == 4); - - /* Put */ - LASSERT (offsetof (ptl_hdr_t, msg.put.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.ack_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.ack_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.put.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.match_bits) == 8); - LASSERT (offsetof (ptl_hdr_t, msg.put.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.put.hdr_data) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.put.hdr_data) == 8); - - /* Get */ - LASSERT (offsetof (ptl_hdr_t, msg.get.ptl_index) == 28); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.ptl_index) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.get.match_bits) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.match_bits) == 8); - LASSERT (offsetof 
(ptl_hdr_t, msg.get.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.length) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.src_offset) == 60); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.src_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.return_offset) == 64); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.return_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.get.sink_length) == 68); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.get.sink_length) == 4); - - /* Reply */ - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_wmd) == 32); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.dst_wmd) == 16); - LASSERT (offsetof (ptl_hdr_t, msg.reply.dst_offset) == 48); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.dst_offset) == 4); - LASSERT (offsetof (ptl_hdr_t, msg.reply.length) == 56); - LASSERT (sizeof (((ptl_hdr_t *)0)->msg.reply.length) == 4); -} diff --git a/lustre/portals/portals/lib-msg.c b/lustre/portals/portals/lib-msg.c deleted file mode 100644 index f10892c..0000000 --- a/lustre/portals/portals/lib-msg.c +++ /dev/null @@ -1,163 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-msg.c - * Message decoding, parsing and finalizing routines - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __KERNEL__ -# include <stdio.h> -#else -# define DEBUG_SUBSYSTEM S_PORTALS -# include <linux/kp30.h> -#endif - -#include <portals/lib-p30.h> - -int lib_finalize(nal_cb_t * nal, void *private, lib_msg_t *msg) -{ - lib_md_t *md; - lib_eq_t *eq; - int rc; - unsigned long flags; - - /* ni went down while processing this message */ - if (nal->ni.up == 0) { - return -1; - } - - if (msg == NULL) - return 0; - - rc = 0; - if (msg->send_ack) { - ptl_hdr_t ack; - - LASSERT (!ptl_is_wire_handle_none (&msg->ack_wmd)); - - memset (&ack, 0, sizeof (ack)); - ack.type = HTON__u32 (PTL_MSG_ACK); - ack.dest_nid = HTON__u64 (msg->nid); - ack.src_nid = HTON__u64 (nal->ni.nid); - ack.dest_pid = HTON__u32 (msg->pid); - ack.src_pid = HTON__u32 (nal->ni.pid); - PTL_HDR_LENGTH(&ack) = 0; - - ack.msg.ack.dst_wmd = msg->ack_wmd; - ack.msg.ack.match_bits = msg->ev.match_bits; - ack.msg.ack.mlength = HTON__u32 (msg->ev.mlength); - - rc = lib_send (nal, private, NULL, &ack, PTL_MSG_ACK, - msg->nid, msg->pid, NULL, 0, 0); - } - - md = msg->md; - LASSERT (md->pending > 0); /* I've not dropped my ref yet */ - eq = md->eq; - - state_lock(nal, &flags); - - if (eq != NULL) { - ptl_event_t *ev = &msg->ev; - ptl_event_t *eq_slot; - - /* I have to hold the lock while I bump the sequence number - * and copy the event into the queue. If not, and I was - * interrupted after bumping the sequence number, other - * events could fill the queue, including the slot I just - * allocated to this event. On resuming, I would overwrite - * a more 'recent' event with old event state, and - * processes taking events off the queue would not detect - * overflow correctly. 
- */ - - ev->sequence = eq->sequence++;/* Allocate the next queue slot */ - - /* size must be a power of 2 to handle a wrapped sequence # */ - LASSERT (eq->size != 0 && - eq->size == LOWEST_BIT_SET (eq->size)); - eq_slot = eq->base + (ev->sequence & (eq->size - 1)); - - /* Invalidate unlinked_me unless this is the last - * event for an auto-unlinked MD. Note that if md was - * auto-unlinked, md->pending can only decrease - */ - if ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 || /* not auto-unlinked */ - md->pending != 1) /* not last ref */ - ev->unlinked_me = PTL_HANDLE_NONE; - - /* Copy the event into the allocated slot, ensuring all the - * rest of the event's contents have been copied _before_ - * the sequence number gets updated. A processes 'getting' - * an event waits on the next queue slot's sequence to be - * 'new'. When it is, _all_ other event fields had better - * be consistent. I assert 'sequence' is the last member, - * so I only need a 2 stage copy. - */ - LASSERT(sizeof (ptl_event_t) == - offsetof(ptl_event_t, sequence) + sizeof(ev->sequence)); - - rc = nal->cb_write (nal, private, (user_ptr)eq_slot, ev, - offsetof (ptl_event_t, sequence)); - LASSERT (rc == 0); - -#ifdef __KERNEL__ - barrier(); -#endif - /* Updating the sequence number is what makes the event 'new' */ - - /* cb_write is not necessarily atomic, so this could - cause a race with PtlEQGet */ - rc = nal->cb_write(nal, private, (user_ptr)&eq_slot->sequence, - (void *)&ev->sequence,sizeof (ev->sequence)); - LASSERT (rc == 0); - -#ifdef __KERNEL__ - barrier(); -#endif - - /* I must also ensure that (a) callbacks are made in the - * same order as the events land in the queue, and (b) the - * callback occurs before the event can be removed from the - * queue, so I can't drop the lock during the callback. 
*/ - if (nal->cb_callback != NULL) - nal->cb_callback(nal, private, eq, ev); - else if (eq->event_callback != NULL) - (void)((eq->event_callback) (ev)); - } - - LASSERT ((md->md_flags & PTL_MD_FLAG_AUTO_UNLINKED) == 0 || - (md->md_flags & PTL_MD_FLAG_UNLINK) != 0); - - md->pending--; - if (md->pending == 0 && /* no more outstanding operations on this md */ - (md->threshold == 0 || /* done its business */ - (md->md_flags & PTL_MD_FLAG_UNLINK) != 0)) /* marked for death */ - lib_md_unlink(nal, md); - - list_del (&msg->msg_list); - nal->ni.counters.msgs_alloc--; - lib_msg_free(nal, msg); - - state_unlock(nal, &flags); - - return rc; -} diff --git a/lustre/portals/portals/lib-ni.c b/lustre/portals/portals/lib-ni.c deleted file mode 100644 index aa30329..0000000 --- a/lustre/portals/portals/lib-ni.c +++ /dev/null @@ -1,128 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-ni.c - * Network status registers and distance functions. - * - * Copyright (c) 2001-2003 Cluster File Systems, Inc. - * Copyright (c) 2001-2002 Sandia National Laboratories - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_PORTALS -#include <portals/lib-p30.h> -#include <portals/arg-blocks.h> - -#define MAX_DIST 18446744073709551615UL - -int do_PtlNIDebug(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - PtlNIDebug_in *args = v_args; - PtlNIDebug_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->rc = ni->debug; - ni->debug = args->mask_in; - - return 0; -} - -int do_PtlNIStatus(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t interface_in - * ptl_sr_index_t register_in - * - * Outgoing: - * ptl_sr_value_t * status_out - */ - - PtlNIStatus_in *args = v_args; - PtlNIStatus_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - lib_counters_t *count = &ni->counters; - - if (!args) - return ret->rc = PTL_SEGV; - - ret->rc = PTL_OK; - ret->status_out = 0; - - /* - * I hate this sort of code.... Hash tables, offset lists? - * Treat the counters as an array of ints? - */ - if (args->register_in == PTL_SR_DROP_COUNT) - ret->status_out = count->drop_count; - - else if (args->register_in == PTL_SR_DROP_LENGTH) - ret->status_out = count->drop_length; - - else if (args->register_in == PTL_SR_RECV_COUNT) - ret->status_out = count->recv_count; - - else if (args->register_in == PTL_SR_RECV_LENGTH) - ret->status_out = count->recv_length; - - else if (args->register_in == PTL_SR_SEND_COUNT) - ret->status_out = count->send_count; - - else if (args->register_in == PTL_SR_SEND_LENGTH) - ret->status_out = count->send_length; - - else if (args->register_in == PTL_SR_MSGS_MAX) - ret->status_out = count->msgs_max; - else - ret->rc = PTL_INV_SR_INDX; - - return ret->rc; -} - - -int do_PtlNIDist(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t interface_in - * ptl_process_id_t process_in - - * - * Outgoing: - * unsigned long * distance_out - - */ - - PtlNIDist_in *args = v_args; - PtlNIDist_out *ret = v_ret; - - unsigned long dist; - ptl_process_id_t id_in = 
args->process_in; - ptl_nid_t nid; - int rc; - - nid = id_in.nid; - - if ((rc = nal->cb_dist(nal, nid, &dist)) != 0) { - ret->distance_out = (unsigned long) MAX_DIST; - return PTL_INV_PROC; - } - - ret->distance_out = dist; - - return ret->rc = PTL_OK; -} diff --git a/lustre/portals/portals/lib-pid.c b/lustre/portals/portals/lib-pid.c deleted file mode 100644 index 12eebb5..0000000 --- a/lustre/portals/portals/lib-pid.c +++ /dev/null @@ -1,58 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * lib/lib-pid.c - * - * Process identification routines - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* This should be removed. 
The NAL should have the PID information */ -#define DEBUG_SUBSYSTEM S_PORTALS - -#if defined (__KERNEL__) -# include <linux/kernel.h> -extern int getpid(void); -#else -# include <stdio.h> -# include <unistd.h> -#endif -#include <portals/lib-p30.h> -#include <portals/arg-blocks.h> - -int do_PtlGetId(nal_cb_t * nal, void *private, void *v_args, void *v_ret) -{ - /* - * Incoming: - * ptl_handle_ni_t handle_in - * - * Outgoing: - * ptl_process_id_t * id_out - * ptl_id_t * gsize_out - */ - - PtlGetId_out *ret = v_ret; - lib_ni_t *ni = &nal->ni; - - ret->id_out.nid = ni->nid; - ret->id_out.pid = ni->pid; - - return ret->rc = PTL_OK; -} diff --git a/lustre/portals/router/.cvsignore b/lustre/portals/router/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/router/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/portals/router/Makefile.am b/lustre/portals/router/Makefile.am deleted file mode 100644 index 1c8087b..0000000 --- a/lustre/portals/router/Makefile.am +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -include ../Rules.linux - -MODULE = kptlrouter -modulenet_DATA = kptlrouter.o -EXTRA_PROGRAMS = kptlrouter - - -#CFLAGS:= @KCFLAGS@ -#CPPFLAGS:=@KCPPFLAGS@ -DEFS = -kptlrouter_SOURCES = router.c proc.c router.h diff --git a/lustre/portals/router/Makefile.mk b/lustre/portals/router/Makefile.mk deleted file mode 100644 index 64bd09b..0000000 --- a/lustre/portals/router/Makefile.mk +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../Kernelenv - -obj-y += kptlrouter.o -kptlrouter-objs := router.o proc.o diff --git a/lustre/portals/router/proc.c b/lustre/portals/router/proc.c deleted file mode 100644 index dd65b34..0000000 --- a/lustre/portals/router/proc.c +++ /dev/null @@ -1,78 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include "router.h" - -#define KPR_PROC_ROUTER "sys/portals/router" - -int -kpr_proc_read (char *page, char **start, off_t off, int count, int *eof, void *data) -{ - unsigned long long bytes = kpr_fwd_bytes; - unsigned long packets = kpr_fwd_packets; - unsigned long errors = kpr_fwd_errors; - unsigned int qdepth = atomic_read (&kpr_queue_depth); - int len; - - *eof = 1; - if (off != 0) - return (0); - - len = sprintf (page, "%Ld %ld %ld %d\n", bytes, packets, errors, qdepth); - - *start = page; - return (len); -} - -int -kpr_proc_write (struct file *file, const char *ubuffer, unsigned long count, void *data) -{ - /* Ignore what we've been asked to write, and just zero the stats counters */ - kpr_fwd_bytes = 0; - kpr_fwd_packets = 0; - kpr_fwd_errors = 0; - - return (count); -} - -void -kpr_proc_init(void) -{ - struct proc_dir_entry *entry = create_proc_entry (KPR_PROC_ROUTER, S_IFREG | S_IRUGO | S_IWUSR, NULL); - - if (entry == NULL) - { - CERROR("couldn't create proc entry %s\n", KPR_PROC_ROUTER); - return; - } - - entry->data = NULL; - entry->read_proc = kpr_proc_read; - entry->write_proc = kpr_proc_write; -} - -void -kpr_proc_fini(void) -{ - remove_proc_entry(KPR_PROC_ROUTER, 0); -} diff --git a/lustre/portals/router/router.c b/lustre/portals/router/router.c deleted file mode 100644 index 6074c3c..0000000 --- a/lustre/portals/router/router.c +++ /dev/null @@ -1,449 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "router.h" - -struct list_head kpr_routes; -struct list_head kpr_nals; - -unsigned long long kpr_fwd_bytes; -unsigned long kpr_fwd_packets; -unsigned long kpr_fwd_errors; -atomic_t kpr_queue_depth; - -/* Mostly the tables are read-only (thread and interrupt context) - * - * Once in a blue moon we register/deregister NALs and add/remove routing - * entries (thread context only)... */ -rwlock_t kpr_rwlock; - -kpr_router_interface_t kpr_router_interface = { - kprri_register: kpr_register_nal, - kprri_lookup: kpr_lookup_target, - kprri_fwd_start: kpr_forward_packet, - kprri_fwd_done: kpr_complete_packet, - kprri_shutdown: kpr_shutdown_nal, - kprri_deregister: kpr_deregister_nal, -}; - -kpr_control_interface_t kpr_control_interface = { - kprci_add_route: kpr_add_route, - kprci_del_route: kpr_del_route, - kprci_get_route: kpr_get_route, -}; - -int -kpr_register_nal (kpr_nal_interface_t *nalif, void **argp) -{ - long flags; - struct list_head *e; - kpr_nal_entry_t *ne; - - CDEBUG (D_OTHER, "Registering NAL %d\n", nalif->kprni_nalid); - - PORTAL_ALLOC (ne, sizeof (*ne)); - if (ne == NULL) - return (-ENOMEM); - - memset (ne, 0, sizeof (*ne)); - memcpy ((void *)&ne->kpne_interface, (void *)nalif, sizeof (*nalif)); - - LASSERT (!in_interrupt()); - write_lock_irqsave (&kpr_rwlock, flags); - - for (e = kpr_nals.next; e != &kpr_nals; e = e->next) - { - kpr_nal_entry_t *ne2 = list_entry (e, kpr_nal_entry_t, kpne_list); - - if (ne2->kpne_interface.kprni_nalid == ne->kpne_interface.kprni_nalid) - { - write_unlock_irqrestore 
(&kpr_rwlock, flags); - - CERROR ("Attempt to register same NAL %d twice\n", ne->kpne_interface.kprni_nalid); - - PORTAL_FREE (ne, sizeof (*ne)); - return (-EEXIST); - } - } - - list_add (&ne->kpne_list, &kpr_nals); - - write_unlock_irqrestore (&kpr_rwlock, flags); - - *argp = ne; - PORTAL_MODULE_USE; - return (0); -} - -void -kpr_shutdown_nal (void *arg) -{ - long flags; - kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - - CDEBUG (D_OTHER, "Shutting down NAL %d\n", ne->kpne_interface.kprni_nalid); - - LASSERT (!ne->kpne_shutdown); - LASSERT (!in_interrupt()); - - write_lock_irqsave (&kpr_rwlock, flags); /* locking a bit spurious... */ - ne->kpne_shutdown = 1; - write_unlock_irqrestore (&kpr_rwlock, flags); /* except it's a memory barrier */ - - while (atomic_read (&ne->kpne_refcount) != 0) - { - CDEBUG (D_NET, "Waiting for refcount on NAL %d to reach zero (%d)\n", - ne->kpne_interface.kprni_nalid, atomic_read (&ne->kpne_refcount)); - - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } -} - -void -kpr_deregister_nal (void *arg) -{ - long flags; - kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - - CDEBUG (D_OTHER, "Deregister NAL %d\n", ne->kpne_interface.kprni_nalid); - - LASSERT (ne->kpne_shutdown); /* caller must have issued shutdown already */ - LASSERT (atomic_read (&ne->kpne_refcount) == 0); /* can't be busy */ - LASSERT (!in_interrupt()); - - write_lock_irqsave (&kpr_rwlock, flags); - - list_del (&ne->kpne_list); - - write_unlock_irqrestore (&kpr_rwlock, flags); - - PORTAL_FREE (ne, sizeof (*ne)); - PORTAL_MODULE_UNUSE; -} - - -int -kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp) -{ - kpr_nal_entry_t *ne = (kpr_nal_entry_t *)arg; - struct list_head *e; - int rc = -ENOENT; - - CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d\n", target_nid, ne->kpne_interface.kprni_nalid); - - if (ne->kpne_shutdown) /* caller is shutting down */ - return (-ENOENT); - - read_lock (&kpr_rwlock); - - /* Search routes for one that has a 
gateway to target_nid on the callers network */ - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) - { - kpr_route_entry_t *re = list_entry (e, kpr_route_entry_t, kpre_list); - - if (re->kpre_lo_nid > target_nid || - re->kpre_hi_nid < target_nid) - continue; - - /* found table entry */ - - if (re->kpre_gateway_nalid != ne->kpne_interface.kprni_nalid) /* different NAL */ - rc = -EHOSTUNREACH; - else - { - rc = 0; - *gateway_nidp = re->kpre_gateway_nid; - } - break; - } - - read_unlock (&kpr_rwlock); - - CDEBUG (D_OTHER, "lookup "LPX64" from NAL %d: %d ("LPX64")\n", - target_nid, ne->kpne_interface.kprni_nalid, rc, - (rc == 0) ? *gateway_nidp : (ptl_nid_t)0); - return (rc); -} - -void -kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd) -{ - kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)arg; - ptl_nid_t target_nid = fwd->kprfd_target_nid; - int nob = fwd->kprfd_nob; - struct list_head *e; - - CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid); - - LASSERT (nob >= sizeof (ptl_hdr_t)); /* at least got a packet header */ - LASSERT (nob == lib_iov_nob (fwd->kprfd_niov, fwd->kprfd_iov)); - - atomic_inc (&kpr_queue_depth); - atomic_inc (&src_ne->kpne_refcount); /* source nal is busy until fwd completes */ - - kpr_fwd_packets++; /* (loose) stats accounting */ - kpr_fwd_bytes += nob; - - if (src_ne->kpne_shutdown) /* caller is shutting down */ - goto out; - - fwd->kprfd_router_arg = src_ne; /* stash caller's nal entry */ - - read_lock (&kpr_rwlock); - - /* Search routes for one that has a gateway to target_nid NOT on the caller's network */ - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) - { - kpr_route_entry_t *re = list_entry (e, kpr_route_entry_t, kpre_list); - - if (re->kpre_lo_nid > target_nid || /* no match */ - re->kpre_hi_nid < target_nid) - continue; - - CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: match "LPX64" on NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid, 
- re->kpre_gateway_nid, re->kpre_gateway_nalid); - - if (re->kpre_gateway_nalid == src_ne->kpne_interface.kprni_nalid) - break; /* don't route to same NAL */ - - /* Search for gateway's NAL's entry */ - - for (e = kpr_nals.next; e != &kpr_nals; e = e->next) - { - kpr_nal_entry_t *dst_ne = list_entry (e, kpr_nal_entry_t, kpne_list); - - if (re->kpre_gateway_nalid != dst_ne->kpne_interface.kprni_nalid) /* no match */ - continue; - - if (dst_ne->kpne_shutdown) /* don't route if NAL is shutting down */ - break; - - fwd->kprfd_gateway_nid = re->kpre_gateway_nid; - atomic_inc (&dst_ne->kpne_refcount); /* dest nal is busy until fwd completes */ - - read_unlock (&kpr_rwlock); - - CDEBUG (D_OTHER, "forward [%p] "LPX64" from NAL %d: "LPX64" on NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid, - fwd->kprfd_gateway_nid, dst_ne->kpne_interface.kprni_nalid); - - dst_ne->kpne_interface.kprni_fwd (dst_ne->kpne_interface.kprni_arg, fwd); - return; - } - break; - } - - read_unlock (&kpr_rwlock); - out: - kpr_fwd_errors++; - - CDEBUG (D_OTHER, "Failed to forward [%p] "LPX64" from NAL %d\n", fwd, - target_nid, src_ne->kpne_interface.kprni_nalid); - - /* Can't find anywhere to forward to */ - (fwd->kprfd_callback)(fwd->kprfd_callback_arg, -EHOSTUNREACH); - - atomic_dec (&kpr_queue_depth); - atomic_dec (&src_ne->kpne_refcount); -} - -void -kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error) -{ - kpr_nal_entry_t *dst_ne = (kpr_nal_entry_t *)arg; - kpr_nal_entry_t *src_ne = (kpr_nal_entry_t *)fwd->kprfd_router_arg; - - CDEBUG (D_OTHER, "complete(1) [%p] from NAL %d to NAL %d: %d\n", fwd, - src_ne->kpne_interface.kprni_nalid, dst_ne->kpne_interface.kprni_nalid, error); - - atomic_dec (&dst_ne->kpne_refcount); /* CAVEAT EMPTOR dst_ne can disappear now!!! 
*/ - - (fwd->kprfd_callback)(fwd->kprfd_callback_arg, error); - - CDEBUG (D_OTHER, "complete(2) [%p] from NAL %d: %d\n", fwd, - src_ne->kpne_interface.kprni_nalid, error); - - atomic_dec (&kpr_queue_depth); - atomic_dec (&src_ne->kpne_refcount); /* CAVEAT EMPTOR src_ne can disappear now!!! */ -} - -int -kpr_add_route (int gateway_nalid, ptl_nid_t gateway_nid, ptl_nid_t lo_nid, - ptl_nid_t hi_nid) -{ - long flags; - struct list_head *e; - kpr_route_entry_t *re; - - CDEBUG(D_OTHER, "Add route: %d "LPX64" : "LPX64" - "LPX64"\n", - gateway_nalid, gateway_nid, lo_nid, hi_nid); - - LASSERT(lo_nid <= hi_nid); - - PORTAL_ALLOC (re, sizeof (*re)); - if (re == NULL) - return (-ENOMEM); - - re->kpre_gateway_nalid = gateway_nalid; - re->kpre_gateway_nid = gateway_nid; - re->kpre_lo_nid = lo_nid; - re->kpre_hi_nid = hi_nid; - - LASSERT(!in_interrupt()); - write_lock_irqsave (&kpr_rwlock, flags); - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { - kpr_route_entry_t *re2 = list_entry(e, kpr_route_entry_t, - kpre_list); - - if (re->kpre_lo_nid > re2->kpre_hi_nid || - re->kpre_hi_nid < re2->kpre_lo_nid) - continue; - - CERROR ("Attempt to add duplicate routes ["LPX64" - "LPX64"]" - "to ["LPX64" - "LPX64"]\n", - re->kpre_lo_nid, re->kpre_hi_nid, - re2->kpre_lo_nid, re2->kpre_hi_nid); - - write_unlock_irqrestore (&kpr_rwlock, flags); - - PORTAL_FREE (re, sizeof (*re)); - return (-EINVAL); - } - - list_add (&re->kpre_list, &kpr_routes); - - write_unlock_irqrestore (&kpr_rwlock, flags); - return (0); -} - -int -kpr_del_route (ptl_nid_t nid) -{ - long flags; - struct list_head *e; - - CDEBUG(D_OTHER, "Del route "LPX64"\n", nid); - - LASSERT(!in_interrupt()); - write_lock_irqsave(&kpr_rwlock, flags); - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { - kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, - kpre_list); - - if (re->kpre_lo_nid > nid || re->kpre_hi_nid < nid) - continue; - - list_del (&re->kpre_list); - write_unlock_irqrestore(&kpr_rwlock, 
flags); - - PORTAL_FREE(re, sizeof (*re)); - return (0); - } - - write_unlock_irqrestore(&kpr_rwlock, flags); - return (-ENOENT); -} - -int -kpr_get_route(int idx, int *gateway_nalid, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid) -{ - struct list_head *e; - - read_lock(&kpr_rwlock); - - for (e = kpr_routes.next; e != &kpr_routes; e = e->next) { - kpr_route_entry_t *re = list_entry(e, kpr_route_entry_t, - kpre_list); - - if (idx-- == 0) { - *gateway_nalid = re->kpre_gateway_nalid; - *gateway_nid = re->kpre_gateway_nid; - *lo_nid = re->kpre_lo_nid; - *hi_nid = re->kpre_hi_nid; - - read_unlock(&kpr_rwlock); - return (0); - } - } - - read_unlock (&kpr_rwlock); - return (-ENOENT); -} - -static void __exit -kpr_finalise (void) -{ - LASSERT (list_empty (&kpr_nals)); - - while (!list_empty (&kpr_routes)) { - kpr_route_entry_t *re = list_entry(kpr_routes.next, - kpr_route_entry_t, - kpre_list); - - list_del(&re->kpre_list); - PORTAL_FREE(re, sizeof (*re)); - } - - kpr_proc_fini(); - - PORTAL_SYMBOL_UNREGISTER(kpr_router_interface); - PORTAL_SYMBOL_UNREGISTER(kpr_control_interface); - - CDEBUG(D_MALLOC, "kpr_finalise: kmem back to %d\n", - atomic_read(&portal_kmemory)); -} - -static int __init -kpr_initialise (void) -{ - CDEBUG(D_MALLOC, "kpr_initialise: kmem %d\n", - atomic_read(&portal_kmemory)); - - rwlock_init(&kpr_rwlock); - INIT_LIST_HEAD(&kpr_routes); - INIT_LIST_HEAD(&kpr_nals); - - kpr_proc_init(); - - PORTAL_SYMBOL_REGISTER(kpr_router_interface); - PORTAL_SYMBOL_REGISTER(kpr_control_interface); - return (0); -} - -MODULE_AUTHOR("Eric Barton"); -MODULE_DESCRIPTION("Kernel Portals Router v0.01"); -MODULE_LICENSE("GPL"); - -module_init (kpr_initialise); -module_exit (kpr_finalise); - -EXPORT_SYMBOL (kpr_control_interface); -EXPORT_SYMBOL (kpr_router_interface); diff --git a/lustre/portals/router/router.h b/lustre/portals/router/router.h deleted file mode 100644 index b8c3bec..0000000 --- a/lustre/portals/router/router.h +++ /dev/null @@ -1,81 +0,0 @@ 
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Portals - * http://sourceforge.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#ifndef _KPTLROUTER_H -#define _KPTLROUTER_H -#define EXPORT_SYMTAB - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/init.h> - -#define DEBUG_SUBSYSTEM S_PTLROUTER - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <portals/lib-p30.h> - -typedef struct -{ - struct list_head kpne_list; - kpr_nal_interface_t kpne_interface; - atomic_t kpne_refcount; - int kpne_shutdown; -} kpr_nal_entry_t; - -typedef struct -{ - struct list_head kpre_list; - int kpre_gateway_nalid; - ptl_nid_t kpre_gateway_nid; - ptl_nid_t kpre_lo_nid; - ptl_nid_t kpre_hi_nid; -} kpr_route_entry_t; - -extern int kpr_register_nal (kpr_nal_interface_t *nalif, void **argp); -extern int kpr_lookup_target (void *arg, ptl_nid_t target_nid, ptl_nid_t *gateway_nidp); -extern void kpr_forward_packet (void *arg, kpr_fwd_desc_t *fwd); -extern void kpr_complete_packet (void *arg, kpr_fwd_desc_t *fwd, int error); -extern void kpr_shutdown_nal (void *arg); -extern void 
kpr_deregister_nal (void *arg); - -extern void kpr_proc_init (void); -extern void kpr_proc_fini (void); - -extern int kpr_add_route (int gateway_nal, ptl_nid_t gateway_nid, - ptl_nid_t lo_nid, ptl_nid_t hi_nid); -extern int kpr_del_route (ptl_nid_t nid); -extern int kpr_get_route (int idx, int *gateway_nal, ptl_nid_t *gateway_nid, - ptl_nid_t *lo_nid, ptl_nid_t *hi_nid); - -extern unsigned long long kpr_fwd_bytes; -extern unsigned long kpr_fwd_packets; -extern unsigned long kpr_fwd_errors; -extern atomic_t kpr_queue_depth; - -#endif /* _KPLROUTER_H */ diff --git a/lustre/portals/tests/.cvsignore b/lustre/portals/tests/.cvsignore deleted file mode 100644 index 051d1bd..0000000 --- a/lustre/portals/tests/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -Makefile -Makefile.in -.deps diff --git a/lustre/portals/tests/Makefile.am b/lustre/portals/tests/Makefile.am deleted file mode 100644 index 7b47ae0..0000000 --- a/lustre/portals/tests/Makefile.am +++ /dev/null @@ -1,23 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - -include ../Rules.linux - -LDFLAGS = -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -LINK = $(LD) $(LDFLAGS) -o $@ -DEFS = -LIBS = -MODULE = $(basename) -EXTRA_DIST = startserver.sh startclient.sh stopserver.sh stopclient.sh - -noinst_PROGRAMS = pingsrv.o pingcli.o spingsrv.o spingcli.o - -pingsrv_o_SOURCES = ping_srv.c ping.h - -pingcli_o_SOURCES = ping_cli.c ping.h - -spingsrv_o_SOURCES = sping_srv.c ping.h - -spingcli_o_SOURCES = sping_cli.c ping.h diff --git a/lustre/portals/tests/ping.h b/lustre/portals/tests/ping.h deleted file mode 100644 index f07444b..0000000 --- a/lustre/portals/tests/ping.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef _KPING_INCLUDED -#define _KPING_INCLUDED - -#include <portals/p30.h> - - -#define PTL_PING_IN_SIZE 256 // n packets per buffer -#define PTL_PING_IN_BUFFERS 2 // n fallback buffers - -#define PTL_PING_CLIENT 4 -#define PTL_PING_SERVER 5 - -#define PING_HEADER_MAGIC 0xDEADBEEF -#define PING_BULK_MAGIC 0xCAFEBABE - -#define PING_HEAD_BITS 0x00000001 -#define PING_BULK_BITS 0x00000002 -#define PING_IGNORE_BITS 0xFFFFFFFC - -#define PTL_PING_ACK 0x01 -#define PTL_PING_VERBOSE 0x02 -#define PTL_PING_VERIFY 0x04 -#define PTL_PING_PREALLOC 0x08 - - -#define NEXT_PRIMARY_BUFFER(index) \ - (((index + 1) >= PTL_PING_IN_BUFFERS) ? 
0 : (index + 1)) - -#define PDEBUG(str, err) \ - CERROR ("%s: error=%s (%d)\n", str, ptl_err_str[err], err) - - -/* Ping data to be passed via the ioctl to kernel space */ - -#if __KERNEL__ - - -#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) -#include <linux/workqueue.h> -#else -#include <linux/tqueue.h> -#endif -struct pingsrv_data { - - ptl_handle_ni_t ni; - ptl_handle_me_t me; - ptl_handle_eq_t eq; - void *in_buf; - ptl_process_id_t my_id; - ptl_process_id_t id_local; - ptl_md_t mdin; - ptl_md_t mdout; - ptl_handle_md_t mdin_h; - ptl_handle_md_t mdout_h; - ptl_event_t evnt; - struct task_struct *tsk; -}; /* struct pingsrv_data */ - -struct pingcli_data { - - struct portal_ioctl_data *args; - ptl_handle_me_t me; - ptl_handle_eq_t eq; - char *inbuf; - char *outbuf; - ptl_process_id_t myid; - ptl_process_id_t id_local; - ptl_process_id_t id_remote; - ptl_md_t md_in_head; - ptl_md_t md_out_head; - ptl_handle_md_t md_in_head_h; - ptl_handle_md_t md_out_head_h; - ptl_event_t ev; - struct task_struct *tsk; -}; /* struct pingcli_data */ - - -#endif /* __KERNEL__ */ - -#endif /* _KPING_INCLUDED */ diff --git a/lustre/portals/tests/ping_cli.c b/lustre/portals/tests/ping_cli.c deleted file mode 100644 index 389ffbb..0000000 --- a/lustre/portals/tests/ping_cli.c +++ /dev/null @@ -1,300 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf <behlendorf1@llnl.gov> - * Kedar Sovani (kedar@calsoftinc.com) - * Amey Inamdar (amey@calsoftinc.com) - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/poll.h> -#include "ping.h" -/* int portal_debug = D_PING_CLI; */ - - -#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) - -#define MAX_TIME 100000 - -/* This should be enclosed in a structure */ - -static struct pingcli_data *client = NULL; - -static int count = 0; - -static void -pingcli_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (client->md_out_head_h))) - PDEBUG ("PtlMDUnlink", rc); - case 2: - if ((rc = PtlMDUnlink (client->md_in_head_h))) - PDEBUG ("PtlMDUnlink", rc); - - /* Free the event queue */ - if ((rc = PtlEQFree (client->eq))) - PDEBUG ("PtlEQFree", rc); - - if ((rc = PtlMEUnlink (client->me))) - PDEBUG ("PtlMEUnlink", rc); - case 3: - kportal_put_ni (client->args->ioc_nal); - - case 4: - /* Free our buffers */ - - if (client != NULL) - PORTAL_FREE (client, - sizeof(struct pingcli_data)); - } - - - CDEBUG (D_OTHER, "ping client released resources\n"); -} /* pingcli_shutdown() */ - -static int pingcli_callback(ptl_event_t *ev) -{ - int i, magic; - i = *(int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned)); - magic = *(int *)(ev->mem_desc.start + ev->offset); - - if(magic != 0xcafebabe) { - printk ("Unexpected response \n"); - return 1; - } - - if((i == count) || !count) - wake_up_process (client->tsk); - else - printk ("Received response after timeout for %d\n",i); - return 1; -} - - -static struct pingcli_data * -pingcli_start(struct portal_ioctl_data *args) -{ - ptl_handle_ni_t *nip; - unsigned ping_head_magic = PING_HEADER_MAGIC; - unsigned ping_bulk_magic = PING_BULK_MAGIC; - int rc; - struct timeval tv1, tv2; - client->tsk = current; - client->args = args; - CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", \ - nal %d, size %u, count: %u, timeout: %u\n", - args->ioc_nid, args->ioc_nal, args->ioc_size, - args->ioc_count, args->ioc_timeout); - - - PORTAL_ALLOC (client->outbuf, STDSIZE + args->ioc_size) ; - if (client->outbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return (NULL); - } - - PORTAL_ALLOC (client->inbuf, - (args->ioc_size + STDSIZE) * args->ioc_count); - if (client->inbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return 
(NULL); - } - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) - { - CERROR ("NAL %d not loaded\n", args->ioc_nal); - pingcli_shutdown (4); - return (NULL); - } - - /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (*nip, &client->myid))) - { - CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Setup the local match entries */ - client->id_local.nid = PTL_NID_ANY; - client->id_local.pid = PTL_PID_ANY; - - /* Setup the remote match entries */ - client->id_remote.nid = args->ioc_nid; - client->id_remote.pid = 0; - - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, - client->id_local, 0, ~0, PTL_RETAIN, - PTL_INS_AFTER, &client->me))) - { - CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) - { - CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - count = args->ioc_count; - - client->md_in_head.start = client->inbuf; - client->md_in_head.length = (args->ioc_size + STDSIZE) - * count; - client->md_in_head.threshold = PTL_MD_THRESH_INF; - client->md_in_head.options = PTL_MD_OP_PUT; - client->md_in_head.user_ptr = NULL; - client->md_in_head.eventq = client->eq; - memset (client->inbuf, 0, (args->ioc_size + STDSIZE) * count); - - /* Attach the incoming buffer */ - if ((rc = PtlMDAttach (client->me, client->md_in_head, - PTL_UNLINK, &client->md_in_head_h))) { - CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); - return (NULL); - } - /* Setup the outgoing ping header */ - client->md_out_head.start = client->outbuf; - client->md_out_head.length = STDSIZE + args->ioc_size; - client->md_out_head.threshold = args->ioc_count; - client->md_out_head.options = PTL_MD_OP_PUT; - client->md_out_head.user_ptr = NULL; - client->md_out_head.eventq = 
PTL_EQ_NONE; - - memcpy (client->outbuf, &ping_head_magic, sizeof(ping_bulk_magic)); - - count = 0; - - /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, - &client->md_out_head_h))) { - CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); - return NULL; - } - while ((args->ioc_count - count)) { - memcpy (client->outbuf + sizeof(unsigned), - &(count), sizeof(unsigned)); - /* Put the ping packet */ - do_gettimeofday (&tv1); - - memcpy(client->outbuf+sizeof(unsigned)+sizeof(unsigned),&tv1, - sizeof(struct timeval)); - - if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, - client->id_remote, PTL_PING_SERVER, 0, 0, 0, 0))) { - PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); - return NULL; - } - printk ("sent msg no %d", count); - - set_current_state (TASK_INTERRUPTIBLE); - rc = schedule_timeout (20 * args->ioc_timeout); - if (rc == 0) { - printk (" :: timeout .....\n"); - } else { - do_gettimeofday (&tv2); - printk(" :: Reply in %u usec\n", - (unsigned)((tv2.tv_sec - tv1.tv_sec) - * 1000000 + (tv2.tv_usec - tv1.tv_usec))); - } - count++; - } - - if (client->outbuf != NULL) - PORTAL_FREE (client->outbuf, STDSIZE + args->ioc_size); - - if (client->inbuf != NULL) - PORTAL_FREE (client->inbuf, - (args->ioc_size + STDSIZE) * args->ioc_count); - - pingcli_shutdown (2); - - /* Success! 
*/ - return NULL; -} /* pingcli_setup() */ - - - -/* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) -{ - PORTAL_ALLOC (client, sizeof(struct pingcli_data)); - if (client == NULL) - { - CERROR ("Unable to allocate client structure\n"); - return (0); - } - memset (client, 0, sizeof(struct pingcli_data)); - pingcli_start (args); - - return 0; -} /* kping_client() */ - - -static int __init pingcli_init(void) -{ - PORTAL_SYMBOL_REGISTER(kping_client); - return 0; -} /* pingcli_init() */ - - -static void __exit pingcli_cleanup(void) -{ - PORTAL_SYMBOL_UNREGISTER (kping_client); -} /* pingcli_cleanup() */ - - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingcli_init); -module_exit(pingcli_cleanup); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -EXPORT_SYMBOL (kping_client); -#endif diff --git a/lustre/portals/tests/ping_srv.c b/lustre/portals/tests/ping_srv.c deleted file mode 100644 index 1037d09..0000000 --- a/lustre/portals/tests/ping_srv.c +++ /dev/null @@ -1,308 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf <behlendorf1@llnl.gov> - * Amey Inamdar <amey@calsoftinc.com> - * Kedar Sovani <kedar@calsoftinc.com> - * - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include <linux/kp30.h> -#include <portals/p30.h> -#include "ping.h" - -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <linux/workqueue.h> -#else -#include <linux/tqueue.h> -#endif -#include <linux/wait.h> -#include <linux/smp_lock.h> - -#include <asm/unistd.h> -#include <asm/semaphore.h> - -#define STDSIZE (sizeof(int) + sizeof(int) + sizeof(struct timeval)) -#define MAXSIZE (16*1024*1024) - -static unsigned ping_head_magic; -static unsigned ping_bulk_magic; -static int nal = 0; // Your NAL, -static unsigned long packets_valid = 0; // Valid packets -static int running = 1; -atomic_t pkt; - -static struct pingsrv_data *server=NULL; // Our ping server - -static void *pingsrv_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (server->mdin_h))) - PDEBUG ("PtlMDUnlink (out head buffer)", rc); - case 2: - /* Free the event queue */ - if ((rc = PtlEQFree (server->eq))) - PDEBUG ("PtlEQFree", rc); - - /* Unlink the client portal from the ME list */ - if ((rc = PtlMEUnlink (server->me))) - PDEBUG ("PtlMEUnlink", rc); - - case 3: - kportal_put_ni (nal); - - case 4: - - case 5: - if (server->in_buf != NULL) - PORTAL_FREE (server->in_buf, MAXSIZE); - - if (server != NULL) - PORTAL_FREE (server, - sizeof (struct pingsrv_data)); - - } - - CDEBUG (D_OTHER, "ping sever resources released\n"); - return NULL; -} /* pingsrv_shutdown() */ - - -int pingsrv_thread(void *arg) -{ - int rc; - unsigned long magic; - unsigned long ping_bulk_magic = 0xcafebabe; - - kportal_daemonize ("pingsrv"); - server->tsk = current; - - while (running) { - set_current_state (TASK_INTERRUPTIBLE); - if (atomic_read (&pkt) == 0) { - schedule_timeout (MAX_SCHEDULE_TIMEOUT); - continue; - } - - magic = *((int *)(server->evnt.mem_desc.start - + server->evnt.offset)); - - - if(magic != 0xdeadbeef) { - printk("Unexpected Packet to the server\n"); - - } - memcpy (server->in_buf, &ping_bulk_magic, sizeof(ping_bulk_magic)); - - server->mdout.length = server->evnt.rlength; - server->mdout.start = server->in_buf; - server->mdout.threshold = 1; - server->mdout.options = PTL_MD_OP_PUT; - server->mdout.user_ptr = NULL; - server->mdout.eventq = PTL_EQ_NONE; - - /* Bind the outgoing buffer */ - if ((rc = PtlMDBind (server->ni, server->mdout, - &server->mdout_h))) { - PDEBUG ("PtlMDBind", rc); - pingsrv_shutdown (1); - return 1; - } - - - server->mdin.start = server->in_buf; - server->mdin.length = MAXSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - - if ((rc = PtlMDAttach (server->me, server->mdin, - PTL_UNLINK, &server->mdin_h))) { - 
PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ, - server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0))) - PDEBUG ("PtlPut", rc); - - atomic_dec (&pkt); - - } - pingsrv_shutdown (1); - running = 1; - return 0; -} - -static int pingsrv_packet(ptl_event_t *ev) -{ - atomic_inc (&pkt); - wake_up_process (server->tsk); - return 1; -} /* pingsrv_head() */ - -static int pingsrv_callback(ptl_event_t *ev) -{ - - if (ev == NULL) { - CERROR ("null in callback, ev=%p\n", ev); - return 0; - } - server->evnt = *ev; - - printk ("received ping from nid "LPX64" " - "(off=%u rlen=%u mlen=%u head=%x seq=%d size=%d)\n", - ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->mem_desc.start + ev->offset)), - *((int *)(ev->mem_desc.start + ev->offset + sizeof(unsigned))), - *((int *)(ev->mem_desc.start + ev->offset + 2 * - sizeof(unsigned)))); - - packets_valid++; - - return pingsrv_packet(ev); - -} /* pingsrv_callback() */ - - -static struct pingsrv_data *pingsrv_setup(void) -{ - ptl_handle_ni_t *nip; - int rc; - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { - CDEBUG (D_OTHER, "NAL %d not loaded\n", nal); - return pingsrv_shutdown (4); - } - - server->ni= *nip; - - /* Based on the initialization aquire our unique portal ID. 
*/ - if ((rc = PtlGetId (server->ni, &server->my_id))) { - PDEBUG ("PtlGetId", rc); - return pingsrv_shutdown (2); - } - - server->id_local.nid = PTL_NID_ANY; - server->id_local.pid = PTL_PID_ANY; - - /* Attach a match entries for header packets */ - if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER, - server->id_local,0, ~0, - PTL_RETAIN, PTL_INS_AFTER, &server->me))) { - PDEBUG ("PtlMEAttach", rc); - return pingsrv_shutdown (2); - } - - - if ((rc = PtlEQAlloc (server->ni, 1024, pingsrv_callback, - &server->eq))) { - PDEBUG ("PtlEQAlloc (callback)", rc); - return pingsrv_shutdown (2); - } - - PORTAL_ALLOC (server->in_buf, MAXSIZE); - if(!server->in_buf){ - CDEBUG (D_OTHER,"Allocation error\n"); - return pingsrv_shutdown(2); - } - - /* Setup the incoming buffer */ - server->mdin.start = server->in_buf; - server->mdin.length = MAXSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - memset (server->in_buf, 0, STDSIZE); - - if ((rc = PtlMDAttach (server->me, server->mdin, - PTL_UNLINK, &server->mdin_h))) { - PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - /* Success! 
*/ - return server; -} /* pingsrv_setup() */ - -static int pingsrv_start(void) -{ - /* Setup our server */ - if (!pingsrv_setup()) { - CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); - return -ENOMEM; - } - kernel_thread (pingsrv_thread,NULL,0); - return 0; -} /* pingsrv_start() */ - - - -static int __init pingsrv_init(void) -{ - ping_head_magic = PING_HEADER_MAGIC; - ping_bulk_magic = PING_BULK_MAGIC; - PORTAL_ALLOC (server, sizeof(struct pingsrv_data)); - return pingsrv_start (); -} /* pingsrv_init() */ - - -static void __exit pingsrv_cleanup(void) -{ - remove_proc_entry ("net/pingsrv", NULL); - - running = 0; - wake_up_process (server->tsk); - while (running != 1) { - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - -} /* pingsrv_cleanup() */ - - -MODULE_PARM(nal, "i"); -MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)"); - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A kernel space ping server for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingsrv_init); -module_exit(pingsrv_cleanup); diff --git a/lustre/portals/tests/sping_cli.c b/lustre/portals/tests/sping_cli.c deleted file mode 100644 index 4cef08b..0000000 --- a/lustre/portals/tests/sping_cli.c +++ /dev/null @@ -1,276 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf <behlendorf1@llnl.gov> - * Kedar Sovani (kedar@calsoftinc.com) - * Amey Inamdar (amey@calsoftinc.com) - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -/* This is a striped down version of pinger. It follows a single - * request-response protocol. Doesn't do Bulk data pinging. Also doesn't - * send multiple packets in a single ioctl. - */ - - -#define DEBUG_SUBSYSTEM S_PINGER - -#include <linux/kp30.h> -#include <portals/p30.h> -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/poll.h> -#include "ping.h" -/* int portal_debug = D_PING_CLI; */ - - -#define STDSIZE (sizeof(int) + sizeof(int) + 4) /* The data is 4 bytes - assumed */ - -/* This should be enclosed in a structure */ - -static struct pingcli_data *client = NULL; - -static int count = 0; - -static void -pingcli_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (client->md_out_head_h))) - PDEBUG ("PtlMDUnlink", rc); - case 2: - /* Free the event queue */ - if ((rc = PtlEQFree (client->eq))) - PDEBUG ("PtlEQFree", rc); - - if ((rc = PtlMEUnlink (client->me))) - PDEBUG ("PtlMEUnlink", rc); - case 3: - kportal_put_ni (client->args->ioc_nal); - - case 4: - /* Free our buffers */ - if (client->outbuf != NULL) - PORTAL_FREE (client->outbuf, STDSIZE); - - if (client->inbuf != NULL) - PORTAL_FREE (client->inbuf, STDSIZE); - - - if (client != NULL) - PORTAL_FREE (client, - sizeof(struct pingcli_data)); - } - - - CDEBUG (D_OTHER, "ping client released resources\n"); -} /* pingcli_shutdown() */ - -static int pingcli_callback(ptl_event_t *ev) -{ - wake_up_process (client->tsk); - return 1; -} - - -static struct pingcli_data * -pingcli_start(struct portal_ioctl_data *args) -{ - const ptl_handle_ni_t *nip; - unsigned ping_head_magic = PING_HEADER_MAGIC; - int rc; - - client->tsk = current; - client->args = args; - - CDEBUG (D_OTHER, "pingcli_setup args: nid "LPX64", \ - nal %d, size %u, count: %u, timeout: %u\n", - args->ioc_nid, args->ioc_nal, args->ioc_size, - args->ioc_count, args->ioc_timeout); - - - PORTAL_ALLOC (client->outbuf, STDSIZE) ; - if (client->outbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return (NULL); - } - - PORTAL_ALLOC (client->inbuf, STDSIZE); - - if (client->inbuf == NULL) - { - CERROR ("Unable to allocate out_buf ("LPSZ" bytes)\n", STDSIZE); - pingcli_shutdown (4); - return (NULL); - } - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (args->ioc_nal)) == NULL) - { - CERROR ("NAL %d not loaded.\n", args->ioc_nal); - pingcli_shutdown (4); - return (NULL); - } - - /* Based on the initialization aquire our unique portal ID. 
*/ - if ((rc = PtlGetId (*nip, &client->myid))) - { - CERROR ("PtlGetId error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Setup the local match entries */ - client->id_local.nid = PTL_NID_ANY; - client->id_local.pid = PTL_PID_ANY; - - /* Setup the remote match entries */ - client->id_remote.nid = args->ioc_nid; - client->id_remote.pid = 0; - - if ((rc = PtlMEAttach (*nip, PTL_PING_CLIENT, - client->id_local, 0, ~0, PTL_RETAIN, - PTL_INS_AFTER, &client->me))) - { - CERROR ("PtlMEAttach error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - /* Allocate the event queue for this network interface */ - if ((rc = PtlEQAlloc (*nip, 64, pingcli_callback, &client->eq))) - { - CERROR ("PtlEQAlloc error %d\n", rc); - pingcli_shutdown (2); - return (NULL); - } - - - client->md_in_head.start = client->inbuf; - client->md_in_head.length = STDSIZE; - client->md_in_head.threshold = 1; - client->md_in_head.options = PTL_MD_OP_PUT; - client->md_in_head.user_ptr = NULL; - client->md_in_head.eventq = client->eq; - memset (client->inbuf, 0, STDSIZE); - - /* Attach the incoming buffer */ - if ((rc = PtlMDAttach (client->me, client->md_in_head, - PTL_UNLINK, &client->md_in_head_h))) { - CERROR ("PtlMDAttach error %d\n", rc); - pingcli_shutdown (1); - return (NULL); - } - - /* Setup the outgoing ping header */ - client->md_out_head.start = client->outbuf; - client->md_out_head.length = STDSIZE; - client->md_out_head.threshold = 1; - client->md_out_head.options = PTL_MD_OP_PUT; - client->md_out_head.user_ptr = NULL; - client->md_out_head.eventq = PTL_EQ_NONE; - - memcpy (client->outbuf, &ping_head_magic, sizeof(ping_head_magic)); - - /* Bind the outgoing ping header */ - if ((rc=PtlMDBind (*nip, client->md_out_head, - &client->md_out_head_h))) { - CERROR ("PtlMDBind error %d\n", rc); - pingcli_shutdown (1); - return (NULL); - } - /* Put the ping packet */ - if((rc = PtlPut (client->md_out_head_h, PTL_NOACK_REQ, - client->id_remote, PTL_PING_SERVER, 0, 0, 0, 
0))) { - PDEBUG ("PtlPut (header)", rc); - pingcli_shutdown (1); - return NULL; - } - - count = 0; - set_current_state (TASK_INTERRUPTIBLE); - rc = schedule_timeout (20 * args->ioc_timeout); - if (rc == 0) { - printk (" Time out on the server\n"); - pingcli_shutdown (2); - return NULL; - } else - printk("Received respose from the server \n"); - - - pingcli_shutdown (2); - - /* Success! */ - return NULL; -} /* pingcli_setup() */ - - - -/* called by the portals_ioctl for ping requests */ -static int kping_client(struct portal_ioctl_data *args) -{ - - PORTAL_ALLOC (client, sizeof(struct pingcli_data)); - memset (client, 0, sizeof(struct pingcli_data)); - if (client == NULL) - { - CERROR ("Unable to allocate client structure\n"); - return (0); - } - pingcli_start (args); - - return 0; -} /* kping_client() */ - - -static int __init pingcli_init(void) -{ - PORTAL_SYMBOL_REGISTER(kping_client); - return 0; -} /* pingcli_init() */ - - -static void __exit pingcli_cleanup(void) -{ - PORTAL_SYMBOL_UNREGISTER (kping_client); -} /* pingcli_cleanup() */ - - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A simple kernel space ping client for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingcli_init); -module_exit(pingcli_cleanup); - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -EXPORT_SYMBOL (kping_client); -#endif diff --git a/lustre/portals/tests/sping_srv.c b/lustre/portals/tests/sping_srv.c deleted file mode 100644 index a18ea35..0000000 --- a/lustre/portals/tests/sping_srv.c +++ /dev/null @@ -1,295 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002, Lawrence Livermore National Labs (LLNL) - * Author: Brian Behlendorf <behlendorf1@llnl.gov> - * Amey Inamdar <amey@calsoftinc.com> - * Kedar Sovani <kedar@calsoftinc.com> - * - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it 
and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* This is a striped down version of pinger. It follows a single - * request-response protocol. Doesn't do Bulk data pinging. Also doesn't - * send multiple packets in a single ioctl. - */ - -#define DEBUG_SUBSYSTEM S_PINGER - -#include <linux/kp30.h> -#include <portals/p30.h> -#include "ping.h" - -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/sched.h> -#include <linux/version.h> -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#include <linux/workqueue.h> -#else -#include <linux/tqueue.h> -#endif -#include <linux/wait.h> -#include <linux/smp_lock.h> - -#include <asm/unistd.h> -#include <asm/semaphore.h> - -#define STDSIZE (sizeof(int) + sizeof(int) + 4) - -static int nal = 0; // Your NAL, -static unsigned long packets_valid = 0; // Valid packets -static int running = 1; -atomic_t pkt; - -static struct pingsrv_data *server=NULL; // Our ping server - -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) -#endif - -static void *pingsrv_shutdown(int err) -{ - int rc; - - /* Yes, we are intentionally allowing us to fall through each - * case in to the next. This allows us to pass an error - * code to just clean up the right stuff. 
- */ - switch (err) { - case 1: - /* Unlink any memory descriptors we may have used */ - if ((rc = PtlMDUnlink (server->mdin_h))) - PDEBUG ("PtlMDUnlink (out head buffer)", rc); - case 2: - /* Free the event queue */ - if ((rc = PtlEQFree (server->eq))) - PDEBUG ("PtlEQFree", rc); - - /* Unlink the client portal from the ME list */ - if ((rc = PtlMEUnlink (server->me))) - PDEBUG ("PtlMEUnlink", rc); - - case 3: - kportal_put_ni (nal); - - case 4: - - if (server->in_buf != NULL) - PORTAL_FREE (server->in_buf, STDSIZE); - - if (server != NULL) - PORTAL_FREE (server, - sizeof (struct pingsrv_data)); - - } - - CDEBUG (D_OTHER, "ping sever resources released\n"); - return NULL; -} /* pingsrv_shutdown() */ - - -int pingsrv_thread(void *arg) -{ - int rc; - - kportal_daemonize ("pingsrv"); - server->tsk = current; - - while (running) { - set_current_state (TASK_INTERRUPTIBLE); - if (atomic_read (&pkt) == 0) { - schedule_timeout (MAX_SCHEDULE_TIMEOUT); - continue; - } - - server->mdout.start = server->in_buf; - server->mdout.length = STDSIZE; - server->mdout.threshold = 1; - server->mdout.options = PTL_MD_OP_PUT; - server->mdout.user_ptr = NULL; - server->mdout.eventq = PTL_EQ_NONE; - - /* Bind the outgoing buffer */ - if ((rc = PtlMDBind (server->ni, server->mdout, - &server->mdout_h))) { - PDEBUG ("PtlMDBind", rc); - pingsrv_shutdown (1); - return 1; - } - - - server->mdin.start = server->in_buf; - server->mdin.length = STDSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - - if ((rc = PtlMDAttach (server->me, server->mdin, - PTL_UNLINK, &server->mdin_h))) { - PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - if ((rc = PtlPut (server->mdout_h, PTL_NOACK_REQ, - server->evnt.initiator, PTL_PING_CLIENT, 0, 0, 0, 0))) - PDEBUG ("PtlPut", rc); - - atomic_dec (&pkt); - - } - pingsrv_shutdown (1); - running = 1; - return 0; -} - -static 
int pingsrv_packet(ptl_event_t *ev) -{ - atomic_inc (&pkt); - wake_up_process (server->tsk); - return 1; -} /* pingsrv_head() */ - -static int pingsrv_callback(ptl_event_t *ev) -{ - - if (ev == NULL) { - CERROR ("null in callback, ev=%p\n", ev); - return 0; - } - server->evnt = *ev; - - printk ("received ping from nid "LPX64" " - "(off=%u rlen=%u mlen=%u head=%x)\n", - ev->initiator.nid, ev->offset, ev->rlength, ev->mlength, - *((int *)(ev->mem_desc.start + ev->offset))); - - packets_valid++; - - return pingsrv_packet(ev); - -} /* pingsrv_callback() */ - - -static struct pingsrv_data *pingsrv_setup(void) -{ - ptl_handle_ni_t *nip; - int rc; - - /* Aquire and initialize the proper nal for portals. */ - if ((nip = kportal_get_ni (nal)) == NULL) { - CDEBUG (D_OTHER, "Nal %d not loaded.\n", nal); - return pingsrv_shutdown (4); - } - - server->ni= *nip; - - /* Based on the initialization aquire our unique portal ID. */ - if ((rc = PtlGetId (server->ni, &server->my_id))) { - PDEBUG ("PtlGetId", rc); - return pingsrv_shutdown (2); - } - - server->id_local.nid = PTL_NID_ANY; - server->id_local.pid = PTL_PID_ANY; - - /* Attach a match entries for header packets */ - if ((rc = PtlMEAttach (server->ni, PTL_PING_SERVER, - server->id_local,0, ~0, - PTL_RETAIN, PTL_INS_AFTER, &server->me))) { - PDEBUG ("PtlMEAttach", rc); - return pingsrv_shutdown (2); - } - - - if ((rc = PtlEQAlloc (server->ni, 64, pingsrv_callback, - &server->eq))) { - PDEBUG ("PtlEQAlloc (callback)", rc); - return pingsrv_shutdown (2); - } - - PORTAL_ALLOC (server->in_buf, STDSIZE); - if(!server->in_buf){ - CDEBUG (D_OTHER,"Allocation error\n"); - return pingsrv_shutdown(2); - } - - /* Setup the incoming buffer */ - server->mdin.start = server->in_buf; - server->mdin.length = STDSIZE; - server->mdin.threshold = 1; - server->mdin.options = PTL_MD_OP_PUT; - server->mdin.user_ptr = NULL; - server->mdin.eventq = server->eq; - memset (server->in_buf, 0, STDSIZE); - - if ((rc = PtlMDAttach (server->me, 
server->mdin, - PTL_UNLINK, &server->mdin_h))) { - PDEBUG ("PtlMDAttach (bulk)", rc); - CDEBUG (D_OTHER, "ping server resources allocated\n"); - } - - /* Success! */ - return server; -} /* pingsrv_setup() */ - -static int pingsrv_start(void) -{ - /* Setup our server */ - if (!pingsrv_setup()) { - CDEBUG (D_OTHER, "pingsrv_setup() failed, server stopped\n"); - return -ENOMEM; - } - kernel_thread (pingsrv_thread,NULL,0); - return 0; -} /* pingsrv_start() */ - - - -static int __init pingsrv_init(void) -{ - PORTAL_ALLOC (server, sizeof(struct pingsrv_data)); - return pingsrv_start (); -} /* pingsrv_init() */ - - -static void __exit pingsrv_cleanup(void) -{ - remove_proc_entry ("net/pingsrv", NULL); - - running = 0; - wake_up_process (server->tsk); - while (running != 1) { - set_current_state (TASK_UNINTERRUPTIBLE); - schedule_timeout (HZ); - } - -} /* pingsrv_cleanup() */ - - -MODULE_PARM(nal, "i"); -MODULE_PARM_DESC(nal, "Use the specified NAL " - "(6-kscimacnal, 4-toenal, 2-ksocknal, 1-kqswnal)"); - -MODULE_AUTHOR("Brian Behlendorf (LLNL)"); -MODULE_DESCRIPTION("A kernel space ping server for portals testing"); -MODULE_LICENSE("GPL"); - -module_init(pingsrv_init); -module_exit(pingsrv_cleanup); diff --git a/lustre/portals/tests/startclient.sh b/lustre/portals/tests/startclient.sh deleted file mode 100755 index c9b7c16..0000000 --- a/lustre/portals/tests/startclient.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-0} - -if [ $SIMPLE -eq 0 ]; then - PING=pingcli.o -else - PING=spingcli.o -fi - -case "$1" in - toe) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../toenal/ktoenal.o - /sbin/insmod ./$PING - echo ktoenal > /tmp/nal - ;; - - tcp) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../socknal/ksocknal.o - /sbin/insmod ./$PING - echo ksocknal > /tmp/nal - ;; - - elan) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../qswnal/kqswnal.o - /sbin/insmod ./$PING - echo kqswnal > /tmp/nal - ;; - - *) - echo "Usage : ${0} < tcp | toe | elan >" - exit 
1; -esac -exit 0; diff --git a/lustre/portals/tests/startserver.sh b/lustre/portals/tests/startserver.sh deleted file mode 100755 index 942300e..0000000 --- a/lustre/portals/tests/startserver.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-0} - -if [ $SIMPLE -eq 0 ]; then - PING=pingsrv.o -else - PING=spingsrv.o -fi - -case "$1" in - toe) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../toenal/ktoenal.o - /sbin/insmod ./$PING nal=4 - echo ktoenal > /tmp/nal - ;; - - tcp) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../socknal/ksocknal.o - /sbin/insmod ./$PING nal=2 - echo ksocknal > /tmp/nal - ;; - - elan) - /sbin/insmod ../oslib/portals.o - /sbin/insmod ../qswnal/kqswnal.o - /sbin/insmod ./$PING nal=4 - echo kqswnal > /tmp/nal - ;; - - *) - echo "Usage : ${0} < tcp | toe | elan >" - exit 1; -esac -../utils/acceptor 9999& -exit 0; diff --git a/lustre/portals/tests/stopclient.sh b/lustre/portals/tests/stopclient.sh deleted file mode 100755 index f7e3aa1..0000000 --- a/lustre/portals/tests/stopclient.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-1} - -if [ $SIMPLE -eq 0 ]; then - PING=spingcli -else - PING=pingcli -fi - -rmmod $PING -NAL=`cat /tmp/nal`; -rmmod $NAL -rmmod portals diff --git a/lustre/portals/tests/stopserver.sh b/lustre/portals/tests/stopserver.sh deleted file mode 100644 index 3e81831..0000000 --- a/lustre/portals/tests/stopserver.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -SIMPLE=${SIMPLE:-1} - -if [ $SIMPLE -eq 0 ]; then - PING=spingsrv -else - PING=pingsrv -fi - -rmmod $PING -NAL=`cat /tmp/nal`; -rmmod $NAL -killall -9 acceptor -rm -f /var/run/acceptor-9999.pid -rmmod portals diff --git a/lustre/portals/unals/.cvsignore b/lustre/portals/unals/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/portals/unals/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/portals/unals/Makefile.am b/lustre/portals/unals/Makefile.am deleted file mode 100644 
index dc427b0..0000000 --- a/lustre/portals/unals/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -CPPFLAGS= -INCLUDES=-I$(top_srcdir)/portals/include -I$(top_srcdir)/include -I$(srcdir) -lib_LIBRARIES = libtcpnal.a -pkginclude_HEADERS = pqtimer.h dispatch.h table.h timer.h connection.h ipmap.h bridge.h procbridge.h -libtcpnal_a_SOURCES = debug.c pqtimer.c select.c table.c pqtimer.h dispatch.h table.h timer.h address.c procapi.c proclib.c connection.c tcpnal.c connection.h diff --git a/lustre/portals/unals/README b/lustre/portals/unals/README deleted file mode 100644 index 6cb93d9..0000000 --- a/lustre/portals/unals/README +++ /dev/null @@ -1,53 +0,0 @@ -This library implements two NAL interfaces, both running over IP. -The first, tcpnal, creates TCP connections between participating -processes in order to transport the portals requests. The second, -ernal, provides a simple transport protocol which runs over -UDP datagrams. - -The interface functions return both of these values in host order for -convenience and readability. However this means that addresses -exchanged in messages between hosts of different orderings will not -function properly. - -Both NALs use the same support functions in order to schedule events -and communicate with the generic portals implementation. - - ------------------------- - | api | - |_______________________| - | lib | - |_______________________| - | ernal | |tcpnal | - |--------| |----------| - | udpsock| |connection| - |-----------------------| - | timer/select | - ------------------------- - - - These NALs uses the framework from fdnal of a pipe between the api -and library sides. This is wrapped up in the select on the library -side, and blocks on the api side. Performance could be severely -enhanced by collapsing this aritificial barrier, by using shared -memory queues, or by wiring the api layer directly to the library. 
- - -nid is defined as the low order 24-bits of the IP address of the -physical node left shifted by 8 plus a virtual node number of 0 -through 255 (really only 239). The virtual node number of a tcpnal -application should be specified using the environment variable -PTL_VIRTNODE. pid is now a completely arbitrary number in the -range of 0 to 255. The IP interface used can be overridden by -specifying the appropriate hostid by setting the PTL_HOSTID -environment variable. The value can be either dotted decimal -(n.n.n.n) or hex starting with "0x". -TCPNAL: - As the NAL needs to try to send to a particular nid/pid pair, it - will open up connections on demand. Because the port associated with - the connecting socket is different from the bound port, two - connections will normally be established between a pair of peers, with - data flowing from the anonymous connect (active) port to the advertised - or well-known bound (passive) port of each peer. - - Should the connection fail to open, an error is reported to the - library component, which causes the api request to fail. diff --git a/lustre/portals/unals/address.c b/lustre/portals/unals/address.c deleted file mode 100644 index b422c3f..0000000 --- a/lustre/portals/unals/address.c +++ /dev/null @@ -1,146 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* address.c: - * this file provides functions to aquire the IP address of the node - * and translate them into a NID/PID pair which supports a static - * mapping of virtual nodes into the port range of an IP socket. -*/ - -#include <stdlib.h> -#include <netdb.h> -#include <unistd.h> -#include <stdio.h> -#include <portals/p30.h> -#include <bridge.h> -#include <ipmap.h> - - -/* Function: get_node_id - * Returns: a 32 bit id for this node, actually a big-endian IP address - * - * get_node_id() determines the host name and uses the resolver to - * find out its ip address. This is fairly fragile and inflexible, but - * explicitly asking about interfaces and their addresses is very - * complicated and nonportable. - */ -static unsigned int get_node_id(void) -{ - char buffer[255]; - unsigned int x; - struct hostent *he; - char * host_envp; - - if (!(host_envp = getenv("PTL_HOSTID"))) - { - gethostname(buffer,sizeof(buffer)); - he=gethostbyname(buffer); - if (he) - x=*(unsigned int *)he->h_addr_list[0]; - else - x = 0; - return(ntohl(x)); - } - else - { - if (host_envp[1] != 'x') - { - int a, b, c, d; - sscanf(host_envp, "%d.%d.%d.%d", &a, &b, &c, &d); - return ((a<<24) | (b<<16) | (c<<8) | d); - } - else - { - long long hostid = strtoll(host_envp, 0, 0); - return((unsigned int) hostid); - } - } -} - - -/* Function: set_address - * Arugments: t: a procnal structure to populate with the request - * - * set_address performs the bit manipulations to set the nid, pid, and - * iptop8 fields of the procnal structures. 
- * - * TODO: fix pidrequest to try to do dynamic binding if PTL_ID_ANY - */ - -#ifdef DIRECT_IP_MODE -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int port; - if (pidrequest==(unsigned short)PTL_PID_ANY) port = 0; - else port=pidrequest; - t->nal_cb->ni.nid=get_node_id(); - t->nal_cb->ni.pid=port; -} -#else - -void set_address(bridge t,ptl_pid_t pidrequest) -{ - int virtnode, in_addr, port; - ptl_pid_t pid; - - /* get and remember my node id*/ - if (!getenv("PTL_VIRTNODE")) - virtnode = 0; - else - { - int maxvnode = PNAL_VNODE_MASK - (PNAL_BASE_PORT - >> PNAL_VNODE_SHIFT); - virtnode = atoi(getenv("PTL_VIRTNODE")); - if (virtnode > maxvnode) - { - fprintf(stderr, "PTL_VIRTNODE of %d is too large - max %d\n", - virtnode, maxvnode); - return; - } - } - - in_addr = get_node_id(); - - t->iptop8 = in_addr >> PNAL_HOSTID_SHIFT;/* for making new connections */ - t->nal_cb->ni.nid = ((in_addr & PNAL_HOSTID_MASK) - << PNAL_VNODE_SHIFT) - + virtnode; - - pid=pidrequest; - /* TODO: Support of pid PTL_ID_ANY with virtual nodes needs more work. */ -#ifdef notyet - if (pid==(unsigned short)PTL_PID_ANY) port = 0; -#endif - if (pid==(unsigned short)PTL_PID_ANY) - { - fprintf(stderr, "portal pid PTL_ID_ANY is not currently supported\n"); - return; - } - else if (pid > PNAL_PID_MASK) - { - fprintf(stderr, "portal pid of %d is too large - max %d\n", - pid, PNAL_PID_MASK); - return; - } - else port = ((virtnode << PNAL_VNODE_SHIFT) + pid) + PNAL_BASE_PORT; - t->nal_cb->ni.pid=pid; -} -#endif diff --git a/lustre/portals/unals/bridge.h b/lustre/portals/unals/bridge.h deleted file mode 100644 index 0b4940f..0000000 --- a/lustre/portals/unals/bridge.h +++ /dev/null @@ -1,29 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#include <portals/lib-p30.h> - -typedef struct bridge { - int alive; - nal_cb_t *nal_cb; - void *lower; - void *local; - void (*shutdown)(struct bridge *); - /* this doesn't really belong here */ - unsigned char iptop8; -} *bridge; - - -nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc); - -typedef int (*nal_initialize)(bridge); -extern nal_initialize nal_table[PTL_IFACE_MAX]; diff --git a/lustre/portals/unals/connection.c b/lustre/portals/unals/connection.c deleted file mode 100644 index 310e899..0000000 --- a/lustre/portals/unals/connection.c +++ /dev/null @@ -1,294 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* connection.c: - This file provides a simple stateful connection manager which - builds tcp connections on demand and leaves them open for - future use. 
It also provides the machinery to allow peers - to connect to it -*/ - -#include <stdlib.h> -#include <pqtimer.h> -#include <dispatch.h> -#include <table.h> -#include <stdio.h> -#include <stdarg.h> -#include <string.h> -#include <unistd.h> -#include <syscall.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <connection.h> -#include <errno.h> - - -/* global variable: acceptor port */ -unsigned short tcpnal_acceptor_port = 988; - - -/* Function: compare_connection - * Arguments: connection c: a connection in the hash table - * ptl_process_id_t: an id to verify agains - * Returns: 1 if the connection is the one requested, 0 otherwise - * - * compare_connection() tests for collisions in the hash table - */ -static int compare_connection(void *arg1, void *arg2) -{ - connection c = arg1; - unsigned int * id = arg2; - return((c->ip==id[0]) && (c->port==id[1])); -} - - -/* Function: connection_key - * Arguments: ptl_process_id_t id: an id to hash - * Returns: a not-particularily-well-distributed hash - * of the id - */ -static unsigned int connection_key(unsigned int *id) -{ - return(id[0]^id[1]); -} - - -/* Function: remove_connection - * Arguments: c: the connection to remove - */ -void remove_connection(void *arg) -{ - connection c = arg; - unsigned int id[2]; - - id[0]=c->ip; - id[1]=c->port; - hash_table_remove(c->m->connections,id); - close(c->fd); - free(c); -} - - -/* Function: read_connection: - * Arguments: c: the connection to read from - * dest: the buffer to read into - * len: the number of bytes to read - * Returns: success as 1, or failure as 0 - * - * read_connection() reads data from the connection, continuing - * to read partial results until the request is satisfied or - * it errors. TODO: this read should be covered by signal protection. 
- */ -int read_connection(connection c, - unsigned char *dest, - int len) -{ - int offset=0,rc; - - if (len){ - do { - if((rc=syscall(SYS_read, c->fd, dest+offset, len-offset))<=0){ - if (errno==EINTR) { - rc=0; - } else { - remove_connection(c); - return(0); - } - } - offset+=rc; - } while (offset<len); - } - return(1); -} - -static int connection_input(void *d) -{ - connection c = d; - return((*c->m->handler)(c->m->handler_arg,c)); -} - - -/* Function: allocate_connection - * Arguments: t: tcpnal the allocation is occuring in the context of - * dest: portal endpoint address for this connection - * fd: open file descriptor for the socket - * Returns: an allocated connection structure - * - * just encompasses the action common to active and passive - * connections of allocation and placement in the global table - */ -static connection allocate_connection(manager m, - unsigned int ip, - unsigned short port, - int fd) -{ - connection c=malloc(sizeof(struct connection)); - unsigned int id[2]; - c->m=m; - c->fd=fd; - c->ip=ip; - c->port=port; - id[0]=ip; - id[1]=port; - register_io_handler(fd,READ_HANDLER,connection_input,c); - hash_table_insert(m->connections,c,id); - return(c); -} - - -/* Function: new_connection - * Arguments: t: opaque argument holding the tcpname - * Returns: 1 in order to reregister for new connection requests - * - * called when the bound service socket recieves - * a new connection request, it always accepts and - * installs a new connection - */ -static int new_connection(void *z) -{ - manager m=z; - struct sockaddr_in s; - int len=sizeof(struct sockaddr_in); - int fd=accept(m->bound,(struct sockaddr *)&s,&len); - unsigned int nid=*((unsigned int *)&s.sin_addr); - /* cfs specific hack */ - //unsigned short pid=s.sin_port; - allocate_connection(m,htonl(nid),0/*pid*/,fd); - return(1); -} - - -/* Function: force_tcp_connection - * Arguments: t: tcpnal - * dest: portals endpoint for the connection - * Returns: an allocated connection structure, 
either - * a pre-existing one, or a new connection - */ -connection force_tcp_connection(manager m, - unsigned int ip, - unsigned short port) -{ - connection c; - struct sockaddr_in addr; - unsigned int id[2]; - - port = tcpnal_acceptor_port; - - id[0]=ip; - id[1]=port; - - if (!(c=hash_table_find(m->connections,id))){ - int fd; - - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(ip); - addr.sin_port = htons(port); - - if ((fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("tcpnal socket failed"); - exit(-1); - } - if (connect(fd, - (struct sockaddr *)&addr, - sizeof(struct sockaddr_in))) - { - perror("tcpnal connect"); - return(0); - } - return(allocate_connection(m,ip,port,fd)); - } - return(c); -} - - -/* Function: bind_socket - * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to - * Returns: 1 on success, or 0 on error - * - * bind_socket() attempts to allocate and bind a socket to the requested - * port, or dynamically assign one from the kernel should the port be - * zero. Sets the bound and bound_handler elements of m. - * - * TODO: The port should be an explicitly sized type. 
- */ -static int bind_socket(manager m,unsigned short port) -{ - struct sockaddr_in addr; - int alen=sizeof(struct sockaddr_in); - - if ((m->bound = socket(AF_INET, SOCK_STREAM, 0)) < 0) - return(0); - - bzero((char *) &addr, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = 0; - addr.sin_port = port; - - if (bind(m->bound,(struct sockaddr *)&addr,alen)<0){ - perror ("tcpnal bind"); - return(0); - } - - getsockname(m->bound,(struct sockaddr *)&addr, &alen); - - m->bound_handler=register_io_handler(m->bound,READ_HANDLER, - new_connection,m); - listen(m->bound,5); - m->port=addr.sin_port; - return(1); -} - - -/* Function: shutdown_connections - * Arguments: m: the manager structure - * - * close all connections and reclaim resources - */ -void shutdown_connections(manager m) -{ - close(m->bound); - remove_io_handler(m->bound_handler); - hash_destroy_table(m->connections,remove_connection); - free(m); -} - - -/* Function: init_connections - * Arguments: t: the nal state for this interface - * port: the port to attempt to bind to - * Returns: a newly allocated manager structure, or - * zero if the fixed port could not be bound - */ -manager init_connections(unsigned short pid, - int (*input)(void *, void *), - void *a) -{ - manager m=(manager)malloc(sizeof(struct manager)); - m->connections=hash_create_table(compare_connection,connection_key); - m->handler=input; - m->handler_arg=a; - if (bind_socket(m,pid)) return(m); - free(m); - return(0); -} diff --git a/lustre/portals/unals/connection.h b/lustre/portals/unals/connection.h deleted file mode 100644 index 6f57287..0000000 --- a/lustre/portals/unals/connection.h +++ /dev/null @@ -1,32 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#include <table.h> - -typedef struct manager { - table connections; - int bound; - io_handler bound_handler; - int (*handler)(void *, void *); - void *handler_arg; - unsigned short port; -} *manager; - - -typedef struct connection { - unsigned int ip; - unsigned short port; - int fd; - manager m; -} *connection; - -connection force_tcp_connection(manager m, unsigned int ip, unsigned int short); -manager init_connections(unsigned short, int (*f)(void *, void *), void *); -void remove_connection(void *arg); -void shutdown_connections(manager m); -int read_connection(connection c, unsigned char *dest, int len); diff --git a/lustre/portals/unals/debug.c b/lustre/portals/unals/debug.c deleted file mode 100644 index 529bb2d..0000000 --- a/lustre/portals/unals/debug.c +++ /dev/null @@ -1,119 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <stdio.h> -#include <fcntl.h> -#include <errno.h> -#include <stdarg.h> -#include <sys/time.h> - -int smp_processor_id = 1; -char debug_file_path[1024] = "/tmp/lustre-log"; -char debug_file_name[1024]; -FILE *debug_file_fd; - -int portals_do_debug_dumplog(void *arg) -{ - printf("Look in %s\n", debug_file_name); - return 0; -} - - -void portals_debug_print(void) -{ - return; -} - - -void portals_debug_dumplog(void) -{ - printf("Look in %s\n", debug_file_name); - return; -} - - -int portals_debug_init(unsigned long bufsize) -{ - debug_file_fd = stdout; - return 0; -} - -int portals_debug_cleanup(void) -{ - return 0; //close(portals_debug_fd); -} - -int portals_debug_clear_buffer(void) -{ - return 0; -} - -int portals_debug_mark_buffer(char *text) -{ - - fprintf(debug_file_fd, "*******************************************************************************\n"); - fprintf(debug_file_fd, "DEBUG MARKER: %s\n", text); - fprintf(debug_file_fd, "*******************************************************************************\n"); - - return 0; -} - -int portals_debug_copy_to_user(char *buf, unsigned long len) -{ - return 0; -} - -/* FIXME: I'm not very smart; someone smarter should make this better. */ -void -portals_debug_msg (int subsys, int mask, char *file, char *fn, int line, - const char *format, ...) -{ - va_list ap; - unsigned long flags; - struct timeval tv; - int nob; - - - /* NB since we pass a non-zero sized buffer (at least) on the first - * print, we can be assured that by the end of all the snprinting, - * we _do_ have a terminated buffer, even if our message got truncated. 
- */ - - gettimeofday(&tv, NULL); - - nob += fprintf(debug_file_fd, - "%02x:%06x:%d:%lu.%06lu ", - subsys >> 24, mask, smp_processor_id, - tv.tv_sec, tv.tv_usec); - - nob += fprintf(debug_file_fd, - "(%s:%d:%s() %d+%ld): ", - file, line, fn, 0, - 8192 - ((unsigned long)&flags & 8191UL)); - - va_start (ap, format); - nob += fprintf(debug_file_fd, format, ap); - va_end (ap); - - -} - diff --git a/lustre/portals/unals/dispatch.h b/lustre/portals/unals/dispatch.h deleted file mode 100644 index 34dd070..0000000 --- a/lustre/portals/unals/dispatch.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* this file is only called dispatch.h to prevent it - from colliding with /usr/include/sys/select.h */ - -typedef struct io_handler *io_handler; - -struct io_handler{ - io_handler *last; - io_handler next; - int fd; - int type; - int (*function)(void *); - void *argument; - int disabled; -}; - - -#define READ_HANDLER 1 -#define WRITE_HANDLER 2 -#define EXCEPTION_HANDLER 4 -#define ALL_HANDLER (READ_HANDLER | WRITE_HANDLER | EXCEPTION_HANDLER) - -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg); - -void remove_io_handler (io_handler i); -void init_unix_timer(void); -void select_timer_block(when until); -when now(void); diff --git a/lustre/portals/unals/ipmap.h b/lustre/portals/unals/ipmap.h deleted file mode 100644 index 85b1e18..0000000 --- a/lustre/portals/unals/ipmap.h +++ /dev/null @@ -1,38 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#define DIRECT_IP_MODE -#ifdef DIRECT_IP_MODE -#define PNAL_NID(in_addr, port) (in_addr) -#define PNAL_PID(pid) (pid) -#define PNAL_IP(in_addr, port) (in_addr) -#define PNAL_PORT(nid, pid) (pid) -#else - -#define PNAL_BASE_PORT 4096 -#define PNAL_HOSTID_SHIFT 24 -#define PNAL_HOSTID_MASK ((1 << PNAL_HOSTID_SHIFT) - 1) -#define PNAL_VNODE_SHIFT 8 -#define PNAL_VNODE_MASK ((1 << PNAL_VNODE_SHIFT) - 1) -#define PNAL_PID_SHIFT 8 -#define PNAL_PID_MASK ((1 << PNAL_PID_SHIFT) - 1) - -#define PNAL_NID(in_addr, port) (((ntohl(in_addr) & PNAL_HOSTID_MASK) \ - << PNAL_VNODE_SHIFT) \ - | (((ntohs(port)-PNAL_BASE_PORT) >>\ - PNAL_PID_SHIFT))) -#define PNAL_PID(port) ((ntohs(port) - PNAL_BASE_PORT) & PNAL_PID_MASK) - -#define PNAL_IP(nid,t) (htonl((((unsigned)(nid))\ - >> PNAL_VNODE_SHIFT)\ - | (t->iptop8 << PNAL_HOSTID_SHIFT))) -#define PNAL_PORT(nid, pid) (htons(((((nid) & PNAL_VNODE_MASK) \ - << PNAL_VNODE_SHIFT) \ - | ((pid) & PNAL_PID_MASK)) \ - + PNAL_BASE_PORT)) -#endif diff --git a/lustre/portals/unals/pqtimer.c b/lustre/portals/unals/pqtimer.c deleted file mode 100644 index fa2fb4f..0000000 --- a/lustre/portals/unals/pqtimer.c +++ /dev/null @@ -1,226 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* timer.c: - * this file implements a simple priority-queue based timer system. when - * combined with a file which implements now() and block(), it can - * be used to provide course-grained time-based callbacks. - */ - -#include <pqtimer.h> -#include <stdlib.h> -#include <string.h> - -struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -}; - -typedef struct thunk *thunk; -struct thunk { - void (*f)(void *); - void *a; - thunk next; -}; - -extern when now(void); - -static thunk thunks; -static int internal; -static void (*block_function)(when); -static int number_of_timers; -static int size_of_pqueue; -static timer *timers; - - -static void heal(int where) -{ - int left=(where<<1); - int right=(where<<1)+1; - int min=where; - timer temp; - - if (left <= number_of_timers) - if (timers[left]->w < timers[min]->w) min=left; - if (right <= number_of_timers) - if (timers[right]->w < timers[min]->w) min=right; - if (min != where){ - temp=timers[where]; - timers[where]=timers[min]; - timers[min]=temp; - heal(min); - } -} - -static void add_pqueue(int i) -{ - timer temp; - int parent=(i>>1); - if ((i>1) && (timers[i]->w< timers[parent]->w)){ - temp=timers[i]; - timers[i]=timers[parent]; - timers[parent]=temp; - add_pqueue(parent); - } -} - -static void add_timer(timer t) -{ - if (size_of_pqueue<(number_of_timers+2)){ - int oldsize=size_of_pqueue; - timer *new=(void *)malloc(sizeof(struct timer)*(size_of_pqueue+=10)); - memcpy(new,timers,sizeof(timer)*oldsize); - timers=new; - } - timers[++number_of_timers]=t; - add_pqueue(number_of_timers); -} - -/* Function: register_timer - * Arguments: interval: the time interval from the current time when - * the timer function should be called - * function: the function to call when 
the time has expired - * argument: the argument to call it with. - * Returns: a pointer to a timer structure - */ -timer register_timer(when interval, - void (*function)(void *), - void *argument) -{ - timer t=(timer)malloc(sizeof(struct timer)); - - t->arg=argument; - t->function=function; - t->interval=interval; - t->disable=0; - t->w=now()+interval; - add_timer(t); - if (!internal && (number_of_timers==1)) - block_function(t->w); - return(t); -} - -/* Function: remove_timer - * Arguments: t: - * Returns: nothing - * - * remove_timer removes a timer from the system, insuring - * that it will never be called. It does not actually - * free the timer due to reentrancy issues. - */ - -void remove_timer(timer t) -{ - t->disable=1; -} - - - -void timer_fire() -{ - timer current; - - current=timers[1]; - timers[1]=timers[number_of_timers--]; - heal(1); - if (!current->disable) { - (*current->function)(current->arg); - } - free(current); -} - -when next_timer(void) -{ - when here=now(); - - while (number_of_timers && (timers[1]->w <= here)) timer_fire(); - if (number_of_timers) return(timers[1]->w); - return(0); -} - -/* Function: timer_loop - * Arguments: none - * Returns: never - * - * timer_loop() is the blocking dispatch function for the timer. - * Is calls the block() function registered with init_timer, - * and handles associated with timers that have been registered. - */ -void timer_loop() -{ - when here; - - while (1){ - thunk z; - here=now(); - - for (z=thunks;z;z=z->next) (*z->f)(z->a); - - if (number_of_timers){ - if (timers[1]->w > here){ - (*block_function)(timers[1]->w); - } else { - timer_fire(); - } - } else { - thunk z; - for (z=thunks;z;z=z->next) (*z->f)(z->a); - (*block_function)(0); - } - } -} - - -/* Function: register_thunk - * Arguments: f: the function to call - * a: the single argument to call it with - * - * Thunk functions get called at irregular intervals, they - * should not assume when, or take a particularily long - * amount of time. 
Thunks are for background cleanup tasks. - */ -void register_thunk(void (*f)(void *),void *a) -{ - thunk t=(void *)malloc(sizeof(struct thunk)); - t->f=f; - t->a=a; - t->next=thunks; - thunks=t; -} - -/* Function: initialize_timer - * Arguments: block: the function to call to block for the specified interval - * - * initialize_timer() must be called before any other timer function, - * including timer_loop. - */ -void initialize_timer(void (*block)(when)) -{ - block_function=block; - number_of_timers=0; - size_of_pqueue=10; - timers=(timer *)malloc(sizeof(timer)*size_of_pqueue); - thunks=0; -} diff --git a/lustre/portals/unals/pqtimer.h b/lustre/portals/unals/pqtimer.h deleted file mode 100644 index 11efb0e..0000000 --- a/lustre/portals/unals/pqtimer.h +++ /dev/null @@ -1,25 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -typedef unsigned long long when; -when now(void); -typedef struct timer *timer; -timer register_timer(when interval, - void (*function)(void *), - void *argument); -timer register_timer_wait(void); -void remove_timer(timer); -void timer_loop(void); -void initialize_timer(void (*block)(when)); -void timer_fire(void); - - -#define HZ 0x100000000ull - - diff --git a/lustre/portals/unals/procapi.c b/lustre/portals/unals/procapi.c deleted file mode 100644 index 6da3210..0000000 --- a/lustre/portals/unals/procapi.c +++ /dev/null @@ -1,283 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* api.c: - * This file provides the 'api' side for the process-based nals. - * it is responsible for creating the 'library' side thread, - * and passing wrapped portals transactions to it. - * - * Along with initialization, shutdown, and transport to the library - * side, this file contains some stubs to satisfy the nal definition. 
- */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <syscall.h> -#include <procbridge.h> -#include <pqtimer.h> -#include <dispatch.h> -#include <errno.h> - - -/* Function: forward - * Arguments: nal_t *nal: pointer to my top-side nal structure - * id: the command to pass to the lower layer - * args, args_len:pointer to and length of the request - * ret, ret_len: pointer to and size of the result - * Returns: a portals status code - * - * forwards a packaged api call from the 'api' side to the 'library' - * side, and collects the result - */ -#define forward_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(PTL_SEGV);\ - } -static int procbridge_forward(nal_t *n, int id, void *args, ptl_size_t args_len, - void *ret, ptl_size_t ret_len) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int lib=p->to_lib[1]; - int k; - - forward_failure(write,lib, &id, sizeof(id)); - forward_failure(write,lib,&args_len, sizeof(args_len)); - forward_failure(write,lib,&ret_len, sizeof(ret_len)); - forward_failure(write,lib,args, args_len); - - do { - k=syscall(SYS_read, p->from_lib[0], ret, ret_len); - } while ((k!=ret_len) && (errno += EINTR)); - - if(k!=ret_len){ - perror("nal: read return block"); - return PTL_SEGV; - } - return (PTL_OK); -} -#undef forward_failure - - -/* Function: shutdown - * Arguments: nal: a pointer to my top side nal structure - * ni: my network interface index - * - * cleanup nal state, reclaim the lower side thread and - * its state using PTL_FINI codepoint - */ -static int procbridge_shutdown(nal_t *n, int ni) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - int code=PTL_FINI; - - syscall(SYS_write, p->to_lib[1],&code,sizeof(code)); - syscall(SYS_read, p->from_lib[0],&code,sizeof(code)); - - syscall(SYS_close, p->to_lib[0]); - syscall(SYS_close, p->to_lib[1]); - syscall(SYS_close, 
p->from_lib[0]); - syscall(SYS_close, p->from_lib[1]); - - free(p); - return(0); -} - - -/* Function: validate - * useless stub - */ -static int procbridge_validate(nal_t *nal, void *base, ptl_size_t extent) -{ - return(0); -} - - -/* Function: yield - * Arguments: pid: - * - * this function was originally intended to allow the - * lower half thread to be scheduled to allow progress. we - * overload it to explicitly block until signalled by the - * lower half. - */ -static void procbridge_yield(nal_t *n) -{ - bridge b=(bridge)n->nal_data; - procbridge p=(procbridge)b->local; - - pthread_mutex_lock(&p->mutex); - pthread_cond_wait(&p->cond,&p->mutex); - pthread_mutex_unlock(&p->mutex); -} - - -static void procbridge_lock(nal_t * nal, unsigned long *flags){} -static void procbridge_unlock(nal_t * nal, unsigned long *flags){} -/* api_nal - * the interface vector to allow the generic code to access - * this nal. this is seperate from the library side nal_cb. - * TODO: should be dyanmically allocated - */ -static nal_t api_nal = { - ni: {0}, - nal_data: NULL, - forward: procbridge_forward, - shutdown: procbridge_shutdown, - validate: procbridge_validate, - yield: procbridge_yield, - lock: procbridge_lock, - unlock: procbridge_unlock -}; - -/* Function: bridge_init - * - * Arguments: pid: requested process id (port offset) - * PTL_ID_ANY not supported. - * desired: limits passed from the application - * and effectively ignored - * actual: limits actually allocated and returned - * - * Returns: a pointer to my statically allocated top side NAL - * structure - * - * initializes the tcp nal. we define unix_failure as an - * error wrapper to cut down clutter. 
- */ -#define unix_failure(operand,fd,buffer,length,text)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - perror(text);\ - return(NULL);\ - } -#if 0 -static nal_t *bridge_init(ptl_interface_t nal, - ptl_pid_t pid_request, - ptl_ni_limits_t *desired, - ptl_ni_limits_t *actual, - int *rc) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - perror("nal_init: pipe"); - return(NULL); - } - - if (desired) limits = *desired; - unix_failure(write,p->to_lib[1], &pid_request, sizeof(pid_request), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal, sizeof(ptl_interface_t), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], actual, sizeof(ptl_ni_limits_t), - "tcp_init: read"); - unix_failure(read,p->from_lib[0], rc, sizeof(rc), - "nal_init: read"); - - if(*rc) return(NULL); - - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#endif - -ptl_nid_t tcpnal_mynid; - -nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid) -{ - procbridge p; - bridge b; - static int initialized=0; - ptl_ni_limits_t limits = {-1,-1,-1,-1,-1}; - int rc, nal_type = PTL_IFACE_TCP;/* PTL_IFACE_DEFAULT FIXME hack */ - - if(initialized) return (&api_nal); - - init_unix_timer(); - - b=(bridge)malloc(sizeof(struct bridge)); - p=(procbridge)malloc(sizeof(struct procbridge)); - api_nal.nal_data=b; - b->local=p; - - if(pipe(p->to_lib) || pipe(p->from_lib)) { - 
perror("nal_init: pipe"); - return(NULL); - } - - if (ptl_size) - limits.max_ptable_index = ptl_size; - if (acl_size) - limits.max_atable_index = acl_size; - - unix_failure(write,p->to_lib[1], &requested_pid, sizeof(requested_pid), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &limits, sizeof(ptl_ni_limits_t), - "nal_init: write"); - unix_failure(write,p->to_lib[1], &nal_type, sizeof(nal_type), - "nal_init: write"); - - if(pthread_create(&p->t, NULL, nal_thread, b)) { - perror("nal_init: pthread_create"); - return(NULL); - } - - unix_failure(read,p->from_lib[0], &rc, sizeof(rc), - "nal_init: read"); - - if(rc) return(NULL); - - b->nal_cb->ni.nid = tcpnal_mynid; - initialized = 1; - pthread_mutex_init(&p->mutex,0); - pthread_cond_init(&p->cond, 0); - - return (&api_nal); -} -#undef unix_failure diff --git a/lustre/portals/unals/procbridge.h b/lustre/portals/unals/procbridge.h deleted file mode 100644 index 060ae7b..0000000 --- a/lustre/portals/unals/procbridge.h +++ /dev/null @@ -1,40 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef _PROCBRIDGE_H_ -#define _PROCBRIDGE_H_ - -#include <pthread.h> -#include <bridge.h> -#include <ipmap.h> - - -typedef struct procbridge { - pthread_t t; - pthread_cond_t cond; - pthread_mutex_t mutex; - int to_lib[2]; - int from_lib[2]; -} *procbridge; - -extern void *nal_thread(void *); - - -#define PTL_INIT (LIB_MAX_DISPATCH+1) -#define PTL_FINI (LIB_MAX_DISPATCH+2) - -#define MAX_ACLS 1 -#define MAX_PTLS 128 - -extern void set_address(bridge t,ptl_pid_t pidrequest); -extern nal_t *procbridge_interface(int num_interface, - ptl_pt_index_t ptl_size, - ptl_ac_index_t acl_size, - ptl_pid_t requested_pid); - -#endif diff --git a/lustre/portals/unals/proclib.c b/lustre/portals/unals/proclib.c deleted file mode 100644 index c3ee103..0000000 --- a/lustre/portals/unals/proclib.c +++ /dev/null @@ -1,270 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* lib.c: - * This file provides the 'library' side for the process-based nals. 
- * it is responsible for communication with the 'api' side and - * providing service to the generic portals 'library' - * implementation. 'library' might be better termed 'communication' - * or 'kernel'. - */ - -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <unistd.h> -#include <syscall.h> -#include <procbridge.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netdb.h> -#include <errno.h> -#include <timer.h> -//#include <util/pqtimer.h> -#include <dispatch.h> - -/* the following functions are stubs to satisfy the nal definition - without doing anything particularily useful*/ - -static int nal_write(nal_cb_t *nal, - void *private, - user_ptr dst_addr, - void *src_addr, - ptl_size_t len) -{ - memcpy(dst_addr, src_addr, len); - return 0; -} - -static int nal_read(nal_cb_t * nal, - void *private, - void *dst_addr, - user_ptr src_addr, - size_t len) -{ - memcpy(dst_addr, src_addr, len); - return 0; -} - -static void *nal_malloc(nal_cb_t *nal, - ptl_size_t len) -{ - void *buf = malloc(len); - return buf; -} - -static void nal_free(nal_cb_t *nal, - void *buf, - ptl_size_t len) -{ - free(buf); -} - -static void nal_printf(nal_cb_t *nal, - const char *fmt, - ...) -{ - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - va_end(ap); -} - - -static void nal_cli(nal_cb_t *nal, - unsigned long *flags) -{ -} - - -static void nal_sti(nal_cb_t *nal, - unsigned long *flags) -{ -} - - -static int nal_dist(nal_cb_t *nal, - ptl_nid_t nid, - unsigned long *dist) -{ - return 0; -} - - - -/* Function: data_from_api - * Arguments: t: the nal state for this interface - * Returns: whether to continue reading from the pipe - * - * data_from_api() reads data from the api side in response - * to a select. - * - * We define data_failure() for syntactic convenience - * of unix error reporting. 
- */ - -#define data_failure(operand,fd,buffer,length)\ - if(syscall(SYS_##operand,fd,buffer,length)!=length){\ - lib_fini(b->nal_cb);\ - return(0);\ - } -static int data_from_api(void *arg) -{ - bridge b = arg; - procbridge p=(procbridge)b->local; - /* where are these two sizes derived from ??*/ - char arg_block[ 256 ]; - char ret_block[ 128 ]; - ptl_size_t arg_len,ret_len; - int fd=p->to_lib[0]; - int index; - - data_failure(read,fd, &index, sizeof(index)); - - if (index==PTL_FINI) { - lib_fini(b->nal_cb); - if (b->shutdown) (*b->shutdown)(b); - syscall(SYS_write, p->from_lib[1],&b->alive,sizeof(b->alive)); - - /* a heavy-handed but convenient way of shutting down - the lower side thread */ - pthread_exit(0); - } - - data_failure(read,fd, &arg_len, sizeof(arg_len)); - data_failure(read,fd, &ret_len, sizeof(ret_len)); - data_failure(read,fd, arg_block, arg_len); - - lib_dispatch(b->nal_cb, NULL, index, arg_block, ret_block); - - data_failure(write,p->from_lib[1],ret_block, ret_len); - return(1); -} -#undef data_failure - - - -static void wakeup_topside(void *z) -{ - bridge b=z; - procbridge p=b->local; - - pthread_mutex_lock(&p->mutex); - pthread_cond_broadcast(&p->cond); - pthread_mutex_unlock(&p->mutex); -} - - -/* Function: nal_thread - * Arguments: z: an opaque reference to a nal control structure - * allocated and partially populated by the api level code - * Returns: nothing, and only on error or explicit shutdown - * - * This function is the entry point of the pthread initiated on - * the api side of the interface. This thread is used to handle - * asynchronous delivery to the application. 
- * - * We define a limit macro to place a ceiling on limits - * for syntactic convenience - */ -#define LIMIT(x,y,max)\ - if ((unsigned int)x > max) y = max; - -extern int tcpnal_init(bridge); - -nal_initialize nal_table[PTL_IFACE_MAX]={0,tcpnal_init,0}; - -void *nal_thread(void *z) -{ - bridge b=z; - procbridge p=b->local; - int rc; - ptl_pid_t pid_request; - int nal_type; - ptl_ni_limits_t desired; - ptl_ni_limits_t actual; - - b->nal_cb=(nal_cb_t *)malloc(sizeof(nal_cb_t)); - b->nal_cb->nal_data=b; - b->nal_cb->cb_read=nal_read; - b->nal_cb->cb_write=nal_write; - b->nal_cb->cb_malloc=nal_malloc; - b->nal_cb->cb_free=nal_free; - b->nal_cb->cb_map=NULL; - b->nal_cb->cb_unmap=NULL; - b->nal_cb->cb_printf=nal_printf; - b->nal_cb->cb_cli=nal_cli; - b->nal_cb->cb_sti=nal_sti; - b->nal_cb->cb_dist=nal_dist; - - - register_io_handler(p->to_lib[0],READ_HANDLER,data_from_api,(void *)b); - - if(!(rc = syscall(SYS_read, p->to_lib[0], &pid_request, sizeof(pid_request)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &desired, sizeof(ptl_ni_limits_t)))) - perror("procbridge read from api"); - if(!(rc = syscall(SYS_read, p->to_lib[0], &nal_type, sizeof(nal_type)))) - perror("procbridge read from api"); - - actual = desired; - LIMIT(desired.max_match_entries,actual.max_match_entries,MAX_MES); - LIMIT(desired.max_mem_descriptors,actual.max_mem_descriptors,MAX_MDS); - LIMIT(desired.max_event_queues,actual.max_event_queues,MAX_EQS); - LIMIT(desired.max_atable_index,actual.max_atable_index,MAX_ACLS); - LIMIT(desired.max_ptable_index,actual.max_ptable_index,MAX_PTLS); - - set_address(b,pid_request); - - if (nal_table[nal_type]) rc=(*nal_table[nal_type])(b); - /* initialize the generic 'library' level code */ - - rc = lib_init(b->nal_cb, - b->nal_cb->ni.nid, - b->nal_cb->ni.pid, - 10, - actual.max_ptable_index, - actual.max_atable_index); - - /* - * Whatever the initialization returned is passed back to the - * user level code for further 
interpretation. We just exit if - * it is non-zero since something went wrong. - */ - /* this should perform error checking */ -#if 0 - write(p->from_lib[1], &actual, sizeof(ptl_ni_limits_t)); -#endif - syscall(SYS_write, p->from_lib[1], &rc, sizeof(rc)); - - if(!rc) { - /* the thunk function is called each time the timer loop - performs an operation and returns to blocking mode. we - overload this function to inform the api side that - it may be interested in looking at the event queue */ - register_thunk(wakeup_topside,b); - timer_loop(); - } - return(0); -} -#undef LIMIT - diff --git a/lustre/portals/unals/select.c b/lustre/portals/unals/select.c deleted file mode 100644 index c4f84f4..0000000 --- a/lustre/portals/unals/select.c +++ /dev/null @@ -1,165 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* select.c: - * Provides a general mechanism for registering and dispatching - * io events through the select system call. 
- */ - -#ifdef sun -#include <sys/filio.h> -#else -#include <sys/ioctl.h> -#endif - -#include <sys/time.h> -#include <sys/types.h> -#include <stdlib.h> -#include <pqtimer.h> -#include <dispatch.h> - - -static struct timeval beginning_of_epoch; -static io_handler io_handlers; - -/* Function: now - * - * Return: the current time in canonical units: a 64 bit number - * where the most significant 32 bits contains the number - * of seconds, and the least signficant a count of (1/(2^32))ths - * of a second. - */ -when now() -{ - struct timeval result; - - gettimeofday(&result,0); - return((((unsigned long long)result.tv_sec)<<32)| - (((unsigned long long)result.tv_usec)<<32)/1000000); -} - - -/* Function: register_io_handler - * Arguments: fd: the file descriptor of interest - * type: a mask of READ_HANDLER, WRITE_HANDLER, EXCEPTION_HANDLER - * function: a function to call when io is available on fd - * arg: an opaque correlator to return to the handler - * Returns: a pointer to the io_handler structure - */ -io_handler register_io_handler(int fd, - int type, - int (*function)(void *), - void *arg) -{ - io_handler i=(io_handler)malloc(sizeof(struct io_handler)); - if ((i->fd=fd)>=0){ - i->type=type; - i->function=function; - i->argument=arg; - i->disabled=0; - i->last=&io_handlers; - if ((i->next=io_handlers)) i->next->last=&i->next; - io_handlers=i; - } - return(i); -} - -/* Function: remove_io_handler - * Arguments: i: a pointer to the handler to stop servicing - * - * remove_io_handler() doesn't actually free the handler, due - * to reentrancy problems. it just marks the handler for - * later cleanup by the blocking function. 
- */ -void remove_io_handler (io_handler i) -{ - i->disabled=1; -} - -static void set_flag(io_handler n,fd_set *fds) -{ - if (n->type & READ_HANDLER) FD_SET(n->fd,fds); - if (n->type & WRITE_HANDLER) FD_SET(n->fd,fds+1); - if (n->type & EXCEPTION_HANDLER) FD_SET(n->fd,fds+2); -} - - -/* Function: select_timer_block - * Arguments: until: an absolute time when the select should return - * - * This function dispatches the various file descriptors' handler - * functions, if the kernel indicates there is io available. - */ -void select_timer_block(when until) -{ - fd_set fds[3]; - struct timeval timeout; - struct timeval *timeout_pointer; - int result; - io_handler j; - io_handler *k; - - /* TODO: loop until the entire interval is expired*/ - if (until){ - when interval=until-now(); - timeout.tv_sec=(interval>>32); - timeout.tv_usec=((interval<<32)/1000000)>>32; - timeout_pointer=&timeout; - } else timeout_pointer=0; - - FD_ZERO(fds); - FD_ZERO(fds+1); - FD_ZERO(fds+2); - for (k=&io_handlers;*k;){ - if ((*k)->disabled){ - j=*k; - *k=(*k)->next; - free(j); - } - if (*k) { - set_flag(*k,fds); - k=&(*k)->next; - } - } - result=select(FD_SETSIZE,fds,fds+1,fds+2,timeout_pointer); - - if (result > 0) - for (j=io_handlers;j;j=j->next){ - if (!(j->disabled) && - ((FD_ISSET(j->fd,fds) && (j->type & READ_HANDLER)) || - (FD_ISSET(j->fd,fds+1) && (j->type & WRITE_HANDLER)) || - (FD_ISSET(j->fd,fds+2) && (j->type & EXCEPTION_HANDLER)))){ - if (!(*j->function)(j->argument)) - j->disabled=1; - } - } -} - -/* Function: init_unix_timer() - * is called to initialize the library - */ -void init_unix_timer() -{ - io_handlers=0; - gettimeofday(&beginning_of_epoch, 0); - initialize_timer(select_timer_block); -} diff --git a/lustre/portals/unals/table.c b/lustre/portals/unals/table.c deleted file mode 100644 index bef13c5..0000000 --- a/lustre/portals/unals/table.c +++ /dev/null @@ -1,264 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * 
vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <table.h> -#include <stdlib.h> -#include <string.h> - - -/* table.c: - * a very simple hash table implementation with paramerterizable - * comparison and key generation functions. 
it does resize - * in order to accomidate more entries, but never collapses - * the table - */ - -static table_entry *table_lookup (table t,void *comparator, - unsigned int k, - int (*compare_function)(void *, void *), - int *success) -{ - unsigned int key=k%t->size; - table_entry *i; - - for (i=&(t->entries[key]);*i;i=&((*i)->next)){ - if (compare_function && ((*i)->key==k)) - if ((*t->compare_function)((*i)->value,comparator)){ - *success=1; - return(i); - } - } - *success=0; - return(&(t->entries[key])); -} - - -static void resize_table(table t, int size) -{ - int old_size=t->size; - table_entry *old_entries=t->entries; - int i; - table_entry j,n; - table_entry *position; - int success; - - t->size=size; - t->entries=(table_entry *)malloc(sizeof(table_entry)*t->size); - memset(t->entries,0,sizeof(table_entry)*t->size); - - for (i=0;i<old_size;i++) - for (j=old_entries[i];j;j=n){ - n=j->next; - position=table_lookup(t,0,j->key,0,&success); - j->next= *position; - *position=j; - } - free(old_entries); -} - - -/* Function: key_from_int - * Arguments: int i: value to compute the key of - * Returns: the key - */ -unsigned int key_from_int(int i) -{ - return(i); -} - - -/* Function: key_from_string - * Arguments: char *s: the null terminated string - * to compute the key of - * Returns: the key - */ -unsigned int key_from_string(char *s) -{ - unsigned int result=0; - unsigned char *n; - int i; - if (!s) return(1); - for (n=s,i=0;*n;n++,i++) result^=(*n*57)^*n*i; - return(result); -} - - -/* Function: hash_create_table - * Arguments: compare_function: a function to compare - * a table instance with a correlator - * key_function: a function to generate a 32 bit - * hash key from a correlator - * Returns: a pointer to the new table - */ -table hash_create_table (int (*compare_function)(void *, void *), - unsigned int (*key_function)(unsigned int *)) -{ - table new=(table)malloc(sizeof(struct table)); - memset(new, 0, sizeof(struct table)); - - 
new->compare_function=compare_function; - new->key_function=key_function; - new->number_of_entries=0; - new->size=4; - new->entries=(table_entry *)malloc(sizeof(table_entry)*new->size); - memset(new->entries,0,sizeof(table_entry)*new->size); - return(new); -} - - -/* Function: hash_table_find - * Arguments: t: a table to look in - * comparator: a value to access the table entry - * Returns: the element references to by comparator, or null - */ -void *hash_table_find (table t, void *comparator) -{ - int success; - table_entry* entry=table_lookup(t,comparator, - (*t->key_function)(comparator), - t->compare_function, - &success); - if (success) return((*entry)->value); - return(0); -} - - -/* Function: hash_table_insert - * Arguments: t: a table to insert the object - * value: the object to put in the table - * comparator: the value by which the object - * will be addressed - * Returns: nothing - */ -void hash_table_insert (table t, void *value, void *comparator) -{ - int success; - unsigned int k=(*t->key_function)(comparator); - table_entry *position=table_lookup(t,comparator,k, - t->compare_function,&success); - table_entry entry; - - if (success) { - entry = *position; - } else { - entry = (table_entry)malloc(sizeof(struct table_entry)); - memset(entry, 0, sizeof(struct table_entry)); - entry->next= *position; - *position=entry; - t->number_of_entries++; - } - entry->value=value; - entry->key=k; - if (t->number_of_entries > t->size) resize_table(t,t->size*2); -} - -/* Function: hash_table_remove - * Arguments: t: the table to remove the object from - * comparator: the index value of the object to remove - * Returns: - */ -void hash_table_remove (table t, void *comparator) -{ - int success; - table_entry temp; - table_entry *position=table_lookup(t,comparator, - (*t->key_function)(comparator), - t->compare_function,&success); - if(success) { - temp=*position; - *position=(*position)->next; - free(temp); /* the value? 
*/ - t->number_of_entries--; - } -} - -/* Function: hash_iterate_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - */ -void hash_iterate_table_entries(table t, - void (*handler)(void *,void *), - void *arg) -{ - int i; - table_entry *j,*next; - - for (i=0;i<t->size;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - (*handler)(arg,(*j)->value); - } -} - -/* Function: hash_filter_table_entries - * Arguments: t: the table to iterate over - * handler: a function to call with each element - * of the table, along with arg - * arg: the opaque object to pass to handler - * Returns: nothing - * Notes: operations on the table inside handler are not safe - * - * filter_table_entires() calls the handler function for each - * item in the table, passing it and arg. The handler function - * returns 1 if it is to be retained in the table, and 0 - * if it is to be removed. 
- */ -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg) -{ - int i; - table_entry *j,*next,v; - - for (i=0;i<t->size;i++) - for (j=t->entries+i;*j;j=next){ - next=&((*j)->next); - if (!(*handler)(arg,(*j)->value)){ - next=j; - v=*j; - *j=(*j)->next; - free(v); - t->number_of_entries--; - } - } -} - -/* Function: destroy_table - * Arguments: t: the table to free - * thunk: a function to call with each element, - * most likely free() - * Returns: nothing - */ -void hash_destroy_table(table t,void (*thunk)(void *)) -{ - table_entry j,next; - int i; - for (i=0;i<t->size;i++) - for (j=t->entries[i];j;j=next){ - next=j->next; - if (thunk) (*thunk)(j->value); - free(j); - } - free(t->entries); - free(t); -} diff --git a/lustre/portals/unals/table.h b/lustre/portals/unals/table.h deleted file mode 100644 index 7fab586..0000000 --- a/lustre/portals/unals/table.h +++ /dev/null @@ -1,39 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -#ifndef E_TABLE -#define E_TABLE - -typedef struct table_entry { - unsigned int key; - void *value; - struct table_entry *next; -} *table_entry; - - -typedef struct table { - unsigned int size; - int number_of_entries; - table_entry *entries; - int (*compare_function)(void *, void *); - unsigned int (*key_function)(unsigned int *); -} *table; - -/* table.c */ -unsigned int key_from_int(int i); -unsigned int key_from_string(char *s); -table hash_create_table(int (*compare_function)(void *, void *), unsigned int (*key_function)(unsigned int *)); -void *hash_table_find(table t, void *comparator); -void hash_table_insert(table t, void *value, void *comparator); -void hash_table_remove(table t, void *comparator); -void hash_iterate_table_entries(table t, void (*handler)(void *, void *), void *arg); -void hash_filter_table_entries(table t, int (*handler)(void *, void *), void *arg); -void hash_destroy_table(table t, void (*thunk)(void *)); - -#endif diff --git a/lustre/portals/unals/tcpnal.c b/lustre/portals/unals/tcpnal.c deleted file mode 100644 index 534fc17..0000000 --- a/lustre/portals/unals/tcpnal.c +++ /dev/null @@ -1,198 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2.1 of the GNU Lesser General - * Public License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* tcpnal.c: - This file implements the TCP-based nal by providing glue - between the connection service and the generic NAL implementation */ - -#include <stdlib.h> -#include <stdio.h> -#include <stdarg.h> -#include <unistd.h> -#include <syscall.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <pqtimer.h> -#include <dispatch.h> -#include <bridge.h> -#include <ipmap.h> -#include <connection.h> - -/* Function: tcpnal_send - * Arguments: nal: pointer to my nal control block - * private: unused - * cookie: passed back to the portals library - * hdr: pointer to the portals header - * nid: destination node - * pid: destination process - * data: body of the message - * len: length of the body - * Returns: zero on success - * - * sends a packet to the peer, after insuring that a connection exists - */ -#warning FIXME: "param 'type' is newly added, make use of it!!" -int tcpnal_send(nal_cb_t *n, - void *private, - lib_msg_t *cookie, - ptl_hdr_t *hdr, - int type, - ptl_nid_t nid, - ptl_pid_t pid, - unsigned int niov, - struct iovec *iov, - size_t len) -{ - connection c; - bridge b=(bridge)n->nal_data; - struct iovec tiov[2]; - int count = 1; - - if (!(c=force_tcp_connection((manager)b->lower, - PNAL_IP(nid,b), - PNAL_PORT(nid,pid)))) - return(1); - -#if 0 - /* TODO: these results should be checked. furthermore, provision - must be made for the SIGPIPE which is delivered when - writing on a tcp socket which has closed underneath - the application. 
there is a linux flag in the sendmsg - call which turns off the signally behaviour, but its - nonstandard */ - syscall(SYS_write, c->fd,hdr,sizeof(ptl_hdr_t)); - LASSERT (niov <= 1); - if (len) syscall(SYS_write, c->fd,iov[0].iov_base,len); -#else - LASSERT (niov <= 1); - - tiov[0].iov_base = hdr; - tiov[0].iov_len = sizeof(ptl_hdr_t); - - if (len) { - tiov[1].iov_base = iov[0].iov_base; - tiov[1].iov_len = len; - count++; - } - - syscall(SYS_writev, c->fd, tiov, count); -#endif - lib_finalize(n, private, cookie); - - return(0); -} - - -/* Function: tcpnal_recv - * Arguments: nal_cb_t *nal: pointer to my nal control block - * void *private: connection pointer passed through - * lib_parse() - * lib_msg_t *cookie: passed back to portals library - * user_ptr data: pointer to the destination buffer - * size_t mlen: length of the body - * size_t rlen: length of data in the network - * Returns: zero on success - * - * blocking read of the requested data. must drain out the - * difference of mainpulated and requested lengths from the network - */ -int tcpnal_recv(nal_cb_t *n, - void *private, - lib_msg_t *cookie, - unsigned int niov, - struct iovec *iov, - ptl_size_t mlen, - ptl_size_t rlen) - -{ - if (mlen) { - LASSERT (niov <= 1); - read_connection(private,iov[0].iov_base,mlen); - lib_finalize(n, private, cookie); - } - - if (mlen!=rlen){ - char *trash=malloc(rlen-mlen); - - /*TODO: check error status*/ - read_connection(private,trash,rlen-mlen); - free(trash); - } - - return(rlen); -} - - -/* Function: from_connection: - * Arguments: c: the connection to read from - * Returns: whether or not to continue reading from this connection, - * expressed as a 1 to continue, and a 0 to not - * - * from_connection() is called from the select loop when i/o is - * available. It attempts to read the portals header and - * pass it to the generic library for processing. 
- */ -static int from_connection(void *a, void *d) -{ - connection c = d; - bridge b=a; - ptl_hdr_t hdr; - - if (read_connection(c, (unsigned char *)&hdr, sizeof(hdr))){ - lib_parse(b->nal_cb, &hdr, c); - return(1); - } - return(0); -} - - -static void tcpnal_shutdown(bridge b) -{ - shutdown_connections(b->lower); -} - -/* Function: PTL_IFACE_TCP - * Arguments: pid_request: desired port number to bind to - * desired: passed NAL limits structure - * actual: returned NAL limits structure - * Returns: a nal structure on success, or null on failure - */ -int tcpnal_init(bridge b) -{ - manager m; - - b->nal_cb->cb_send=tcpnal_send; - b->nal_cb->cb_recv=tcpnal_recv; - b->shutdown=tcpnal_shutdown; - - if (!(m=init_connections(PNAL_PORT(b->nal_cb->ni.nid, - b->nal_cb->ni.pid), - from_connection,b))){ - /* TODO: this needs to shut down the - newly created junk */ - return(PTL_NAL_FAILED); - } - /* XXX cfs hack */ - b->nal_cb->ni.pid=0; - b->lower=m; - return(PTL_OK); -} diff --git a/lustre/portals/unals/timer.h b/lustre/portals/unals/timer.h deleted file mode 100644 index aaf39d2..0000000 --- a/lustre/portals/unals/timer.h +++ /dev/null @@ -1,30 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. 
- * Copyright (c) 2002 Eric Hoffman - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -/* TODO: make this an explicit type when they become available */ -typedef unsigned long long when; - -typedef struct timer { - void (*function)(void *); - void *arg; - when w; - int interval; - int disable; -} *timer; - -timer register_timer(when, void (*f)(void *), void *a); -void remove_timer(timer t); -void timer_loop(void); -void initialize_timer(void); -void register_thunk(void (*f)(void *),void *a); - - -#define HZ 0x100000000ull - - diff --git a/lustre/portals/unals/utypes.h b/lustre/portals/unals/utypes.h deleted file mode 100644 index 7eca959..0000000 --- a/lustre/portals/unals/utypes.h +++ /dev/null @@ -1,12 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cray Inc. - * - * This file is part of Portals, http://www.sf.net/projects/sandiaportals/ - */ - -typedef unsigned short uint16; -typedef unsigned long uint32; -typedef unsigned long long uint64; -typedef unsigned char uint8; diff --git a/lustre/portals/utils/.cvsignore b/lustre/portals/utils/.cvsignore deleted file mode 100644 index 148310a..0000000 --- a/lustre/portals/utils/.cvsignore +++ /dev/null @@ -1,8 +0,0 @@ -Makefile -Makefile.in -acceptor -debugctl -ptlctl -.deps -routerstat -wirecheck \ No newline at end of file diff --git a/lustre/portals/utils/Makefile.am b/lustre/portals/utils/Makefile.am deleted file mode 100644 index 05af598..0000000 --- a/lustre/portals/utils/Makefile.am +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution - - -COMPILE = gcc -Wall -g -I$(srcdir)/../include -LINK = gcc -o $@ - -sbin_PROGRAMS = acceptor ptlctl debugctl routerstat wirecheck -lib_LIBRARIES = libptlctl.a - -acceptor_SOURCES = acceptor.c # -lefence - -wirecheck_SOURCES = wirecheck.c - -libptlctl_a_SOURCES = portals.c debug.c l_ioctl.c parser.c parser.h - -ptlctl_SOURCES = ptlctl.c -ptlctl_LDADD = -L. -lptlctl -lncurses # -lefence -ptlctl_DEPENDENCIES = libptlctl.a - -debugctl_SOURCES = debugctl.c -debugctl_LDADD = -L. -lptlctl -lncurses # -lefence -debugctl_DEPENDENCIES = libptlctl.a - -routerstat_SOURCES = routerstat.c diff --git a/lustre/portals/utils/acceptor.c b/lustre/portals/utils/acceptor.c deleted file mode 100644 index c6590db..0000000 --- a/lustre/portals/utils/acceptor.c +++ /dev/null @@ -1,466 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include <stdio.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/tcp.h> -#include <netdb.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <unistd.h> -#include <asm/byteorder.h> -#include <syslog.h> - -#include <errno.h> - -#include <portals/api-support.h> -#include <portals/list.h> -#include <portals/lib-types.h> - -/* should get this from autoconf somehow */ -#ifndef PIDFILE_DIR -#define PIDFILE_DIR "/var/run" -#endif - -#define PROGNAME "acceptor" - -void create_pidfile(char *name, int port) -{ - char pidfile[1024]; - FILE *fp; - - snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", - PIDFILE_DIR, name, port); - - if ((fp = fopen(pidfile, "w"))) { - fprintf(fp, "%d\n", getpid()); - fclose(fp); - } else { - syslog(LOG_ERR, "%s: %s\n", pidfile, - strerror(errno)); - } -} - -int pidfile_exists(char *name, int port) -{ - char pidfile[1024]; - - snprintf(pidfile, sizeof(pidfile), "%s/%s-%d.pid", - PIDFILE_DIR, name, port); - - if (!access(pidfile, F_OK)) { - fprintf(stderr, 
"%s: exists, acceptor already running.\n", - pidfile); - return (1); - } - return (0); -} - -int -parse_size (int *sizep, char *str) -{ - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) - { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) - { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } -} - -void -show_connection (int fd, __u32 net_ip, ptl_nid_t nid) -{ - struct hostent *h = gethostbyaddr ((char *)&net_ip, sizeof net_ip, AF_INET); - __u32 host_ip = ntohl (net_ip); - int rxmem = 0; - int txmem = 0; - int nonagle = 0; - int len; - char host[1024]; - - len = sizeof (txmem); - if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &len) != 0) - perror ("Cannot get write buffer size"); - - len = sizeof (rxmem); - if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &len) != 0) - perror ("Cannot get read buffer size"); - - len = sizeof (nonagle); - if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &len) != 0) - perror ("Cannot get nagle"); - - if (h == NULL) - snprintf (host, sizeof(host), "%d.%d.%d.%d", (host_ip >> 24) & 0xff, - (host_ip >> 16) & 0xff, (host_ip >> 8) & 0xff, host_ip & 0xff); - else - snprintf (host, sizeof(host), "%s", h->h_name); - - syslog (LOG_INFO, "Accepted host: %s NID: "LPX64" snd: %d rcv %d nagle: %s\n", - host, nid, txmem, rxmem, nonagle ? 
"disabled" : "enabled"); -} - -int -sock_write (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = write (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) - { - fprintf (stderr, "Unexpected zero sock_write\n"); - abort(); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int -sock_read (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = read (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) /* EOF */ - { - errno = ECONNABORTED; - return (-1); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) -{ - int rc; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. 
*/ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (__cpu_to_le32 (hmv->magic) != PORTALS_PROTO_MAGIC) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (__cpu_to_le16 (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR || - __cpu_to_le16 (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __cpu_to_le16 (hmv->version_major), - __cpu_to_le16 (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... */ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)) != 0) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __cpu_to_le32 (hdr.type), - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr))); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - return (0); -} - -void -usage (char *myname) -{ - fprintf (stderr, "Usage: %s [-r recv_mem] [-s send_mem] [-n] [-N nal_id] port\n", myname); - exit (1); -} - -int main(int argc, char **argv) -{ - int o, fd, rc, port, pfd; - struct sockaddr_in srvaddr; - int c; - int rxmem = 0; - int txmem = 0; - int noclose = 0; - int nonagle = 1; - int nal = SOCKNAL; - int xchg_nids = 0; - int bind_irq = 0; - - while ((c = getopt (argc, argv, "N:r:s:nlxi")) != -1) - switch (c) - { - case 'r': - if (parse_size (&rxmem, optarg) != 0 || rxmem < 0) - usage (argv[0]); - break; - - case 's': - if (parse_size (&txmem, optarg) != 0 || txmem < 0) - usage 
(argv[0]); - break; - - case 'n': - nonagle = 0; - break; - - case 'l': - noclose = 1; - break; - - case 'x': - xchg_nids = 1; - break; - - case 'i': - bind_irq = 1; - break; - - case 'N': - if (parse_size(&nal, optarg) != 0 || - nal < 0 || nal > NAL_MAX_NR) - usage(argv[0]); - break; - - default: - usage (argv[0]); - break; - } - - if (optind >= argc) - usage (argv[0]); - - port = atol(argv[optind++]); - - if (pidfile_exists(PROGNAME, port)) - exit(1); - - memset(&srvaddr, 0, sizeof(srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(port); - srvaddr.sin_addr.s_addr = INADDR_ANY; - - fd = socket(PF_INET, SOCK_STREAM, 0); - if (fd < 0) { - perror("opening socket"); - exit(1); - } - - o = 1; - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &o, sizeof(o))) { - perror("Cannot set REUSEADDR socket opt"); - exit(1); - } - - if (nonagle) - { - o = 1; - rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &o, sizeof (o)); - if (rc != 0) - { - perror ("Cannot disable nagle"); - exit (1); - } - } - - if (txmem != 0) - { - rc = setsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, sizeof (txmem)); - if (rc != 0) - { - perror ("Cannot set write buffer size"); - exit (1); - } - } - - if (rxmem != 0) - { - rc = setsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, sizeof (rxmem)); - if (rc != 0) - { - perror ("Cannot set read buffer size"); - exit (1); - } - } - - rc = bind(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); - if ( rc == -1 ) { - perror("bind: "); - exit(1); - } - - if (listen(fd, 127)) { - perror("listen: "); - exit(1); - } - fprintf(stderr, "listening on port %d\n", port); - - pfd = open("/dev/portals", O_RDWR); - if ( pfd < 0 ) { - perror("opening portals device"); - exit(1); - } - - rc = daemon(1, noclose); - if (rc < 0) { - perror("daemon(): "); - exit(1); - } - - openlog(PROGNAME, LOG_PID, LOG_DAEMON); - syslog(LOG_INFO, "started, listening on port %d\n", port); - create_pidfile(PROGNAME, port); - - while (1) { - struct sockaddr_in clntaddr; - int len = 
sizeof(clntaddr); - int cfd; - struct portal_ioctl_data data; - ptl_nid_t peer_nid; - - cfd = accept(fd, (struct sockaddr *)&clntaddr, &len); - if ( cfd < 0 ) { - perror("accept"); - exit(0); - continue; - } - - if (!xchg_nids) - peer_nid = ntohl (clntaddr.sin_addr.s_addr); /* HOST byte order */ - else - { - PORTAL_IOC_INIT (data); - data.ioc_nal = nal; - rc = ioctl (pfd, IOC_PORTAL_GET_NID, &data); - if (rc < 0) - { - perror ("Can't get my NID"); - close (cfd); - continue; - } - - rc = exchange_nids (cfd, data.ioc_nid, &peer_nid); - if (rc != 0) - { - close (cfd); - continue; - } - } - - show_connection (cfd, clntaddr.sin_addr.s_addr, peer_nid); - - PORTAL_IOC_INIT(data); - data.ioc_fd = cfd; - data.ioc_nal = nal; - data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD; - data.ioc_nid = peer_nid; - data.ioc_flags = bind_irq; - - if (ioctl(pfd, IOC_PORTAL_NAL_CMD, &data) < 0) { - perror("ioctl failed"); - - } else { - printf("client registered\n"); - } - rc = close(cfd); - if (rc) - perror ("close failed"); - } - - closelog(); - exit(0); - -} diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c deleted file mode 100644 index 9ab1c73d..0000000 --- a/lustre/portals/utils/debug.c +++ /dev/null @@ -1,618 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. - * - */ - -#include <stdio.h> -#include <netdb.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <errno.h> -#include <unistd.h> -#include <time.h> -#include <syscall.h> - -#include <sys/types.h> -#include <sys/socket.h> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <sys/mman.h> -#define BUG() /* workaround for module.h includes */ -#include <linux/version.h> - -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) -#include <linux/module.h> -#endif - -#include <portals/api-support.h> -#include <portals/ptlctl.h> -#include "parser.h" - -static char rawbuf[8192]; -static char *buf = rawbuf; -static int max = 8192; -//static int g_pfd = -1; -static int subsystem_array[1 << 8]; -static int debug_mask = ~0; - -static const char *portal_debug_subsystems[] = - {"undefined", "mdc", "mds", "osc", "ost", "class", "obdfs", "llite", - "rpc", "ext2obd", "portals", "socknal", "qswnal", "pinger", "filter", - "obdtrace", "echo", "ldlm", "lov", "gmnal", "router", "ptldb", NULL}; -static const char *portal_debug_masks[] = - {"trace", "inode", "super", "ext2", "malloc", "cache", "info", "ioctl", - "blocks", "net", "warning", "buffs", "other", "dentry", "portals", - "page", "dlmtrace", "error", "emerg", "ha", "rpctrace", "vfstrace", NULL}; - -struct debug_daemon_cmd { - char *cmd; - unsigned int cmdv; -}; - -static const struct debug_daemon_cmd portal_debug_daemon_cmd[] = { - {"start", DEBUG_DAEMON_START}, - {"stop", DEBUG_DAEMON_STOP}, - {"pause", DEBUG_DAEMON_PAUSE}, - {"continue", DEBUG_DAEMON_CONTINUE}, - {0, 0} -}; - -static int do_debug_mask(char *name, int enable) -{ - int found = 0, i; - - for (i = 0; portal_debug_subsystems[i] != NULL; 
i++) { - if (strcasecmp(name, portal_debug_subsystems[i]) == 0 || - strcasecmp(name, "all_subs") == 0) { - printf("%s output from subsystem \"%s\"\n", - enable ? "Enabling" : "Disabling", - portal_debug_subsystems[i]); - subsystem_array[i] = enable; - found = 1; - } - } - for (i = 0; portal_debug_masks[i] != NULL; i++) { - if (strcasecmp(name, portal_debug_masks[i]) == 0 || - strcasecmp(name, "all_types") == 0) { - printf("%s output of type \"%s\"\n", - enable ? "Enabling" : "Disabling", - portal_debug_masks[i]); - if (enable) - debug_mask |= (1 << i); - else - debug_mask &= ~(1 << i); - found = 1; - } - } - - return found; -} - -int dbg_initialize(int argc, char **argv) -{ - memset(subsystem_array, 1, sizeof(subsystem_array)); - return 0; -} - -int jt_dbg_filter(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 0)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - return 0; -} - -int jt_dbg_show(int argc, char **argv) -{ - int i; - - if (argc < 2) { - fprintf(stderr, "usage: %s <subsystem ID or debug mask>\n", - argv[0]); - return 0; - } - - for (i = 1; i < argc; i++) - if (!do_debug_mask(argv[i], 1)) - fprintf(stderr, "Unknown subsystem or debug type: %s\n", - argv[i]); - - return 0; -} - -static int applymask(char* procpath, int value) -{ - int rc; - char buf[64]; - int len = snprintf(buf, 64, "%d", value); - - int fd = open(procpath, O_WRONLY); - if (fd == -1) { - fprintf(stderr, "Unable to open %s: %s\n", - procpath, strerror(errno)); - return fd; - } - rc = write(fd, buf, len+1); - if (rc<0) { - fprintf(stderr, "Write to %s failed: %s\n", - procpath, strerror(errno)); - return rc; - } - close(fd); - return 0; -} - -extern char *dump_filename; -extern int dump(int dev_id, int opc, void *buf); - -static void applymask_all(unsigned int subs_mask, unsigned int debug_mask) -{ - if 
(!dump_filename) { - applymask("/proc/sys/portals/subsystem_debug", subs_mask); - applymask("/proc/sys/portals/debug", debug_mask); - } else { - struct portals_debug_ioctl_data data; - - data.hdr.ioc_len = sizeof(data); - data.hdr.ioc_version = 0; - data.subs = subs_mask; - data.debug = debug_mask; - - dump(OBD_DEV_ID, PTL_IOC_DEBUG_MASK, &data); - } - printf("Applied subsystem_debug=%d, debug=%d to /proc/sys/portals\n", - subs_mask, debug_mask); -} - -int jt_dbg_list(int argc, char **argv) -{ - int i; - - if (argc != 2) { - fprintf(stderr, "usage: %s <subs || types>\n", argv[0]); - return 0; - } - - if (strcasecmp(argv[1], "subs") == 0) { - printf("Subsystems: all_subs"); - for (i = 0; portal_debug_subsystems[i] != NULL; i++) - printf(", %s", portal_debug_subsystems[i]); - printf("\n"); - } else if (strcasecmp(argv[1], "types") == 0) { - printf("Types: all_types"); - for (i = 0; portal_debug_masks[i] != NULL; i++) - printf(", %s", portal_debug_masks[i]); - printf("\n"); - } - else if (strcasecmp(argv[1], "applymasks") == 0) { - unsigned int subsystem_mask = 0; - for (i = 0; portal_debug_subsystems[i] != NULL; i++) { - if (subsystem_array[i]) subsystem_mask |= (1 << i); - } - applymask_all(subsystem_mask, debug_mask); - } - return 0; -} - -/* if 'raw' is true, don't strip the debug information from the front of the - * lines */ -static void dump_buffer(FILE *fd, char *buf, int size, int raw) -{ - char *p, *z; - unsigned long subsystem, debug, dropped = 0, kept = 0; - int max_sub, max_type; - - for (max_sub = 0; portal_debug_subsystems[max_sub] != NULL; max_sub++) - ; - for (max_type = 0; portal_debug_masks[max_type] != NULL; max_type++) - ; - - while (size) { - p = memchr(buf, '\n', size); - if (!p) - break; - subsystem = strtoul(buf, &z, 16); - debug = strtoul(z + 1, &z, 16); - - z++; - /* for some reason %*s isn't working. 
*/ - *p = '\0'; - if (subsystem < max_sub && - subsystem_array[subsystem] && - (!debug || (debug_mask & debug))) { - if (raw) - fprintf(fd, "%s\n", buf); - else - fprintf(fd, "%s\n", z); - //printf("%s\n", buf); - kept++; - } else { - //fprintf(stderr, "dropping line (%lx:%lx): %s\n", subsystem, debug, buf); - dropped++; - } - *p = '\n'; - p++; - size -= (p - buf); - buf = p; - } - - printf("Debug log: %lu lines, %lu kept, %lu dropped.\n", - dropped + kept, kept, dropped); -} - -int jt_dbg_debug_kernel(int argc, char **argv) -{ - int rc, raw = 1; - FILE *fd = stdout; - const int databuf_size = (6 << 20); - struct portal_ioctl_data data, *newdata; - char *databuf = NULL; - - if (argc > 3) { - fprintf(stderr, "usage: %s [file] [raw]\n", argv[0]); - return 0; - } - - if (argc > 1) { - fd = fopen(argv[1], "w"); - if (fd == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - return -1; - } - } - if (argc > 2) - raw = atoi(argv[2]); - - databuf = malloc(databuf_size); - if (!databuf) { - fprintf(stderr, "No memory for buffer.\n"); - goto out; - } - - memset(&data, 0, sizeof(data)); - data.ioc_plen1 = databuf_size; - data.ioc_pbuf1 = databuf; - - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - goto out; - } - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_GET_DEBUG failed: %s\n", - strerror(errno)); - goto out; - } - - newdata = (struct portal_ioctl_data *)buf; - if (newdata->ioc_size > 0) - dump_buffer(fd, databuf, newdata->ioc_size, raw); - - out: - if (databuf) - free(databuf); - if (fd != stdout) - fclose(fd); - return 0; -} - -int jt_dbg_debug_daemon(int argc, char **argv) -{ - int i, rc; - unsigned int cmd = 0; - FILE *fd = stdout; - struct portal_ioctl_data data; - - if (argc <= 1) { - fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|" - "continue]\n", argv[0]); - return 0; - } - for (i = 0; portal_debug_daemon_cmd[i].cmd 
!= NULL; i++) { - if (strcasecmp(argv[1], portal_debug_daemon_cmd[i].cmd) == 0) { - cmd = portal_debug_daemon_cmd[i].cmdv; - break; - } - } - if (portal_debug_daemon_cmd[i].cmd == NULL) { - fprintf(stderr, "usage: %s [start file <#MB>|stop|pause|" - "continue]\n", argv[0]); - return 0; - } - memset(&data, 0, sizeof(data)); - if (cmd == DEBUG_DAEMON_START) { - if (argc < 3) { - fprintf(stderr, "usage: %s [start file <#MB>|stop|" - "pause|continue]\n", argv[0]); - return 0; - } - if (access(argv[2], F_OK) != 0) { - fd = fopen(argv[2], "w"); - if (fd != NULL) { - fclose(fd); - remove(argv[2]); - goto ok; - } - } - if (access(argv[2], W_OK) == 0) - goto ok; - fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], - strerror(errno)); - return -1; -ok: - data.ioc_inllen1 = strlen(argv[2]) + 1; - data.ioc_inlbuf1 = argv[2]; - data.ioc_misc = 0; - if (argc == 4) { - unsigned long size; - errno = 0; - size = strtoul(argv[3], NULL, 0); - if (errno) { - fprintf(stderr, "file size(%s): error %s\n", - argv[3], strerror(errno)); - return -1; - } - data.ioc_misc = size; - } - } - data.ioc_count = cmd; - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_SET_DAEMON, buf); - if (rc < 0) { - fprintf(stderr, "IOC_PORTAL_SET_DEMON failed: %s\n", - strerror(errno)); - return rc; - } - return 0; -} - -int jt_dbg_debug_file(int argc, char **argv) -{ - int rc, fd = -1, raw = 1; - FILE *output = stdout; - char *databuf = NULL; - struct stat statbuf; - - if (argc > 4 || argc < 2) { - fprintf(stderr, "usage: %s <input> [output] [raw]\n", argv[0]); - return 0; - } - - fd = open(argv[1], O_RDONLY); - if (fd < 0) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[1], - strerror(errno)); - return -1; - } -#warning FIXME: cleanup fstat issue here -#ifndef SYS_fstat64 -#define __SYS_fstat__ SYS_fstat -#else -#define __SYS_fstat__ SYS_fstat64 -#endif - rc = syscall(__SYS_fstat__, fd, &statbuf); - 
if (rc < 0) { - fprintf(stderr, "fstat failed: %s\n", strerror(errno)); - goto out; - } - - if (argc >= 3) { - output = fopen(argv[2], "w"); - if (output == NULL) { - fprintf(stderr, "fopen(%s) failed: %s\n", argv[2], - strerror(errno)); - goto out; - } - } - - if (argc == 4) - raw = atoi(argv[3]); - - databuf = mmap(NULL, statbuf.st_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE, fd, 0); - if (databuf == NULL) { - fprintf(stderr, "mmap failed: %s\n", strerror(errno)); - goto out; - } - - dump_buffer(output, databuf, statbuf.st_size, raw); - - out: - if (databuf) - munmap(databuf, statbuf.st_size); - if (output != stdout) - fclose(output); - if (fd > 0) - close(fd); - return 0; -} - -int jt_dbg_clear_debug_buf(int argc, char **argv) -{ - int rc; - struct portal_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_CLEAR_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_CLEAR_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -int jt_dbg_mark_debug_buf(int argc, char **argv) -{ - int rc; - struct portal_ioctl_data data; - char *text; - time_t now = time(NULL); - - if (argc > 2) { - fprintf(stderr, "usage: %s [marker text]\n", argv[0]); - return 0; - } - - if (argc == 2) { - text = argv[1]; - } else { - text = ctime(&now); - text[strlen(text) - 1] = '\0'; /* stupid \n */ - } - - memset(&data, 0, sizeof(data)); - data.ioc_inllen1 = strlen(text) + 1; - data.ioc_inlbuf1 = text; - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_MARK_DEBUG, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_MARK_DEBUG failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - - -int jt_dbg_modules(int argc, char 
**argv) -{ -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct mod_paths { - char *name, *path; - } *mp, mod_paths[] = { - {"portals", "lustre/portals/libcfs"}, - {"ksocknal", "lustre/portals/knals/socknal"}, - {"obdclass", "lustre/obdclass"}, - {"ptlrpc", "lustre/ptlrpc"}, - {"obdext2", "lustre/obdext2"}, - {"ost", "lustre/ost"}, - {"osc", "lustre/osc"}, - {"mds", "lustre/mds"}, - {"mdc", "lustre/mdc"}, - {"llite", "lustre/llite"}, - {"obdecho", "lustre/obdecho"}, - {"ldlm", "lustre/ldlm"}, - {"obdfilter", "lustre/obdfilter"}, - {"extN", "lustre/extN"}, - {"lov", "lustre/lov"}, - {"fsfilt_ext3", "lustre/obdclass"}, - {"fsfilt_extN", "lustre/obdclass"}, - {"mds_ext2", "lustre/mds"}, - {"mds_ext3", "lustre/mds"}, - {"mds_extN", "lustre/mds"}, - {"ptlbd", "lustre/ptlbd"}, - {NULL, NULL} - }; - char *path = ".."; - char *kernel = "linux"; - - if (argc >= 2) - path = argv[1]; - if (argc == 3) - kernel = argv[2]; - if (argc > 3) { - printf("%s [path] [kernel]\n", argv[0]); - return 0; - } - - for (mp = mod_paths; mp->name != NULL; mp++) { - struct module_info info; - int rc; - size_t crap; - int query_module(const char *name, int which, void *buf, - size_t bufsize, size_t *ret); - - rc = query_module(mp->name, QM_INFO, &info, sizeof(info), - &crap); - if (rc < 0) { - if (errno != ENOENT) - printf("query_module(%s) failed: %s\n", - mp->name, strerror(errno)); - } else { - printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path, - mp->path, mp->name, - info.addr + sizeof(struct module)); - } - } - - return 0; -#else - printf("jt_dbg_module is not yet implemented for Linux 2.5\n"); - return 0; -#endif /* linux 2.5 */ -} - -int jt_dbg_panic(int argc, char **argv) -{ - int rc; - struct portal_ioctl_data data; - - if (argc != 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - memset(&data, 0, sizeof(data)); - if (portal_ioctl_pack(&data, &buf, max) != 0) { - fprintf(stderr, "portal_ioctl_pack failed.\n"); - return -1; - } - - rc = l_ioctl(PORTALS_DEV_ID, 
IOC_PORTAL_PANIC, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_PANIC failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} diff --git a/lustre/portals/utils/debugctl.c b/lustre/portals/utils/debugctl.c deleted file mode 100644 index 02cb9b4..0000000 --- a/lustre/portals/utils/debugctl.c +++ /dev/null @@ -1,66 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Some day I'll split all of this functionality into a cfs_debug module - * of its own. That day is not today. 
- * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <portals/api-support.h> -#include <portals/ptlctl.h> -#include "parser.h" - - -command_t list[] = { - {"debug_kernel", jt_dbg_debug_kernel, 0, "usage: debug_kernel [file] [raw], get debug buffer and print it [to a file]"}, - {"debug_daemon", jt_dbg_debug_daemon, 0, "usage: debug_daemon [start file [#MB]|stop|pause|continue], control debug daemon to dump debug buffer to a file"}, - {"debug_file", jt_dbg_debug_file, 0, "usage: debug_file <input> [output] [raw], read debug buffer from input and print it [to output]"}, - {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer"}, - {"mark", jt_dbg_mark_debug_buf, 0, "insert a marker into the kernel debug buffer (args: [marker text])"}, - {"filter", jt_dbg_filter, 0, "filter certain messages (args: subsystem/debug ID)\n"}, - {"show", jt_dbg_show, 0, "enable certain messages (args: subsystem/debug ID)\n"}, - {"list", jt_dbg_list, 0, "list subsystem and debug types (args: subs or types)\n"}, - {"modules", jt_dbg_modules, 0, "provide gdb-friendly module info (arg: <path>)"}, - {"panic", jt_dbg_panic, 0, "cause the kernel to panic"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (dbg_initialize(argc, argv) < 0) - exit(2); - - register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); - - Parser_init("debugctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - unregister_ioc_dev(PORTALS_DEV_ID); - return 0; -} diff --git a/lustre/portals/utils/l_ioctl.c b/lustre/portals/utils/l_ioctl.c deleted file mode 100644 index 722bb57..0000000 --- a/lustre/portals/utils/l_ioctl.c +++ /dev/null @@ -1,281 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - 
* Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <syscall.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <sys/mman.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <unistd.h> - -#include <portals/api-support.h> -#include <portals/ptlctl.h> - -struct ioc_dev { - const char * dev_name; - int dev_fd; -}; - -static struct ioc_dev ioc_dev_list[10]; - -struct dump_hdr { - int magic; - int dev_id; - int opc; -}; - -char * dump_filename; - -static int -open_ioc_dev(int dev_id) -{ - const char * dev_name; - - if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) - return -EINVAL; - - dev_name = ioc_dev_list[dev_id].dev_name; - if (dev_name == NULL) { - fprintf(stderr, "unknown device id: %d\n", dev_id); - return -EINVAL; - } - - if (ioc_dev_list[dev_id].dev_fd < 0) { - int fd = open(dev_name, O_RDWR); - - if (fd < 0) { - fprintf(stderr, "opening %s failed: %s\n" - "hint: the kernel modules may not be loaded\n", - dev_name, strerror(errno)); - return fd; - } - ioc_dev_list[dev_id].dev_fd = fd; - } - - return ioc_dev_list[dev_id].dev_fd; -} - - -static int -do_ioctl(int dev_id, int opc, void *buf) -{ - int fd, rc; - - fd = open_ioc_dev(dev_id); - if (fd < 0) 
- return fd; - - rc = ioctl(fd, opc, buf); - return rc; - -} - -static FILE * -get_dump_file() -{ - FILE *fp = NULL; - - if (!dump_filename) { - fprintf(stderr, "no dump filename\n"); - } else - fp = fopen(dump_filename, "a"); - return fp; -} - -/* - * The dump file should start with a description of which devices are - * used, but for now it will assumed whatever app reads the file will - * know what to do. */ -int -dump(int dev_id, int opc, void *buf) -{ - FILE *fp; - struct dump_hdr dump_hdr; - struct portal_ioctl_hdr * ioc_hdr = (struct portal_ioctl_hdr *) buf; - int rc; - - printf("dumping opc %x to %s\n", opc, dump_filename); - - - dump_hdr.magic = 0xdeadbeef; - dump_hdr.dev_id = dev_id; - dump_hdr.opc = opc; - - fp = get_dump_file(); - if (fp == NULL) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - rc = fwrite(&dump_hdr, sizeof(dump_hdr), 1, fp); - if (rc == 1) - rc = fwrite(buf, ioc_hdr->ioc_len, 1, fp); - fclose(fp); - if (rc != 1) { - fprintf(stderr, "%s: %s\n", dump_filename, - strerror(errno)); - return -EINVAL; - } - - return 0; -} - -/* register a device to send ioctls to. */ -int -register_ioc_dev(int dev_id, const char * dev_name) -{ - - if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) - return -EINVAL; - - unregister_ioc_dev(dev_id); - - ioc_dev_list[dev_id].dev_name = dev_name; - ioc_dev_list[dev_id].dev_fd = -1; - - return dev_id; -} - -void -unregister_ioc_dev(int dev_id) -{ - - if (dev_id < 0 || dev_id >= sizeof(ioc_dev_list)) - return; - if (ioc_dev_list[dev_id].dev_name != NULL && - ioc_dev_list[dev_id].dev_fd >= 0) - close(ioc_dev_list[dev_id].dev_fd); - - ioc_dev_list[dev_id].dev_name = NULL; - ioc_dev_list[dev_id].dev_fd = -1; -} - -/* If this file is set, then all ioctl buffers will be - appended to the file. 
*/ -int -set_ioctl_dump(char * file) -{ - if (dump_filename) - free(dump_filename); - - dump_filename = strdup(file); - return 0; -} - -int -l_ioctl(int dev_id, int opc, void *buf) -{ - if (dump_filename) - return dump(dev_id, opc, buf); - else - return do_ioctl(dev_id, opc, buf); -} - -/* Read an ioctl dump file, and call the ioc_func for each ioctl buffer - * in the file. For example: - * - * parse_dump("lctl.dump", l_ioctl); - * - * Note: if using l_ioctl, then you also need to register_ioc_dev() for - * each device used in the dump. - */ -int -parse_dump(char * dump_file, int (*ioc_func)(int dev_id, int opc, void *)) -{ - int fd, line =0; - struct stat st; - char *buf, *end; - - fd = syscall(SYS_open, dump_file, O_RDONLY); - -#warning FIXME: cleanup fstat issue here -#ifndef SYS_fstat64 -#define __SYS_fstat__ SYS_fstat -#else -#define __SYS_fstat__ SYS_fstat64 -#endif - if (syscall(__SYS_fstat__, fd, &st)) { - perror("stat fails"); - exit(1); - } - - if (st.st_size < 1) { - fprintf(stderr, "KML is empty\n"); - exit(1); - } - - buf = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE , fd, 0); - end = buf + st.st_size; - close(fd); - while (buf < end) { - struct dump_hdr *dump_hdr = (struct dump_hdr *) buf; - struct portal_ioctl_hdr * data; - char tmp[8096]; - int rc; - - line++; - - data = (struct portal_ioctl_hdr *) (buf + sizeof(*dump_hdr)); - if (buf + data->ioc_len > end ) { - fprintf(stderr, "dump file overflow, %p + %d > %p\n", buf, - data->ioc_len, end); - return -1; - } -#if 0 - printf ("dump_hdr: %lx data: %lx\n", - (unsigned long)dump_hdr - (unsigned long)buf, (unsigned long)data - (unsigned long)buf); - - printf("%d: opcode %x len: %d ver: %x ", line, dump_hdr->opc, - data->ioc_len, data->ioc_version); -#endif - - memcpy(tmp, data, data->ioc_len); - - rc = ioc_func(dump_hdr->dev_id, dump_hdr->opc, tmp); - if (rc) { - printf("failed: %d\n", rc); - exit(1); - } - - buf += data->ioc_len + sizeof(*dump_hdr); - } - return 0; -} - -int -jt_ioc_dump(int argc, 
char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - printf("setting dumpfile to: %s\n", argv[1]); - - set_ioctl_dump(argv[1]); - return 0; -} diff --git a/lustre/portals/utils/parser.c b/lustre/portals/utils/parser.c deleted file mode 100644 index 4d93645..0000000 --- a/lustre/portals/utils/parser.c +++ /dev/null @@ -1,703 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> -#include <string.h> -#include <stddef.h> -#include <unistd.h> -#include <sys/param.h> -#include <assert.h> - -#include <config.h> -#ifdef HAVE_LIBREADLINE -#define READLINE_LIBRARY -#include <readline/readline.h> -#endif -//extern char **completion_matches __P((char *, rl_compentry_func_t *)); -extern void using_history(void); -extern void stifle_history(int); -extern void add_history(char *); - -#include "parser.h" - -static command_t * top_level; /* Top level of commands, initialized by - * InitParser */ -static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */ -static int done; /* Set to 1 if user types exit or quit */ - - -/* static functions */ -static char *skipwhitespace(char *s); -static char *skiptowhitespace(char *s); -static command_t *find_cmd(char *name, command_t cmds[], char **next); -static int process(char *s, char **next, command_t *lookup, command_t **result, - char **prev); -static void print_commands(char *str, command_t *table); - -static char * skipwhitespace(char * s) -{ - char * t; - int len; - - len = (int)strlen(s); - for (t = s; t <= s + len && isspace(*t); t++); - return(t); -} - - -static char * skiptowhitespace(char * s) -{ - char * t; - - for (t = s; *t && !isspace(*t); t++); - return(t); -} - -static int line2args(char *line, char **argv, int maxargs) -{ - char *arg; - int i = 0; - - arg = strtok(line, " \t"); - if ( arg ) { - argv[i] = arg; - i++; - } else - return 0; - - while( (arg = strtok(NULL, " \t")) && (i <= maxargs)) { - argv[i] = arg; - i++; - } - return i; -} - -/* find a command -- return it if unique otherwise print alternatives */ -static command_t *Parser_findargcmd(char *name, command_t cmds[]) -{ - command_t *cmd; - - for (cmd = cmds; cmd->pc_name; cmd++) { - if (strcmp(name, cmd->pc_name) == 0) - return cmd; - } - return NULL; -} - -int Parser_execarg(int argc, char **argv, command_t cmds[]) -{ - command_t *cmd; - - cmd = 
Parser_findargcmd(argv[0], cmds); - if ( cmd ) { - return (cmd->pc_func)(argc, argv); - } else { - printf("Try interactive use without arguments or use one of:\n"); - for (cmd = cmds; cmd->pc_name; cmd++) - printf("\"%s\" ", cmd->pc_name); - printf("\nas argument.\n"); - } - return -1; -} - -/* returns the command_t * (NULL if not found) corresponding to a - _partial_ match with the first token in name. It sets *next to - point to the following token. Does not modify *name. */ -static command_t * find_cmd(char * name, command_t cmds[], char ** next) -{ - int i, len; - - if (!cmds || !name ) - return NULL; - - /* This sets name to point to the first non-white space character, - and next to the first whitespace after name, len to the length: do - this with strtok*/ - name = skipwhitespace(name); - *next = skiptowhitespace(name); - len = *next - name; - if (len == 0) - return NULL; - - for (i = 0; cmds[i].pc_name; i++) { - if (strncasecmp(name, cmds[i].pc_name, len) == 0) { - *next = skipwhitespace(*next); - return(&cmds[i]); - } - } - return NULL; -} - -/* Recursively process a command line string s and find the command - corresponding to it. This can be ambiguous, full, incomplete, - non-existent. */ -static int process(char *s, char ** next, command_t *lookup, - command_t **result, char **prev) -{ - *result = find_cmd(s, lookup, next); - *prev = s; - - /* non existent */ - if ( ! *result ) - return CMD_NONE; - - /* found entry: is it ambigous, i.e. not exact command name and - more than one command in the list matches. Note that find_cmd - points to the first ambiguous entry */ - if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) && - find_cmd(s, (*result) + 1, next)) - return CMD_AMBIG; - - /* found a unique command: component or full? 
*/ - if ( (*result)->pc_func ) { - return CMD_COMPLETE; - } else { - if ( *next == '\0' ) { - return CMD_INCOMPLETE; - } else { - return process(*next, next, (*result)->pc_sub_cmd, result, prev); - } - } -} - -#ifdef HAVE_LIBREADLINE -static command_t * match_tbl; /* Command completion against this table */ -static char * command_generator(const char * text, int state) -{ - static int index, - len; - char *name; - - /* Do we have a match table? */ - if (!match_tbl) - return NULL; - - /* If this is the first time called on this word, state is 0 */ - if (!state) { - index = 0; - len = (int)strlen(text); - } - - /* Return next name in the command list that paritally matches test */ - while ( (name = (match_tbl + index)->pc_name) ) { - index++; - - if (strncasecmp(name, text, len) == 0) { - return(strdup(name)); - } - } - - /* No more matches */ - return NULL; -} - -/* probably called by readline */ -static char **command_completion(char * text, int start, int end) -{ - command_t * table; - char * pos; - - match_tbl = top_level; - for (table = find_cmd(rl_line_buffer, match_tbl, &pos); - table; - table = find_cmd(pos, match_tbl, &pos)) { - - if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd; - } - - return(completion_matches(text, command_generator)); -} -#endif - -/* take a string and execute the function or print help */ -int execute_line(char * line) -{ - command_t *cmd, *ambig; - char *prev; - char *next, *tmp; - char *argv[MAXARGS]; - int i; - int rc = 0; - - switch( process(line, &next, top_level, &cmd, &prev) ) { - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, cmd, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - cmd = ambig + 1; - } - fprintf(stderr, "\n"); - break; - case CMD_NONE: - fprintf(stderr, "No such command, type help\n"); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_COMPLETE: - i = line2args(line, argv, MAXARGS); - rc = (cmd->pc_func)(i, argv); - - if (rc == CMD_HELP) - fprintf(stderr, "%s\n", cmd->pc_help); - - break; - } - - return rc; -} - -int -noop_fn () -{ - return (0); -} - -/* just in case you're ever in an airplane and discover you - forgot to install readline-dev. :) */ -int init_input() -{ - int interactive = isatty (fileno (stdin)); - -#ifdef HAVE_LIBREADLINE - using_history(); - stifle_history(HISTORY); - - if (!interactive) - { - rl_prep_term_function = (rl_vintfunc_t *)noop_fn; - rl_deprep_term_function = (rl_voidfunc_t *)noop_fn; - } - - rl_attempted_completion_function = (CPPFunction *)command_completion; - rl_completion_entry_function = (void *)command_generator; -#endif - return interactive; -} - -#ifndef HAVE_LIBREADLINE -#define add_history(s) -char * readline(char * prompt) -{ - char line[2048]; - int n = 0; - if (prompt) - printf ("%s", prompt); - if (fgets(line, sizeof(line), stdin) == NULL) - return (NULL); - n = strlen(line); - if (n && line[n-1] == '\n') - line[n-1] = '\0'; - return strdup(line); -} -#endif - -/* this is the command execution machine */ -int Parser_commands(void) -{ - char *line, *s; - int rc = 0; - int interactive; - - interactive = init_input(); - - while(!done) { - line = readline(interactive ? 
parser_prompt : NULL); - - if (!line) break; - - s = skipwhitespace(line); - - if (*s) { - add_history(s); - rc = execute_line(s); - } - - free(line); - } - return rc; -} - - -/* sets the parser prompt */ -void Parser_init(char * prompt, command_t * cmds) -{ - done = 0; - top_level = cmds; - if (parser_prompt) free(parser_prompt); - parser_prompt = strdup(prompt); -} - -/* frees the parser prompt */ -void Parser_exit(int argc, char *argv[]) -{ - done = 1; - free(parser_prompt); - parser_prompt = NULL; -} - -/* convert a string to an integer */ -int Parser_int(char *s, int *val) -{ - int ret; - - if (*s != '0') - ret = sscanf(s, "%d", val); - else if (*(s+1) != 'x') - ret = sscanf(s, "%o", val); - else { - s++; - ret = sscanf(++s, "%x", val); - } - - return(ret); -} - - -void Parser_qhelp(int argc, char *argv[]) { - - printf("Available commands are:\n"); - - print_commands(NULL, top_level); - printf("For more help type: help command-name\n"); -} - -int Parser_help(int argc, char **argv) -{ - char line[1024]; - char *next, *prev, *tmp; - command_t *result, *ambig; - int i; - - if ( argc == 1 ) { - Parser_qhelp(argc, argv); - return 0; - } - - line[0]='\0'; - for ( i = 1 ; i < argc ; i++ ) { - strcat(line, argv[i]); - } - - switch ( process(line, &next, top_level, &result, &prev) ) { - case CMD_COMPLETE: - fprintf(stderr, "%s: %s\n",line, result->pc_help); - break; - case CMD_NONE: - fprintf(stderr, "%s: Unknown command.\n", line); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; result->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, result, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - result = ambig + 1; - } - fprintf(stderr, "\n"); - break; - } - return 0; -} - - -void Parser_printhelp(char *cmd) -{ - char *argv[] = { "help", cmd }; - Parser_help(2, argv); -} - -/************************************************************************* - * COMMANDS * - *************************************************************************/ - - -static void print_commands(char * str, command_t * table) { - command_t * cmds; - char buf[80]; - - for (cmds = table; cmds->pc_name; cmds++) { - if (cmds->pc_func) { - if (str) printf("\t%s %s\n", str, cmds->pc_name); - else printf("\t%s\n", cmds->pc_name); - } - if (cmds->pc_sub_cmd) { - if (str) { - sprintf(buf, "%s %s", str, cmds->pc_name); - print_commands(buf, cmds->pc_sub_cmd); - } else { - print_commands(cmds->pc_name, cmds->pc_sub_cmd); - } - } - } -} - -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len) -{ - char *line = NULL; - int size = strlen(prompt) + strlen(deft) + 8; - char *theprompt; - theprompt = malloc(size); - assert(theprompt); - - sprintf(theprompt, "%s [%s]: ", prompt, deft); - - line = readline(theprompt); - free(theprompt); - - if ( line == NULL || *line == '\0' ) { - strncpy(res, deft, len); - } else { - strncpy(res, line, len); - } - - if ( line ) { - free(line); - return res; - } else { - return NULL; - } -} - -/* get integer from prompt, loop forever to get it */ -int Parser_getint(const char *prompt, long min, long max, long deft, int base) -{ - int rc; - long result; - char *line; - int size = strlen(prompt) + 40; - char *theprompt = malloc(size); - 
assert(theprompt); - sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft); - - fflush(stdout); - - do { - line = NULL; - line = readline(theprompt); - if ( !line ) { - fprintf(stdout, "Please enter an integer.\n"); - fflush(stdout); - continue; - } - if ( *line == '\0' ) { - free(line); - result = deft; - break; - } - rc = Parser_arg2int(line, &result, base); - free(line); - if ( rc != 0 ) { - fprintf(stdout, "Invalid string.\n"); - fflush(stdout); - } else if ( result > max || result < min ) { - fprintf(stdout, "Error: response must lie between %ld and %ld.\n", - min, max); - fflush(stdout); - } else { - break; - } - } while ( 1 ) ; - - if (theprompt) - free(theprompt); - return result; - -} - -/* get boolean (starting with YyNn; loop forever */ -int Parser_getbool(const char *prompt, int deft) -{ - int result = 0; - char *line; - int size = strlen(prompt) + 8; - char *theprompt = malloc(size); - assert(theprompt); - - fflush(stdout); - - if ( deft != 0 && deft != 1 ) { - fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n", - deft); - assert ( 0 ); - } - sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y"); - - do { - line = NULL; - line = readline(theprompt); - if ( line == NULL ) { - result = deft; - break; - } - if ( *line == '\0' ) { - result = deft; - break; - } - if ( *line == 'y' || *line == 'Y' ) { - result = 1; - break; - } - if ( *line == 'n' || *line == 'N' ) { - result = 0; - break; - } - if ( line ) - free(line); - fprintf(stdout, "Invalid string. 
Must start with yY or nN\n"); - fflush(stdout); - } while ( 1 ); - - if ( line ) - free(line); - if ( theprompt ) - free(theprompt); - return result; -} - -/* parse int out of a string or prompt for it */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base) -{ - long result; - int rc; - - rc = Parser_arg2int(inp, &result, base); - - if ( rc == 0 ) { - return result; - } else { - return Parser_getint(prompt, deft, min, max, base); - } -} - -/* parse int out of a string or prompt for it */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len) -{ - if ( inp == NULL || *inp == '\0' ) { - return Parser_getstr(prompt, deft, answer, len); - } else - return inp; -} - -/* change a string into a number: return 0 on success. No invalid characters - allowed. The processing of base and validity follows strtol(3)*/ -int Parser_arg2int(const char *inp, long *result, int base) -{ - char *endptr; - - if ( (base !=0) && (base < 2 || base > 36) ) - return 1; - - *result = strtol(inp, &endptr, base); - - if ( *inp != '\0' && *endptr == '\0' ) - return 0; - else - return 1; -} - -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size (int *sizep, char *str) { - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } -} - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool (int *b, char *str) { - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - !strcasecmp (str, "off") || - !strcasecmp (str, "disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp 
(str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); -} - -int Parser_quit(int argc, char **argv) -{ - argc = argc; - argv = argv; - done = 1; - return 0; -} diff --git a/lustre/portals/utils/parser.h b/lustre/portals/utils/parser.h deleted file mode 100644 index dead9f5..0000000 --- a/lustre/portals/utils/parser.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#define HISTORY 100 /* Don't let history grow unbounded */ -#define MAXARGS 100 - -#define CMD_COMPLETE 0 -#define CMD_INCOMPLETE 1 -#define CMD_NONE 2 -#define CMD_AMBIG 3 -#define CMD_HELP 4 - -typedef struct parser_cmd { - char *pc_name; - int (* pc_func)(int, char **); - struct parser_cmd * pc_sub_cmd; - char *pc_help; -} command_t; - -typedef struct argcmd { - char *ac_name; - int (*ac_func)(int, char **); - char *ac_help; -} argcmd_t; - -typedef struct network { - char *type; - char *server; - int port; -} network_t; - -int Parser_quit(int argc, char **argv); -void Parser_init(char *, command_t *); /* Set prompt and load command list */ -int Parser_commands(void); /* Start the command parser */ -void Parser_qhelp(int, char **); /* Quick help routine */ -int Parser_help(int, char **); /* Detailed help routine */ -void Parser_printhelp(char *); /* Detailed help routine */ -void Parser_exit(int, char **); /* Shuts down command parser */ -int Parser_execarg(int argc, char **argv, command_t cmds[]); -int execute_line(char * line); - -/* Converts a string to an integer */ -int Parser_int(char *, int *); - -/* Prompts for a string, with default values and a maximum length */ -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len); - -/* Prompts for an integer, with minimum, maximum and default values and base */ -int Parser_getint(const char *prompt, long min, long max, long deft, - int base); - -/* Prompts for a yes/no, with default */ -int Parser_getbool(const char *prompt, int deft); 
- -/* Extracts an integer from a string, or prompts if it cannot get one */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base); - -/* Extracts a word from the input, or propmts if it cannot get one */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len); - -/* Extracts an integer from a string with a base */ -int Parser_arg2int(const char *inp, long *result, int base); - -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size(int *sizep, char *str); - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool(int *b, char *str); - -#endif diff --git a/lustre/portals/utils/portals.c b/lustre/portals/utils/portals.c deleted file mode 100644 index 90d66f5..0000000 --- a/lustre/portals/utils/portals.c +++ /dev/null @@ -1,985 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. - * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include <stdio.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/tcp.h> -#include <netdb.h> -#include <stdlib.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <errno.h> -#include <unistd.h> -#include <time.h> -#include <asm/byteorder.h> - -#include <portals/api-support.h> -#include <portals/ptlctl.h> -#include <portals/list.h> -#include <portals/lib-types.h> -#include "parser.h" - -unsigned int portal_debug; -unsigned int portal_printk; -unsigned int portal_stack; - - -static ptl_nid_t g_nid = 0; -static unsigned int g_nal = 0; -static unsigned short g_port = 0; - -static int g_socket_txmem = 0; -static int g_socket_rxmem = 0; -static int g_socket_nonagle = 1; - -typedef struct -{ - char *name; - int num; -} name2num_t; - -static name2num_t nalnames[] = { - {"tcp", SOCKNAL}, - {"toe", TOENAL}, - {"elan", QSWNAL}, - {"gm", GMNAL}, - {"scimac", SCIMACNAL}, - {NULL, -1} -}; - -static name2num_t * -name2num_lookup_name (name2num_t *table, char *str) -{ - while (table->name != NULL) - if (!strcmp (str, table->name)) - return (table); - else - table++; - return (NULL); -} - -static name2num_t * -name2num_lookup_num (name2num_t *table, int num) -{ - while (table->name != NULL) - if (num == table->num) - return (table); - else - table++; - return (NULL); -} - -int -ptl_name2nal (char *str) -{ - name2num_t *e = name2num_lookup_name (nalnames, str); - - return ((e == NULL) ? 0 : e->num); -} - -static char * -nal2name (int nal) -{ - name2num_t *e = name2num_lookup_num (nalnames, nal); - - return ((e == NULL) ? "???" 
: e->name); -} - -int -ptl_parse_nid (ptl_nid_t *nidp, char *str) -{ - struct hostent *he; - int a; - int b; - int c; - int d; - - if (sscanf (str, "%d.%d.%d.%d", &a, &b, &c, &d) == 4 && - (a & ~0xff) == 0 && (b & ~0xff) == 0 && - (c & ~0xff) == 0 && (d & ~0xff) == 0) - { - __u32 addr = (a<<24)|(b<<16)|(c<<8)|d; - - *nidp = (ptl_nid_t)addr; - return (0); - } - - if ((('a' <= str[0] && str[0] <= 'z') || - ('A' <= str[0] && str[0] <= 'Z')) && - (he = gethostbyname (str)) != NULL) - { - __u32 addr = *(__u32 *)he->h_addr; - - *nidp = (ptl_nid_t)ntohl(addr); /* HOST byte order */ - return (0); - } - - if (sscanf (str, "%i", &a) == 1) - { - *nidp = (ptl_nid_t)a; - return (0); - } - - if (sscanf (str, "%x", &a) == 1) - { - *nidp = (ptl_nid_t) a; - return (0); - } - - return (-1); -} - -char * -ptl_nid2str (char *buffer, ptl_nid_t nid) -{ - __u32 addr = htonl((__u32)nid); /* back to NETWORK byte order */ - struct hostent *he = gethostbyaddr ((const char *)&addr, sizeof (addr), AF_INET); - - if (he != NULL) - strcpy (buffer, he->h_name); - else - sprintf (buffer, "0x"LPX64, nid); - - return (buffer); -} - -int -sock_write (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = write (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) - { - fprintf (stderr, "Unexpected zero sock_write\n"); - abort(); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int -sock_read (int cfd, void *buffer, int nob) -{ - while (nob > 0) - { - int rc = read (cfd, buffer, nob); - - if (rc < 0) - { - if (errno == EINTR) - continue; - - return (rc); - } - - if (rc == 0) /* EOF */ - { - errno = ECONNABORTED; - return (-1); - } - - nob -= rc; - buffer = (char *)buffer + nob; - } - - return (0); -} - -int ptl_initialize(int argc, char **argv) -{ - register_ioc_dev(PORTALS_DEV_ID, PORTALS_DEV_PATH); - return 0; -} - - -int jt_ptl_network(int argc, char **argv) -{ - int nal; - - if (argc != 2 || - (nal = 
ptl_name2nal (argv[1])) == 0) - { - name2num_t *entry; - - fprintf(stderr, "usage: %s \n", argv[0]); - for (entry = nalnames; entry->name != NULL; entry++) - fprintf (stderr, "%s%s", entry == nalnames ? "<" : "|", entry->name); - fprintf(stderr, ">\n"); - } - else - g_nal = nal; - - return (0); -} - -int -exchange_nids (int cfd, ptl_nid_t my_nid, ptl_nid_t *peer_nid) -{ - int rc; - ptl_hdr_t hdr; - ptl_magicversion_t *hmv = (ptl_magicversion_t *)&hdr.dest_nid; - - LASSERT (sizeof (*hmv) == sizeof (hdr.dest_nid)); - - memset (&hdr, 0, sizeof (hdr)); - - hmv->magic = __cpu_to_le32 (PORTALS_PROTO_MAGIC); - hmv->version_major = __cpu_to_le16 (PORTALS_PROTO_VERSION_MAJOR); - hmv->version_minor = __cpu_to_le16 (PORTALS_PROTO_VERSION_MINOR); - - hdr.src_nid = __cpu_to_le64 (my_nid); - hdr.type = __cpu_to_le32 (PTL_MSG_HELLO); - - /* Assume there's sufficient socket buffering for a portals HELLO header */ - rc = sock_write (cfd, &hdr, sizeof (hdr)); - if (rc != 0) { - perror ("Can't send initial HELLO"); - return (-1); - } - - /* First few bytes down the wire are the portals protocol magic and - * version, no matter what protocol version we're running. */ - - rc = sock_read (cfd, hmv, sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read from peer"); - return (-1); - } - - if (__cpu_to_le32 (hmv->magic) != PORTALS_PROTO_MAGIC) { - fprintf (stderr, "Bad magic %#08x (%#08x expected)\n", - __cpu_to_le32 (hmv->magic), PORTALS_PROTO_MAGIC); - return (-1); - } - - if (__cpu_to_le16 (hmv->version_major) != PORTALS_PROTO_VERSION_MAJOR || - __cpu_to_le16 (hmv->version_minor) != PORTALS_PROTO_VERSION_MINOR) { - fprintf (stderr, "Incompatible protocol version %d.%d (%d.%d expected)\n", - __cpu_to_le16 (hmv->version_major), - __cpu_to_le16 (hmv->version_minor), - PORTALS_PROTO_VERSION_MAJOR, - PORTALS_PROTO_VERSION_MINOR); - } - - /* version 0 sends magic/version as the dest_nid of a 'hello' header, - * so read the rest of it in now... 
*/ - LASSERT (PORTALS_PROTO_VERSION_MAJOR == 0); - rc = sock_read (cfd, hmv + 1, sizeof (hdr) - sizeof (*hmv)); - if (rc != 0) { - perror ("Can't read rest of HELLO hdr"); - return (-1); - } - - /* ...and check we got what we expected */ - if (__cpu_to_le32 (hdr.type) != PTL_MSG_HELLO || - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr)) != 0) { - fprintf (stderr, "Expecting a HELLO hdr with 0 payload," - " but got type %d with %d payload\n", - __cpu_to_le32 (hdr.type), - __cpu_to_le32 (PTL_HDR_LENGTH (&hdr))); - return (-1); - } - - *peer_nid = __le64_to_cpu (hdr.src_nid); - return (0); -} - -int jt_ptl_connect(int argc, char **argv) -{ - if (argc < 2) { - usage: - fprintf(stderr, "usage: %s <hostname port [xi]> or <elan ID>\n", - argv[0]); - return 0; - } - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - if (g_nal == SOCKNAL || g_nal == TOENAL) { - ptl_nid_t peer_nid; - struct hostent *he; - struct portal_ioctl_data data; - struct sockaddr_in srvaddr; - char *flag; - int fd, rc; - int nonagle = 0; - int rxmem = 0; - int txmem = 0; - int bind_irq = 0; - int xchange_nids = 0; - int o; - int olen; - - if (argc < 3) { - goto usage; - } - - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); - return -1; - } - - g_port = atol(argv[2]); - - if (argc > 3) - for (flag = argv[3]; *flag != 0; flag++) - switch (*flag) - { - case 'i': - bind_irq = 1; - break; - - case 'x': - xchange_nids = 1; - break; - - default: - fprintf (stderr, "unrecognised flag '%c'\n", - *flag); - return (-1); - } - - memset(&srvaddr, 0, sizeof(srvaddr)); - srvaddr.sin_family = AF_INET; - srvaddr.sin_port = htons(g_port); - srvaddr.sin_addr.s_addr = *(__u32 *)he->h_addr; - - fd = socket(PF_INET, SOCK_STREAM, 0); - if ( fd < 0 ) { - fprintf(stderr, "socket() failed: %s\n", - strerror(errno)); - return -1; - } - - if (g_socket_nonagle) - { - o = 1; - if (setsockopt(fd, IPPROTO_TCP, 
TCP_NODELAY, &o, sizeof (o)) != 0) - { - fprintf(stderr, "cannot disable nagle: %s\n", strerror(errno)); - return (-1); - } - } - - if (g_socket_rxmem != 0) - { - o = g_socket_rxmem; - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &o, sizeof (o)) != 0) - { - fprintf(stderr, "cannot set receive buffer size: %s\n", strerror(errno)); - return (-1); - } - } - - if (g_socket_txmem != 0) - { - o = g_socket_txmem; - if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &o, sizeof (o)) != 0) - { - fprintf(stderr, "cannot set send buffer size: %s\n", strerror(errno)); - return (-1); - } - } - - rc = connect(fd, (struct sockaddr *)&srvaddr, sizeof(srvaddr)); - if ( rc == -1 ) { - fprintf(stderr, "connect() failed: %s\n", - strerror(errno)); - return -1; - } - - olen = sizeof (txmem); - if (getsockopt (fd, SOL_SOCKET, SO_SNDBUF, &txmem, &olen) != 0) - fprintf (stderr, "Can't get send buffer size: %s\n", strerror (errno)); - olen = sizeof (rxmem); - if (getsockopt (fd, SOL_SOCKET, SO_RCVBUF, &rxmem, &olen) != 0) - fprintf (stderr, "Can't get receive buffer size: %s\n", strerror (errno)); - olen = sizeof (nonagle); - if (getsockopt (fd, IPPROTO_TCP, TCP_NODELAY, &nonagle, &olen) != 0) - fprintf (stderr, "Can't get nagle: %s\n", strerror (errno)); - - if (xchange_nids) { - - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc != 0) - { - fprintf (stderr, "failed to get my nid: %s\n", - strerror (errno)); - close (fd); - return (-1); - } - - rc = exchange_nids (fd, data.ioc_nid, &peer_nid); - if (rc != 0) - { - close (fd); - return (-1); - } - } - else - peer_nid = ntohl (srvaddr.sin_addr.s_addr); /* HOST byte order */ - - printf("Connected host: %s NID "LPX64" snd: %d rcv: %d nagle: %s\n", argv[1], - peer_nid, txmem, rxmem, nonagle ? 
"Disabled" : "Enabled"); - - PORTAL_IOC_INIT(data); - data.ioc_fd = fd; - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_REGISTER_PEER_FD; - data.ioc_nid = peer_nid; - data.ioc_flags = bind_irq; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc) { - fprintf(stderr, "failed to register fd with portals: " - "%s\n", strerror(errno)); - close (fd); - return -1; - } - - g_nid = peer_nid; - printf("Connection to "LPX64" registered with socknal\n", g_nid); - - rc = close(fd); - if (rc) { - fprintf(stderr, "close failed: %d\n", rc); - } - } else if (g_nal == QSWNAL) { - g_nid = atoi(argv[1]); - } else if (g_nal == GMNAL) { - g_nid = atoi(argv[1]); - } else if (g_nal == SCIMACNAL) { - unsigned int tmpnid; - if(sscanf(argv[1], "%x", &tmpnid) == 1) { - g_nid=tmpnid; - } - else { - fprintf(stderr, "nid %s invalid for SCI nal\n", argv[1]); - } - - - } else { - fprintf(stderr, "This should never happen. Also it is very " - "bad.\n"); - } - - return 0; -} - -int jt_ptl_disconnect(int argc, char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - if (g_nal == SOCKNAL || g_nal == TOENAL) { - struct hostent *he; - struct portal_ioctl_data data; - int rc; - - PORTAL_IOC_INIT(data); - if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); - return -1; - } - - data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */ - - } else { - printf("Disconnecting ALL connections.\n"); - /* leave ioc_nid zeroed == disconnect all */ - } - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_CLOSE_CONNECTION; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc) { - fprintf(stderr, "failed to remove connection: %s\n", - strerror(errno)); - return -1; - } - } else if (g_nal == QSWNAL) { - printf("'disconnect' doesn't make 
any sense for " - "elan.\n"); - } else if (g_nal == GMNAL) { - printf("'disconnect' doesn't make any sense for " - "GM.\n"); - } else if (g_nal == SCIMACNAL) { - printf("'disconnect' doesn't make any sense for " - "SCI.\n"); - } else { - fprintf(stderr, "This should never happen. Also it is very " - "bad.\n"); - return -1; - } - - return 0; -} - -int jt_ptl_push_connection (int argc, char **argv) -{ - if (argc > 2) { - fprintf(stderr, "usage: %s [hostname]\n", argv[0]); - return 0; - } - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - if (g_nal == SOCKNAL || g_nal == TOENAL) { - struct hostent *he; - struct portal_ioctl_data data; - int rc; - - PORTAL_IOC_INIT(data); - if (argc == 2) { - he = gethostbyname(argv[1]); - if (!he) { - fprintf(stderr, "gethostbyname error: %s\n", - strerror(errno)); - return -1; - } - - data.ioc_nid = ntohl (*(__u32 *)he->h_addr); /* HOST byte order */ - - } else { - printf("Pushing ALL connections.\n"); - /* leave ioc_nid zeroed == disconnect all */ - } - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_PUSH_CONNECTION; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc) { - fprintf(stderr, "failed to push connection: %s\n", - strerror(errno)); - return -1; - } - } else if (g_nal == QSWNAL) { - printf("'push' doesn't make any sense for elan.\n"); - } else if (g_nal == GMNAL) { - printf("'push' doesn't make any sense for GM.\n"); - } else if (g_nal == SCIMACNAL) { - printf("'push' doesn't make any sense for SCI.\n"); - } else { - fprintf(stderr, "This should never happen. 
Also it is very " - "bad.\n"); - return -1; - } - - return 0; -} - -int jt_ptl_ping(int argc, char **argv) -{ - int rc; - ptl_nid_t nid; - long count = 1; - long size = 4; - long timeout = 1; - struct portal_ioctl_data data; - - if (argc < 2) { - fprintf(stderr, "usage: %s nid [count] [size] [timeout (secs)]\n", argv[0]); - return 0; - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - - if (ptl_parse_nid (&nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return (-1); - } - - if (argc > 2) - { - count = atol(argv[2]); - - if (count < 0 || count > 20000) - { - fprintf(stderr, "are you insane? %ld is a crazy count.\n", count); - return -1; - } - } - - if (argc > 3) - size= atol(argv[3]); - - if (argc > 4) - timeout = atol (argv[4]); - - PORTAL_IOC_INIT (data); - data.ioc_count = count; - data.ioc_size = size; - data.ioc_nid = nid; - data.ioc_nal = g_nal; - data.ioc_timeout = timeout; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_PING, &data); - if (rc) { - fprintf(stderr, "failed to start pinger: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -int jt_ptl_shownid(int argc, char **argv) -{ - struct portal_ioctl_data data; - int rc; - - if (argc > 1) { - fprintf(stderr, "usage: %s\n", argv[0]); - return 0; - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command first\n"); - return -1; - } - - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_NID, &data); - if (rc < 0) - fprintf(stderr, "getting my NID failed: %s\n", - strerror (errno)); - else - printf(LPX64"\n", data.ioc_nid); - return 0; -} - -int jt_ptl_mynid(int argc, char **argv) -{ - int rc; - char hostname[1024]; - char *nidstr; - struct portal_ioctl_data data; - ptl_nid_t mynid; - - if (argc > 2) { - fprintf(stderr, "usage: %s [NID]\n", argv[0]); - fprintf(stderr, "NID defaults to the primary IP address of the machine.\n"); 
- return 0; - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return -1; - } - - if (argc >= 2) - nidstr = argv[1]; - else if (gethostname(hostname, sizeof(hostname)) != 0) { - fprintf(stderr, "gethostname failed: %s\n", - strerror(errno)); - return -1; - } - else - nidstr = hostname; - - rc = ptl_parse_nid (&mynid, nidstr); - if (rc != 0) { - fprintf (stderr, "Can't convert '%s' into a NID\n", nidstr); - return -1; - } - - PORTAL_IOC_INIT(data); - data.ioc_nid = mynid; - data.ioc_nal = g_nal; - data.ioc_nal_cmd = NAL_CMD_REGISTER_MYNID; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_NAL_CMD, &data); - if (rc < 0) - fprintf(stderr, "setting my NID failed: %s\n", - strerror(errno)); - else - printf("registered my nid "LPX64" (%s)\n", mynid, hostname); - return 0; -} - -int -jt_ptl_fail_nid (int argc, char **argv) -{ - int rc; - ptl_nid_t nid; - unsigned int threshold; - struct portal_ioctl_data data; - - if (argc < 2 || argc > 3) - { - fprintf (stderr, "usage: %s nid|\"_all_\" [count (0 == mend)]\n", argv[0]); - return (0); - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return (-1); - } - - if (!strcmp (argv[1], "_all_")) - nid = PTL_NID_ANY; - else if (ptl_parse_nid (&nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse nid \"%s\"\n", argv[1]); - return (-1); - } - - if (argc < 3) - threshold = PTL_MD_THRESH_INF; - else if (sscanf (argv[2], "%i", &threshold) != 1) { - fprintf (stderr, "Can't parse count \"%s\"\n", argv[2]); - return (-1); - } - - PORTAL_IOC_INIT (data); - data.ioc_nal = g_nal; - data.ioc_nid = nid; - data.ioc_count = threshold; - - rc = l_ioctl (PORTALS_DEV_ID, IOC_PORTAL_FAIL_NID, &data); - if (rc < 0) - fprintf (stderr, "IOC_PORTAL_FAIL_NID failed: %s\n", - strerror (errno)); - else - printf ("%s %s\n", threshold == 0 ? 
"Unfailing" : "Failing", argv[1]); - - return (0); -} - -int -jt_ptl_rxmem (int argc, char **argv) -{ - int size; - - if (argc > 1) - { - if (Parser_size (&size, argv[1]) != 0 || size < 0) - { - fprintf (stderr, "Can't parse size %s\n", argv[1]); - return (0); - } - - g_socket_rxmem = size; - } - printf ("Socket rmem = %d\n", g_socket_rxmem); - return (0); -} - -int -jt_ptl_txmem (int argc, char **argv) -{ - int size; - - if (argc > 1) - { - if (Parser_size (&size, argv[1]) != 0 || size < 0) - { - fprintf (stderr, "Can't parse size %s\n", argv[1]); - return (0); - } - g_socket_txmem = size; - } - printf ("Socket txmem = %d\n", g_socket_txmem); - return (0); -} - -int -jt_ptl_nagle (int argc, char **argv) -{ - int enable; - - if (argc > 1) - { - if (Parser_bool (&enable, argv[1]) != 0) - { - fprintf (stderr, "Can't parse boolean %s\n", argv[1]); - return (0); - } - g_socket_nonagle = !enable; - } - printf ("Nagle %s\n", g_socket_nonagle ? "disabled" : "enabled"); - return (0); -} - -int -jt_ptl_add_route (int argc, char **argv) -{ - struct portal_ioctl_data data; - ptl_nid_t nid1; - ptl_nid_t nid2; - ptl_nid_t gateway_nid; - int rc; - - if (argc < 3) - { - fprintf (stderr, "usage: %s gateway target [target]\n", argv[0]); - return (0); - } - - if (g_nal == 0) { - fprintf(stderr, "Error: you must run the 'network' command " - "first.\n"); - return (-1); - } - - if (ptl_parse_nid (&gateway_nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse gateway NID \"%s\"\n", argv[1]); - return (-1); - } - - if (ptl_parse_nid (&nid1, argv[2]) != 0) - { - fprintf (stderr, "Can't parse first target NID \"%s\"\n", argv[2]); - return (-1); - } - - if (argc < 4) - nid2 = nid1; - else if (ptl_parse_nid (&nid2, argv[3]) != 0) - { - fprintf (stderr, "Can't parse second target NID \"%s\"\n", argv[4]); - return (-1); - } - - PORTAL_IOC_INIT(data); - data.ioc_nid = gateway_nid; - data.ioc_nal = g_nal; - data.ioc_nid2 = MIN (nid1, nid2); - data.ioc_nid3 = MAX (nid1, nid2); - - rc = 
l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_ADD_ROUTE, &data); - if (rc != 0) - { - fprintf (stderr, "IOC_PORTAL_ADD_ROUTE failed: %s\n", strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_del_route (int argc, char **argv) -{ - struct portal_ioctl_data data; - ptl_nid_t nid; - int rc; - - if (argc < 2) - { - fprintf (stderr, "usage: %s targetNID\n", argv[0]); - return (0); - } - - if (ptl_parse_nid (&nid, argv[1]) != 0) - { - fprintf (stderr, "Can't parse target NID \"%s\"\n", argv[1]); - return (-1); - } - - PORTAL_IOC_INIT(data); - data.ioc_nid = nid; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_DEL_ROUTE, &data); - if (rc != 0) - { - fprintf (stderr, "IOC_PORTAL_DEL_ROUTE ("LPX64") failed: %s\n", nid, strerror (errno)); - return (-1); - } - - return (0); -} - -int -jt_ptl_print_routes (int argc, char **argv) -{ - char buffer[3][128]; - struct portal_ioctl_data data; - int rc; - int index; - int gateway_nal; - ptl_nid_t gateway_nid; - ptl_nid_t nid1; - ptl_nid_t nid2; - - - for (index = 0;;index++) - { - PORTAL_IOC_INIT(data); - data.ioc_count = index; - - rc = l_ioctl(PORTALS_DEV_ID, IOC_PORTAL_GET_ROUTE, &data); - if (rc != 0) - break; - - gateway_nal = data.ioc_nal; - gateway_nid = data.ioc_nid; - nid1 = data.ioc_nid2; - nid2 = data.ioc_nid3; - - printf ("%8s %18s : %s - %s\n", - nal2name (gateway_nal), - ptl_nid2str (buffer[0], gateway_nid), - ptl_nid2str (buffer[1], nid1), - ptl_nid2str (buffer[2], nid2)); - } - return (0); -} - diff --git a/lustre/portals/utils/ptlctl.c b/lustre/portals/utils/ptlctl.c deleted file mode 100644 index 8c56d93..0000000 --- a/lustre/portals/utils/ptlctl.c +++ /dev/null @@ -1,65 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001, 2002 Cluster File Systems, Inc. 
- * - * This file is part of Portals, http://www.sf.net/projects/lustre/ - * - * Portals is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Portals is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Portals; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <portals/api-support.h> -#include <portals/ptlctl.h> - -#include "parser.h" - - -command_t list[] = { - {"network", jt_ptl_network, 0,"setup the NAL (args: nal name)"}, - {"connect", jt_ptl_connect, 0, "connect to a remote nid (args: <hostname port> | <id> for tcp/elan respectively)"}, - {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid (args: [hostname]"}, - {"push", jt_ptl_push_connection, 0, "flush connection to a remote nid (args: [hostname]"}, - {"ping", jt_ptl_ping, 0, "do a ping test (args: nid [count] [size] [timeout])"}, - {"shownid", jt_ptl_shownid, 0, "print the local NID"}, - {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local NID (args: [hostname])"}, - {"add_route", jt_ptl_add_route, 0, "add an entry to the routing table (args: gatewayNID targetNID [targetNID])"}, - {"del_route", jt_ptl_del_route, 0, "delete an entry from the routing table (args: targetNID"}, - {"print_routes", jt_ptl_print_routes, 0, "print the routing table (args: none)"}, - {"recv_mem", jt_ptl_rxmem, 0, "Set socket receive buffer size (args: [size])"}, - {"send_mem", jt_ptl_txmem, 0, "Set socket send buffer size (args: [size])"}, - {"nagle", jt_ptl_nagle, 0, "Enable/Disable Nagle (args: [on/off])"}, - 
{"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - {"fail", jt_ptl_fail_nid, 0, "usage: fail nid|_all_ [count]"}, - {"help", Parser_help, 0, "help"}, - {"exit", Parser_quit, 0, "quit"}, - {"quit", Parser_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - -int main(int argc, char **argv) -{ - if (ptl_initialize(argc, argv) < 0) - exit(1); - - Parser_init("ptlctl > ", list); - if (argc > 1) - return Parser_execarg(argc - 1, &argv[1], list); - - Parser_commands(); - - return 0; -} diff --git a/lustre/portals/utils/routerstat.c b/lustre/portals/utils/routerstat.c deleted file mode 100644 index 37da12c..0000000 --- a/lustre/portals/utils/routerstat.c +++ /dev/null @@ -1,99 +0,0 @@ -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/time.h> - -double -timenow () -{ - struct timeval tv; - - gettimeofday (&tv, NULL); - return (tv.tv_sec + tv.tv_usec / 1000000.0); -} - -void -do_stat (int fd) -{ - static char buffer[1024]; - static double last = 0.0; - double now; - double t; - long long bytes; - long packets; - long errors; - long depth; - int n; - - lseek (fd, 0, SEEK_SET); - now = timenow(); - n = read (fd, buffer, sizeof (buffer)); - if (n < 0) - { - fprintf (stderr, "Can't read statfile\n"); - exit (1); - } - buffer[n] = 0; - - n = sscanf (buffer, "%Ld %ld %ld %ld", &bytes, &packets, &errors, &depth); - - if (n < 3) - { - fprintf (stderr, "Can't parse statfile\n"); - exit (1); - } - - if (last == 0.0) - printf ("%Ld bytes, %ld packets (sz %Ld) %ld errors", - bytes, packets, (long long)((packets == 0) ? 0LL : bytes/packets), errors); - else - { - t = now - last; - - printf ("%9Ld (%7.2fMb/s), %7ld packets (sz %5Ld, %5ld/s) %ld errors (%ld/s)", - bytes, ((double)bytes)/((1<<20) * t), - packets, (long long)((packets == 0) ? 
0LL : bytes/packets), (long)(packets/t), - errors, (long)(errors/t)); - } - - if (n == 4) - printf (" (%ld)\n", depth); - else - printf ("\n"); - - fflush (stdout); - - lseek (fd, 0, SEEK_SET); - write (fd, "\n", 1); - last = timenow(); -} - -int main (int argc, char **argv) -{ - int interval = 0; - int fd; - - if (argc > 1) - interval = atoi (argv[1]); - - fd = open ("/proc/sys/portals/router", O_RDWR); - if (fd < 0) - { - fprintf (stderr, "Can't open stat: %s\n", strerror (errno)); - return (1); - } - - do_stat (fd); - if (interval == 0) - return (0); - - for (;;) - { - sleep (interval); - do_stat (fd); - } -} diff --git a/lustre/portals/utils/wirecheck.c b/lustre/portals/utils/wirecheck.c deleted file mode 100644 index 6a4377b..0000000 --- a/lustre/portals/utils/wirecheck.c +++ /dev/null @@ -1,141 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include <stdio.h> -#include <portals/api-support.h> -#include <portals/list.h> -#include <portals/lib-types.h> - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf (" LASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf (" LASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE(offsetof(s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - BLANK_LINE (); \ - COMMENT ("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(s)); \ -} while (0) - -void -check_ptl_handle_wire (void) -{ - CHECK_STRUCT (ptl_handle_wire_t); - CHECK_MEMBER (ptl_handle_wire_t, wh_interface_cookie); - 
CHECK_MEMBER (ptl_handle_wire_t, wh_object_cookie); -} - -void -check_ptl_magicversion (void) -{ - CHECK_STRUCT (ptl_magicversion_t); - CHECK_MEMBER (ptl_magicversion_t, magic); - CHECK_MEMBER (ptl_magicversion_t, version_major); - CHECK_MEMBER (ptl_magicversion_t, version_minor); -} - -void -check_ptl_hdr (void) -{ - CHECK_STRUCT (ptl_hdr_t); - CHECK_MEMBER (ptl_hdr_t, dest_nid); - CHECK_MEMBER (ptl_hdr_t, src_nid); - CHECK_MEMBER (ptl_hdr_t, dest_pid); - CHECK_MEMBER (ptl_hdr_t, src_pid); - CHECK_MEMBER (ptl_hdr_t, type); - - BLANK_LINE (); - COMMENT ("Ack"); - CHECK_MEMBER (ptl_hdr_t, msg.ack.mlength); - CHECK_MEMBER (ptl_hdr_t, msg.ack.dst_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.ack.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.ack.length); - - BLANK_LINE (); - COMMENT ("Put"); - CHECK_MEMBER (ptl_hdr_t, msg.put.ptl_index); - CHECK_MEMBER (ptl_hdr_t, msg.put.ack_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.put.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.put.length); - CHECK_MEMBER (ptl_hdr_t, msg.put.offset); - CHECK_MEMBER (ptl_hdr_t, msg.put.hdr_data); - - BLANK_LINE (); - COMMENT ("Get"); - CHECK_MEMBER (ptl_hdr_t, msg.get.ptl_index); - CHECK_MEMBER (ptl_hdr_t, msg.get.return_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.get.match_bits); - CHECK_MEMBER (ptl_hdr_t, msg.get.length); - CHECK_MEMBER (ptl_hdr_t, msg.get.src_offset); - CHECK_MEMBER (ptl_hdr_t, msg.get.return_offset); - CHECK_MEMBER (ptl_hdr_t, msg.get.sink_length); - - BLANK_LINE (); - COMMENT ("Reply"); - CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_wmd); - CHECK_MEMBER (ptl_hdr_t, msg.reply.dst_offset); - CHECK_MEMBER (ptl_hdr_t, msg.reply.length); -} - -int -main (int argc, char **argv) -{ - printf ("void lib_assert_wire_constants (void)\n" - "{\n"); - - COMMENT ("Wire protocol assertions generated by 'wirecheck'"); - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (PORTALS_PROTO_MAGIC); - CHECK_DEFINE (PORTALS_PROTO_VERSION_MAJOR); - CHECK_DEFINE (PORTALS_PROTO_VERSION_MINOR); - - CHECK_VALUE (PTL_MSG_ACK); - 
CHECK_VALUE (PTL_MSG_PUT); - CHECK_VALUE (PTL_MSG_GET); - CHECK_VALUE (PTL_MSG_REPLY); - CHECK_VALUE (PTL_MSG_HELLO); - - check_ptl_handle_wire (); - check_ptl_magicversion (); - check_ptl_hdr (); - - printf ("}\n\n"); - - return (0); -} diff --git a/lustre/ptlbd/.cvsignore b/lustre/ptlbd/.cvsignore deleted file mode 100644 index e995588..0000000 --- a/lustre/ptlbd/.cvsignore +++ /dev/null @@ -1,3 +0,0 @@ -.deps -Makefile -Makefile.in diff --git a/lustre/ptlbd/Makefile.am b/lustre/ptlbd/Makefile.am deleted file mode 100644 index bfaeb25..0000000 --- a/lustre/ptlbd/Makefile.am +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (C) 2002 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= - -MODULE = ptlbd -modulefs_DATA = ptlbd.o -EXTRA_PROGRAMS = ptlbd - -ptlbd_SOURCES = blk.c client.c main.c rpc.c server.c - -include $(top_srcdir)/Rules diff --git a/lustre/ptlbd/blk.c b/lustre/ptlbd/blk.c deleted file mode 100644 index a367903..0000000 --- a/lustre/ptlbd/blk.c +++ /dev/null @@ -1,301 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Zach Brown <zab@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ -#include <linux/module.h> -#include <linux/major.h> -#include <linux/smp.h> -#include <linux/hdreg.h> - -#define DEBUG_SUBSYSTEM S_PTLBD - -#include <linux/lustre_lite.h> -#include <linux/lustre_ha.h> -#include <linux/obd_support.h> -#include <linux/lustre_idl.h> -#include <linux/obd_ptlbd.h> - -/* - * todo: - * assign proper major number - * allow more minors - * discover actual block sizes? - * allow more than one sector per io - * think about vary-io - * restrict single ops to sequential block io - * ddn target addresses need to be 32 bit - * cant get to addresses after 0xFFFF0000 - */ - -#define PTLBD_MAJOR 253 -#define PTLBD_MAX_MINOR 1 - -#define MAJOR_NR PTLBD_MAJOR -#define LOCAL_END_REQUEST -#include <linux/blk.h> -#include <linux/blkdev.h> -#include <linux/blkpg.h> -#include <linux/devfs_fs_kernel.h> - -static int ptlbd_size_size[PTLBD_MAX_MINOR]; -static int ptlbd_size[PTLBD_MAX_MINOR]; -static int ptlbd_hardsect_size[PTLBD_MAX_MINOR]; -static int ptlbd_max_sectors[PTLBD_MAX_MINOR]; -//RHism static char ptlbd_dev_varyio[PTLBD_MAX_MINOR]; - -/* - * per minor state, indexed by minor. - */ - -static struct ptlbd_obd *one_for_now; - -void ptlbd_blk_register(struct ptlbd_obd *ptlbd) -{ - ENTRY; - one_for_now = ptlbd; - EXIT; -} - -static struct ptlbd_obd * ptlbd_get_minor(int minor) -{ - ENTRY; - if ( minor >= PTLBD_MAX_MINOR ) - RETURN( ERR_PTR(-ENODEV) ); - RETURN(one_for_now); -} - -static struct ptlbd_obd * ptlbd_get_inode(struct inode *inode) -{ - ENTRY; - - if ( inode == NULL ) /* can this really happen? */ - RETURN( ERR_PTR(-EINVAL) ); - - return ptlbd_get_minor(MINOR(inode->i_rdev)); -} - -static int ptlbd_open(struct inode *inode, struct file *file) -{ - struct ptlbd_obd *ptlbd = ptlbd_get_inode(inode); - ENTRY; - - - if ( IS_ERR(ptlbd) ) - RETURN(PTR_ERR(ptlbd)); - - if (! 
ptlbd->bd_import->imp_remote_handle.cookie) - if (ptlbd_do_connect(ptlbd)) - RETURN(-ENOTCONN); - - ptlbd->refcount++; - RETURN(0); -} - - -static int ptlbd_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - struct ptlbd_obd *ptlbd; - int ret; - __u16 major, minor, dev; - struct hd_geometry geo; - - if ( ! capable(CAP_SYS_ADMIN) ) - RETURN(-EPERM); - - ptlbd = ptlbd_get_inode(inode); - if ( IS_ERR(ptlbd) ) - RETURN( PTR_ERR(ptlbd) ); - - major = MAJOR(inode->i_rdev); - minor = MINOR(inode->i_rdev); - dev = inode->i_rdev; - - switch(cmd) { - case HDIO_GETGEO: - geo.heads = 64; - geo.sectors = 32; - geo.start = 4; - geo.cylinders = blk_size[major][minor]/ - (geo.heads * geo.sectors); - if (copy_to_user((void *) arg, &geo, sizeof(geo))) - ret = -EFAULT; - else - ret = 0; - break; - - case BLKSECTGET: - ret = copy_to_user((void *) arg, - & max_sectors[major][minor], sizeof(arg)); - break; - - case BLKFLSBUF: - ret = blk_ioctl(dev, cmd, arg); - ptlbd_send_flush_req(ptlbd, PTLBD_FLUSH); - break; - - case BLKGETSIZE: - case BLKGETSIZE64: - case BLKROSET: - case BLKROGET: - case BLKRASET: - case BLKRAGET: - case BLKSSZGET: - case BLKELVGET: - case BLKELVSET: - default: - ret = blk_ioctl(dev, cmd, arg); - break; - - case BLKSECTSET: /* don't allow setting of max_sectors */ - - case BLKRRPART: /* not a partitionable device */ - case BLKPG: /* "" */ - ret = -EINVAL; - break; - } - - RETURN(ret); -} - -static int ptlbd_release(struct inode *inode, struct file *file) -{ - struct ptlbd_obd *ptlbd = ptlbd_get_inode(inode); - ENTRY; - - if ( IS_ERR(ptlbd) ) - RETURN( PTR_ERR(ptlbd) ); - - if (--ptlbd->refcount == 0) - ptlbd_do_disconnect(ptlbd); - - RETURN(0); -} - -static void ptlbd_end_request_havelock(struct request *req) -{ - struct buffer_head *bh; - int uptodate = 1; - - if ( req->errors ) - uptodate = 0; - - while( (bh = req->bh) != NULL ) { - blk_finished_io(bh->b_size >> 9); - req->bh = bh->b_reqnext; - bh->b_reqnext = NULL; - 
bh->b_end_io(bh, uptodate); - } - blkdev_release_request(req); -} - -#if 0 -static void ptlbd_end_request_getlock(struct request *req) -{ - unsigned long flags; - - spin_lock_irqsave(&io_request_lock, flags); - ptlbd_end_request_havelock(req); - spin_unlock_irqrestore(&io_request_lock, flags); -} -#endif - -static void ptlbd_request(request_queue_t *q) -{ - struct ptlbd_obd *ptlbd; - struct request *req; - ptlbd_cmd_t cmd; - int errors = 0; - ENTRY; - - while ( !QUEUE_EMPTY ) { - req = CURRENT; - ptlbd = ptlbd_get_minor(MINOR(req->rq_dev)); - - blkdev_dequeue_request(req); - - if ( ptlbd->refcount <= 0 ) { - req->errors++; - ptlbd_end_request_havelock(req); - return; - } - - spin_unlock_irq(&io_request_lock); - - if ( req->cmd == READ ) - cmd = PTLBD_READ; - else - cmd = PTLBD_WRITE; - - errors = ptlbd_send_rw_req(ptlbd, cmd, req->bh); - - spin_lock_irq(&io_request_lock); - - if (errors) - req->errors += errors; - - ptlbd_end_request_havelock(req); - } -} - -static struct block_device_operations ptlbd_ops = { - .owner = THIS_MODULE, - .open = ptlbd_open, - .release = ptlbd_release, - .ioctl = ptlbd_ioctl, -}; - -int ptlbd_blk_init(void) -{ - int ret; - int i; - ENTRY; - - ret = register_blkdev(PTLBD_MAJOR, "ptlbd", &ptlbd_ops); - if ( ret < 0 ) - RETURN(ret); - - blk_size[PTLBD_MAJOR] = ptlbd_size; - blksize_size[PTLBD_MAJOR] = ptlbd_size_size; - hardsect_size[PTLBD_MAJOR] = ptlbd_hardsect_size; - max_sectors[PTLBD_MAJOR] = ptlbd_max_sectors; - - blk_init_queue(BLK_DEFAULT_QUEUE(PTLBD_MAJOR), ptlbd_request); - blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0); - - for ( i = 0 ; i < PTLBD_MAX_MINOR ; i++) { - ptlbd_size_size[i] = 4096; - /* avoid integer overflow */ - ptlbd_size[i] = (16*1024*((1024*1024) >> BLOCK_SIZE_BITS)); - ptlbd_hardsect_size[i] = 4096; - ptlbd_max_sectors[i] = PTL_MD_MAX_IOV * (4096/512); - } - - return 0; -} - -void ptlbd_blk_exit(void) -{ - ENTRY; - blk_cleanup_queue(BLK_DEFAULT_QUEUE(PTLBD_MAJOR)); - unregister_blkdev(PTLBD_MAJOR, 
"ptlbd"); -} - -#undef MAJOR_NR diff --git a/lustre/ptlbd/client.c b/lustre/ptlbd/client.c deleted file mode 100644 index af76523..0000000 --- a/lustre/ptlbd/client.c +++ /dev/null @@ -1,232 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Zach Brown <zab@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/version.h> -#include <linux/module.h> -#include <linux/fs.h> - -#define DEBUG_SUBSYSTEM S_PTLBD - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_debug.h> -#include <linux/lprocfs_status.h> -#include <linux/obd_ptlbd.h> - -static int ptlbd_cl_setup(struct obd_device *obd, obd_count len, void *buf) -{ - struct ptlbd_obd *ptlbd = &obd->u.ptlbd; - struct obd_import *imp; - struct obd_ioctl_data* data = buf; - ENTRY; - - if (ptlbd->bd_import != NULL) - RETURN(-EALREADY); - - if (data->ioc_inllen1 < 1) { - CERROR("requires a PTLBD server UUID\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1 > 37) { - CERROR("PTLBD server UUID must be less than 38 characters\n"); - RETURN(-EINVAL); - } - - obd_str2uuid(&ptlbd->bd_server_uuid, data->ioc_inlbuf1); - - /* - * from client_obd_connect.. 
*shrug* - */ - imp = ptlbd->bd_import = class_new_import(); - imp->imp_connection = ptlrpc_uuid_to_connection(&ptlbd->bd_server_uuid); - if (!imp->imp_connection) { - class_destroy_import(imp); - class_import_put(imp); - RETURN(-ENOENT); - } - imp->imp_level = LUSTRE_CONN_FULL; - - ptlrpc_init_client(PTLBD_REQUEST_PORTAL, PTLBD_REPLY_PORTAL, - "ptlbd", &ptlbd->bd_client); - imp->imp_client = &ptlbd->bd_client; - imp->imp_obd = obd; - memcpy(imp->imp_target_uuid.uuid, data->ioc_inlbuf1, data->ioc_inllen1); - ptlbd_blk_register(ptlbd); - - RETURN(0); -} - -static int ptlbd_cl_cleanup(struct obd_device *obd, int force, int failover) -{ - struct ptlbd_obd *ptlbd = &obd->u.ptlbd; - struct obd_import *imp; - ENTRY; - - if ((!ptlbd) || (!(imp = ptlbd->bd_import))) - RETURN(-ENOENT); - - if (!imp->imp_connection) - RETURN(-ENOENT); - - ptlrpc_cleanup_client(imp); - ptlrpc_put_connection(imp->imp_connection); - - class_destroy_import(imp); - class_import_put(imp); - - RETURN(0); -} - - -/* modelled after ptlrpc_import_connect() */ -int ptlbd_cl_connect(struct lustre_handle *conn, - struct obd_device *obd, - struct obd_uuid *target_uuid) -{ - struct ptlbd_obd *ptlbd = &obd->u.ptlbd; - struct obd_import *imp = ptlbd->bd_import; - struct obd_export *exp; - struct ptlrpc_request *request; - int rc, size[] = {sizeof(imp->imp_target_uuid), - sizeof(obd->obd_uuid), - sizeof(*conn)}; - char *tmp[] = {imp->imp_target_uuid.uuid, - obd->obd_uuid.uuid, - (char*)conn}; - ENTRY; - - if (!conn || !obd || !target_uuid) - RETURN(-EINVAL); - - rc = class_connect(conn, obd, target_uuid); - if (rc) - RETURN(rc); - - request = ptlrpc_prep_req(imp, PTLBD_CONNECT, 3, size, tmp); - if (!request) - GOTO(out_disco, rc = -ENOMEM); - request->rq_level = LUSTRE_CONN_NEW; - request->rq_replen = lustre_msg_size(0, NULL); - - imp->imp_dlm_handle = *conn; - - imp->imp_level = LUSTRE_CONN_CON; - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out_req, rc); - - exp = class_conn2export(conn); - 
exp->exp_connection = ptlrpc_connection_addref(request->rq_connection); - class_export_put(exp); - - imp->imp_level = LUSTRE_CONN_FULL; - imp->imp_remote_handle = request->rq_repmsg->handle; - -out_req: - ptlrpc_req_finished(request); -out_disco: - if (rc) - class_disconnect(conn, 0); - RETURN(rc); -} - - -/* modelled after ptlrpc_import_disconnect() */ -int ptlbd_cl_disconnect(struct lustre_handle *conn, int failover) -{ - struct obd_device *obd = class_conn2obd(conn); - struct ptlbd_obd *ptlbd = &obd->u.ptlbd; - struct obd_import *imp = ptlbd->bd_import; - struct ptlrpc_request *request; - int rc, err; - ENTRY; - - if (!obd) - RETURN(-EINVAL); - - request = ptlrpc_prep_req(imp, PTLBD_DISCONNECT, 0, NULL, NULL); - if (!request) - GOTO(out_req, rc = -ENOMEM); - - request->rq_replen = lustre_msg_size(0, NULL); - request->rq_level = LUSTRE_CONN_RECOVER; - - rc = ptlrpc_queue_wait(request); - -out_req: - if (request) - ptlrpc_req_finished(request); - err = class_disconnect(conn, 0); - memset(&imp->imp_remote_handle, 0, sizeof(imp->imp_remote_handle)); - if (!rc && err) - rc = err; - RETURN(rc); -} - - -static struct obd_ops ptlbd_cl_obd_ops = { - o_owner: THIS_MODULE, - o_setup: ptlbd_cl_setup, - o_cleanup: ptlbd_cl_cleanup, - o_connect: ptlbd_cl_connect, - o_disconnect: ptlbd_cl_disconnect, -}; - -int ptlbd_cl_init(void) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_vars(&lvars); - return class_register_type(&ptlbd_cl_obd_ops, lvars.module_vars, - OBD_PTLBD_CL_DEVICENAME); -} - -void ptlbd_cl_exit(void) -{ - class_unregister_type(OBD_PTLBD_CL_DEVICENAME); -} - - - -int ptlbd_do_connect(struct ptlbd_obd *ptlbd) -{ - int rc; - struct obd_device *obd = ptlbd->bd_import->imp_obd; - ENTRY; - - memset(&ptlbd->bd_connect_handle, 0, sizeof(ptlbd->bd_connect_handle)); - rc = obd_connect(&ptlbd->bd_connect_handle, obd, - &ptlbd->bd_server_uuid); - RETURN(rc); -} - - -int ptlbd_do_disconnect(struct ptlbd_obd *ptlbd) -{ - int rc; - ENTRY; - - rc = 
obd_disconnect(&ptlbd->bd_connect_handle, 0); - RETURN(rc); -} - diff --git a/lustre/ptlbd/main.c b/lustre/ptlbd/main.c deleted file mode 100644 index e3fde99..0000000 --- a/lustre/ptlbd/main.c +++ /dev/null @@ -1,71 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/module.h> -#include <linux/major.h> -#include <linux/smp.h> - -#define DEBUG_SUBSYSTEM S_PTLBD - -#include <linux/lustre_lite.h> -#include <linux/lustre_ha.h> -#include <linux/obd_support.h> - -#include <linux/obd_ptlbd.h> - -static int __init ptlbd_init(void) -{ - int ret; - ENTRY; - - ret = ptlbd_cl_init(); - if ( ret < 0 ) - RETURN(ret); - - ret = ptlbd_sv_init(); - if ( ret < 0 ) - GOTO(out_cl, ret); - - ret = ptlbd_blk_init(); - if ( ret < 0 ) - GOTO(out_sv, ret); - - RETURN(0); - -out_sv: - ptlbd_sv_exit(); -out_cl: - ptlbd_cl_exit(); - RETURN(ret); -} - -static void __exit ptlbd_exit(void) -{ - ENTRY; - ptlbd_cl_exit(); - ptlbd_sv_exit(); - ptlbd_blk_exit(); - EXIT; -} - -module_init(ptlbd_init); -module_exit(ptlbd_exit); -MODULE_LICENSE("GPL"); diff --git a/lustre/ptlbd/rpc.c b/lustre/ptlbd/rpc.c deleted file mode 100644 index 9829900..0000000 --- a/lustre/ptlbd/rpc.c +++ /dev/null @@ -1,385 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Zach Brown <zab@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/version.h> -#include <linux/module.h> -#include <linux/fs.h> - -#define DEBUG_SUBSYSTEM S_PTLBD - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_debug.h> -#include <linux/lprocfs_status.h> -#include <linux/obd_ptlbd.h> - -int ptlbd_send_rw_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd, - struct buffer_head *first_bh) -{ - struct obd_import *imp = ptlbd->bd_import; - struct ptlbd_op *op; - struct ptlbd_niob *niob, *niobs; - struct ptlbd_rsp *rsp; - struct ptlrpc_request *req; - struct ptlrpc_bulk_desc *desc; - struct buffer_head *bh; - unsigned int page_count; - int rc, rep_size, size[2]; - ENTRY; - - LASSERT(cmd == PTLBD_READ || cmd == PTLBD_WRITE); - - for ( page_count = 0, bh = first_bh ; bh ; bh = bh->b_reqnext ) - page_count++; - - size[0] = sizeof(struct ptlbd_op); - size[1] = page_count * sizeof(struct ptlbd_niob); - - req = ptlrpc_prep_req(imp, cmd, 2, size, NULL); - if (!req) - RETURN(rc = 1); /* need to return error cnt */ - - op = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*op)); - niobs = lustre_msg_buf(req->rq_reqmsg, 1, size[1]); - - /* XXX pack */ - op->op_cmd = cmd; - op->op_lun = 0; - op->op_niob_cnt = page_count; - op->op__padding = 0; - op->op_block_cnt = page_count; - - if (cmd == PTLBD_READ) - desc = ptlrpc_prep_bulk_imp (req, BULK_PUT_SINK, PTLBD_BULK_PORTAL); - else - desc = ptlrpc_prep_bulk_imp (req, BULK_GET_SOURCE, PTLBD_BULK_PORTAL); - if ( desc == NULL ) - GOTO(out, rc = 1); /* need to return error cnt */ - /* NB req now owns desc, and frees it when she frees herself */ - - for ( niob = niobs, bh = first_bh ; bh ; bh = bh->b_reqnext, niob++ ) { - rc = ptlrpc_prep_bulk_page(desc, bh->b_page, - bh_offset (bh) & (PAGE_SIZE - 1), - bh->b_size); - if (rc != 0) - GOTO(out, rc = 1); /* need to return error cnt */ - - niob->n_block_nr = bh->b_blocknr; - niob->n_offset = bh_offset(bh); - niob->n_length = bh->b_size; - } - - rep_size = sizeof(struct ptlbd_rsp); - req->rq_replen = 
lustre_msg_size(1, &rep_size); - - /* XXX find out how we're really supposed to manage levels */ - req->rq_level = imp->imp_level; - rc = ptlrpc_queue_wait(req); - - if ( rc != 0 ) - GOTO(out, rc = 1); /* need to return error count */ - - rsp = lustre_swab_repbuf(req, 0, sizeof (*rsp), - lustre_swab_ptlbd_rsp); - if (rsp == NULL) { - CERROR ("can't unpack response\n"); - GOTO (out, rc = 1); /* need to return error count */ - } - else if (rsp->r_status != 0) { - rc = rsp->r_error_cnt; - } - -out: - ptlrpc_req_finished(req); - RETURN(rc); -} - - -int ptlbd_send_flush_req(struct ptlbd_obd *ptlbd, ptlbd_cmd_t cmd) -{ - struct obd_import *imp = ptlbd->bd_import; - struct ptlbd_op *op; - struct ptlbd_rsp *rsp; - struct ptlrpc_request *req; - int rc, rep_size, size[1]; - ENTRY; - - LASSERT(cmd == PTLBD_FLUSH); - - size[0] = sizeof(struct ptlbd_op); - - req = ptlrpc_prep_req(imp, cmd, 1, size, NULL); - if (!req) - RETURN(-ENOMEM); - - op = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*op)); - - /* XXX pack */ - op->op_cmd = cmd; - op->op_lun = 0; - op->op_niob_cnt = 0; - op->op__padding = 0; - op->op_block_cnt = 0; - - rep_size = sizeof(struct ptlbd_rsp); - req->rq_replen = lustre_msg_size(1, &rep_size); - - /* XXX find out how we're really supposed to manage levels */ - req->rq_level = imp->imp_level; - - rc = ptlrpc_queue_wait(req); - if ( rc != 0 ) - GOTO(out_req, rc = 1); - rsp = lustre_swab_repbuf(req, 0, sizeof (*rsp), - lustre_swab_ptlbd_rsp); - if (rsp->r_status != 0) - rc = rsp->r_status; - -out_req: - ptlrpc_req_finished(req); - RETURN(rc); -} - - -int ptlbd_do_filp(struct file *filp, int op, struct ptlbd_niob *niobs, - int page_count, struct list_head *page_list) -{ - mm_segment_t old_fs; - struct list_head *pos; - int status = 0; - ENTRY; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - - list_for_each(pos, page_list) { - ssize_t ret; - struct page *page = list_entry(pos, struct page, list); - loff_t offset = (niobs->n_block_nr << PAGE_SHIFT) + - niobs->n_offset; - 
if ( op == PTLBD_READ ) - ret = filp->f_op->read(filp, page_address(page), - niobs->n_length, &offset); - else - ret = filp->f_op->write(filp, page_address(page), - niobs->n_length, &offset); - if (ret != niobs->n_length) { - status = ret; - break; - } - niobs++; - } - set_fs(old_fs); - RETURN(status); -} - - -int ptlbd_srv_rw_req(ptlbd_cmd_t cmd, __u16 index, - struct ptlrpc_request *req, int swab) -{ - struct ptlbd_niob *niob, *niobs; - struct ptlbd_rsp *rsp; - struct ptlrpc_bulk_desc *desc = NULL; - struct file *filp = req->rq_obd->u.ptlbd.filp; - struct l_wait_info lwi; - int size[1], i, page_count, rc = 0, error_cnt = 0; - struct list_head *pos, *n; - struct page *page; - LIST_HEAD(tmp_pages); - ENTRY; - - niobs = lustre_swab_reqbuf (req, 1, sizeof (*niobs), - lustre_swab_ptlbd_niob); - if (niobs == NULL) - GOTO (out, rc = -EFAULT); - - size[0] = sizeof(struct ptlbd_rsp); - rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg); - if ( rc ) - GOTO(out, rc); - - rsp = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*rsp)); - if ( rsp == NULL ) - GOTO (out, rc = -EFAULT); - - page_count = req->rq_reqmsg->buflens[1] / sizeof(struct ptlbd_niob); - if (swab) { /* swab remaining niobs */ - for (i = 1; i < page_count; i++) - lustre_swab_ptlbd_niob(&niobs[i]); - } - if (req->rq_export == NULL) { - error_cnt++; - GOTO(out_reply, rc = -EFAULT); - } - - if (cmd == PTLBD_READ) - desc = ptlrpc_prep_bulk_exp (req, BULK_PUT_SOURCE, PTLBD_BULK_PORTAL); - else - desc = ptlrpc_prep_bulk_exp (req, BULK_GET_SINK, PTLBD_BULK_PORTAL); - if (desc == NULL) { - error_cnt++; - GOTO(out_reply, rc = -ENOMEM); - } - desc->bd_portal = PTLBD_BULK_PORTAL; - LASSERT (page_count > 0); - - for ( i = 0, niob = niobs ; i < page_count; niob++, i++) { - page = alloc_page(GFP_KERNEL); - if (page == NULL) { - error_cnt++; - GOTO(out_reply, rc = -ENOMEM); - } - list_add_tail(&page->list, &tmp_pages); - - rc = ptlrpc_prep_bulk_page(desc, page, - niob->n_offset & (PAGE_SIZE - 1), - 
niob->n_length); - if (rc != 0) { - error_cnt++; - GOTO(out_reply, rc); - } - } - - if ( cmd == PTLBD_READ ) { - if ((rc = ptlbd_do_filp(filp, PTLBD_READ, niobs, - page_count, &tmp_pages)) < 0) { - error_cnt++; - GOTO(out_reply, rc); - } - rc = ptlrpc_bulk_put(desc); - } else { - rc = ptlrpc_bulk_get(desc); - } - - if ( rc ) { - error_cnt++; - GOTO(out_reply, rc); - } - - lwi = LWI_TIMEOUT(obd_timeout * HZ, NULL, desc); - rc = l_wait_event(desc->bd_waitq, ptlrpc_bulk_complete(desc), &lwi); - if (rc != 0) { - LASSERT(rc == -ETIMEDOUT); - ptlrpc_abort_bulk(desc); - error_cnt++; - GOTO(out_reply, rc); - } - - if ( cmd == PTLBD_WRITE ) { - if ((rc = ptlbd_do_filp(filp, PTLBD_WRITE, niobs, - page_count, &tmp_pages)) < 0) { - error_cnt++; - } - } - -out_reply: - rsp->r_error_cnt = error_cnt; - rsp->r_status = rc; - req->rq_status = rc; - - ptlrpc_reply(req); - - list_for_each_safe(pos, n, &tmp_pages) { - struct page *page = list_entry(pos, struct page, list); - list_del(&page->list); - __free_page(page); - } - if (desc) - ptlrpc_free_bulk(desc); -out: - RETURN(rc); -} - - -int ptlbd_srv_flush_req(ptlbd_cmd_t cmd, __u16 index, - struct ptlrpc_request *req) -{ - struct ptlbd_rsp *rsp; - struct file *filp = req->rq_obd->u.ptlbd.filp; - int size[1], rc, status; - ENTRY; - - size[0] = sizeof(struct ptlbd_rsp); - rc = lustre_pack_msg(1, size, NULL, &req->rq_replen, &req->rq_repmsg); - if ( rc ) - RETURN(rc); - - rsp = lustre_msg_buf(req->rq_repmsg, 0, sizeof(*rsp)); - if ( rsp == NULL ) - RETURN(-EINVAL); - - if (! 
(filp) && (filp->f_op) && (filp->f_op->fsync) && - (filp->f_dentry)) - GOTO(out_reply, status = -EINVAL); - - status = filp->f_op->fsync(filp, filp->f_dentry, 1); - -out_reply: - rsp->r_error_cnt = 0; - rsp->r_status = status; - req->rq_status = 0; - - ptlrpc_reply(req); - RETURN(0); -} - - -int ptlbd_handle(struct ptlrpc_request *req) -{ - struct ptlbd_op *op; - int swab; - int rc; - ENTRY; - - swab = lustre_msg_swabbed (req->rq_reqmsg); - - if (req->rq_reqmsg->opc == PTLBD_CONNECT) { - rc = target_handle_connect(req, ptlbd_handle); - target_send_reply(req, rc, OBD_FAIL_PTLRPC); - RETURN(0); - } - if (req->rq_reqmsg->opc == PTLBD_DISCONNECT) { - rc = target_handle_disconnect(req); - target_send_reply(req, rc, OBD_FAIL_PTLRPC); - RETURN(0); - } - op = lustre_swab_reqbuf (req, 0, sizeof (*op), - lustre_swab_ptlbd_op); - if (op == NULL) - RETURN(-EFAULT); - - switch (op->op_cmd) { - case PTLBD_READ: - case PTLBD_WRITE: - rc = ptlbd_srv_rw_req(op->op_cmd, op->op_lun, req, - swab); - break; - - case PTLBD_FLUSH: - rc = ptlbd_srv_flush_req(op->op_cmd, op->op_lun, req); - break; - default: - rc = -EINVAL; - } - - RETURN(rc); -} diff --git a/lustre/ptlbd/server.c b/lustre/ptlbd/server.c deleted file mode 100644 index 34ec737..0000000 --- a/lustre/ptlbd/server.c +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Zach Brown <zab@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/version.h> -#include <linux/module.h> -#include <linux/fs.h> - -#define DEBUG_SUBSYSTEM S_PTLBD - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_debug.h> -#include <linux/lprocfs_status.h> -#include <linux/obd_ptlbd.h> - -#define BACKING_FILE "/tmp/ptlbd-backing-file-la-la-la" - -static int ptlbd_sv_already_setup = 1; - -static int ptlbd_sv_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct ptlbd_obd *ptlbd = &obddev->u.ptlbd; - int rc; - ENTRY; - - ptlbd->filp = filp_open(BACKING_FILE, - O_RDWR|O_CREAT|O_LARGEFILE, 0600); - - if ( IS_ERR(ptlbd->filp) ) - RETURN(PTR_ERR(ptlbd->filp)); - - ptlbd->ptlbd_service = - ptlrpc_init_svc(PTLBD_NEVENTS, PTLBD_NBUFS, PTLBD_BUFSIZE, - PTLBD_MAXREQSIZE, PTLBD_REQUEST_PORTAL, - PTLBD_REPLY_PORTAL, - ptlbd_handle, "ptlbd_sv", obddev); - - if (ptlbd->ptlbd_service == NULL) - GOTO(out_filp, rc = -ENOMEM); - - rc = ptlrpc_start_thread(obddev, ptlbd->ptlbd_service, "ptldb"); - if (rc != 0) - GOTO(out_thread, rc); - - ptlbd_sv_already_setup = 1; - - RETURN(0); - -out_thread: - ptlrpc_stop_all_threads(ptlbd->ptlbd_service); - ptlrpc_unregister_service(ptlbd->ptlbd_service); -out_filp: - filp_close(ptlbd->filp, NULL); - - RETURN(rc); -} - -static int ptlbd_sv_cleanup(struct obd_device *obddev, int force, int failover) -{ - struct ptlbd_obd *ptlbd = &obddev->u.ptlbd; - ENTRY; - - /* XXX check for state */ - - ptlrpc_stop_all_threads(ptlbd->ptlbd_service); - ptlrpc_unregister_service(ptlbd->ptlbd_service); - if ( ! 
IS_ERR(ptlbd->filp) ) - filp_close(ptlbd->filp, NULL); - - ptlbd_sv_already_setup = 0; - RETURN(0); -} - -static struct obd_ops ptlbd_sv_obd_ops = { - o_owner: THIS_MODULE, - o_setup: ptlbd_sv_setup, - o_cleanup: ptlbd_sv_cleanup, - o_connect: class_connect, - o_disconnect: class_disconnect, -}; - -int ptlbd_sv_init(void) -{ - struct lprocfs_static_vars lvars; - - lprocfs_init_vars(&lvars); - return class_register_type(&ptlbd_sv_obd_ops, lvars.module_vars, - OBD_PTLBD_SV_DEVICENAME); -} - -void ptlbd_sv_exit(void) -{ - class_unregister_type(OBD_PTLBD_SV_DEVICENAME); -} diff --git a/lustre/ptlrpc/.cvsignore b/lustre/ptlrpc/.cvsignore deleted file mode 100644 index 067f05c..0000000 --- a/lustre/ptlrpc/.cvsignore +++ /dev/null @@ -1,9 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -tags -TAGS diff --git a/lustre/ptlrpc/Makefile.am b/lustre/ptlrpc/Makefile.am deleted file mode 100644 index eb44329..0000000 --- a/lustre/ptlrpc/Makefile.am +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -DEFS= - -if LIBLUSTRE -lib_LIBRARIES = libptlrpc.a -libptlrpc_a_SOURCES = client.c niobuf.c pack_generic.c recover.c connection.c \ -ptlrpc_module.c events.c ptlrpc_lib.c -else -MODULE = ptlrpc -modulefs_DATA = ptlrpc.o -EXTRA_PROGRAMS = ptlrpc - -ptlrpc_SOURCES = recover.c connection.c ptlrpc_module.c events.c service.c \ -client.c niobuf.c pack_generic.c lproc_ptlrpc.c pinger.c ptlrpc_lib.c \ -ptlrpc_internal.h -endif - -include $(top_srcdir)/Rules diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c deleted file mode 100644 index a98af3e..0000000 --- a/lustre/ptlrpc/client.c +++ /dev/null @@ -1,1605 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#define DEBUG_SUBSYSTEM S_RPC -#ifndef __KERNEL__ -#include <errno.h> -#include <signal.h> -#include <liblustre.h> -#endif - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_ha.h> -#include <linux/lustre_import.h> - -#include "ptlrpc_internal.h" - -void ptlrpc_init_client(int req_portal, int rep_portal, char *name, - struct ptlrpc_client *cl) -{ - cl->cli_request_portal = req_portal; - cl->cli_reply_portal = rep_portal; - cl->cli_name = name; -} - -struct obd_uuid *ptlrpc_req_to_uuid(struct ptlrpc_request *req) -{ - return &req->rq_connection->c_remote_uuid; -} - -struct ptlrpc_connection *ptlrpc_uuid_to_connection(struct obd_uuid *uuid) -{ - struct ptlrpc_connection *c; - struct ptlrpc_peer peer; - int err; - - err = ptlrpc_uuid_to_peer(uuid, &peer); - if (err != 0) { - CERROR("cannot find peer %s!\n", uuid->uuid); - return NULL; - } - - c = ptlrpc_get_connection(&peer, uuid); - if (c) { - memcpy(c->c_remote_uuid.uuid, - uuid->uuid, sizeof(c->c_remote_uuid.uuid)); - c->c_epoch++; - } - - CDEBUG(D_INFO, "%s -> %p\n", uuid->uuid, c); - - return c; -} - -void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, - struct obd_uuid *uuid) -{ - struct ptlrpc_peer peer; - int err; - - err = 
ptlrpc_uuid_to_peer (uuid, &peer); - if (err != 0) { - CERROR("cannot find peer %s!\n", uuid->uuid); - return; - } - - memcpy (&conn->c_peer, &peer, sizeof (peer)); - return; -} - -static inline struct ptlrpc_bulk_desc *new_bulk(void) -{ - struct ptlrpc_bulk_desc *desc; - - OBD_ALLOC(desc, sizeof(*desc)); - if (!desc) - return NULL; - - spin_lock_init (&desc->bd_lock); - init_waitqueue_head(&desc->bd_waitq); - INIT_LIST_HEAD(&desc->bd_page_list); - desc->bd_md_h = PTL_HANDLE_NONE; - desc->bd_me_h = PTL_HANDLE_NONE; - - return desc; -} - -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_imp (struct ptlrpc_request *req, - int type, int portal) -{ - struct obd_import *imp = req->rq_import; - struct ptlrpc_bulk_desc *desc; - - LASSERT (type == BULK_PUT_SINK || type == BULK_GET_SOURCE); - - desc = new_bulk(); - if (desc == NULL) - RETURN(NULL); - - desc->bd_import_generation = req->rq_import_generation; - desc->bd_import = class_import_get(imp); - desc->bd_req = req; - desc->bd_type = type; - desc->bd_portal = portal; - - /* This makes req own desc, and free it when she frees herself */ - req->rq_bulk = desc; - - return desc; -} - -struct ptlrpc_bulk_desc *ptlrpc_prep_bulk_exp (struct ptlrpc_request *req, - int type, int portal) -{ - struct obd_export *exp = req->rq_export; - struct ptlrpc_bulk_desc *desc; - - LASSERT (type == BULK_PUT_SOURCE || type == BULK_GET_SINK); - - desc = new_bulk(); - if (desc == NULL) - RETURN(NULL); - - desc->bd_export = class_export_get(exp); - desc->bd_req = req; - desc->bd_type = type; - desc->bd_portal = portal; - - /* NB we don't assign rq_bulk here; server-side requests are - * re-used, and the handler frees the bulk desc explicitly. 
*/ - - return desc; -} - -int ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc, - struct page *page, int pageoffset, int len) -{ - struct ptlrpc_bulk_page *bulk; - - OBD_ALLOC(bulk, sizeof(*bulk)); - if (bulk == NULL) - return (-ENOMEM); - - LASSERT (page != NULL); - LASSERT (pageoffset >= 0); - LASSERT (len > 0); - LASSERT (pageoffset + len <= PAGE_SIZE); - - bulk->bp_page = page; - bulk->bp_pageoffset = pageoffset; - bulk->bp_buflen = len; - - bulk->bp_desc = desc; - list_add_tail(&bulk->bp_link, &desc->bd_page_list); - desc->bd_page_count++; - return 0; -} - -void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc) -{ - struct list_head *tmp, *next; - ENTRY; - - LASSERT (desc != NULL); - LASSERT (desc->bd_page_count != 0x5a5a5a5a); /* not freed already */ - LASSERT (!desc->bd_network_rw); /* network hands off or */ - - list_for_each_safe(tmp, next, &desc->bd_page_list) { - struct ptlrpc_bulk_page *bulk; - bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); - ptlrpc_free_bulk_page(bulk); - } - - LASSERT (desc->bd_page_count == 0); - LASSERT((desc->bd_export != NULL) ^ (desc->bd_import != NULL)); - - if (desc->bd_export) - class_export_put(desc->bd_export); - else - class_import_put(desc->bd_import); - - OBD_FREE(desc, sizeof(*desc)); - EXIT; -} - -void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk) -{ - LASSERT (bulk != NULL); - - list_del(&bulk->bp_link); - bulk->bp_desc->bd_page_count--; - OBD_FREE(bulk, sizeof(*bulk)); -} - -struct ptlrpc_request *ptlrpc_prep_req(struct obd_import *imp, int opcode, - int count, int *lengths, char **bufs) -{ - struct ptlrpc_request *request; - int rc; - ENTRY; - - LASSERT((unsigned long)imp > 0x1000); - - OBD_ALLOC(request, sizeof(*request)); - if (!request) { - CERROR("request allocation out of memory\n"); - RETURN(NULL); - } - - rc = lustre_pack_msg(count, lengths, bufs, - &request->rq_reqlen, &request->rq_reqmsg); - if (rc) { - CERROR("cannot pack request %d\n", rc); - OBD_FREE(request, sizeof(*request)); - 
RETURN(NULL); - } - - request->rq_timeout = obd_timeout; - request->rq_level = LUSTRE_CONN_FULL; - request->rq_type = PTL_RPC_MSG_REQUEST; - request->rq_import = class_import_get(imp); - request->rq_phase = RQ_PHASE_NEW; - - /* XXX FIXME bug 249 */ - request->rq_request_portal = imp->imp_client->cli_request_portal; - request->rq_reply_portal = imp->imp_client->cli_reply_portal; - - request->rq_connection = ptlrpc_connection_addref(imp->imp_connection); - - spin_lock_init (&request->rq_lock); - INIT_LIST_HEAD(&request->rq_list); - init_waitqueue_head(&request->rq_wait_for_rep); - request->rq_xid = ptlrpc_next_xid(); - atomic_set(&request->rq_refcount, 1); - - request->rq_reqmsg->opc = opcode; - request->rq_reqmsg->flags = 0; - - RETURN(request); -} - -struct ptlrpc_request_set *ptlrpc_prep_set(void) -{ - struct ptlrpc_request_set *set; - - OBD_ALLOC(set, sizeof *set); - if (!set) - RETURN(NULL); - INIT_LIST_HEAD(&set->set_requests); - init_waitqueue_head(&set->set_waitq); - set->set_remaining = 0; - - RETURN(set); -} - -/* Finish with this set; opposite of prep_set. */ -void ptlrpc_set_destroy(struct ptlrpc_request_set *set) -{ - struct list_head *tmp; - struct list_head *next; - int expected_phase; - int n = 0; - ENTRY; - - /* Requests on the set should either all be completed, or all be new */ - expected_phase = (set->set_remaining == 0) ? 
- RQ_PHASE_COMPLETE : RQ_PHASE_NEW; - list_for_each (tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); - - LASSERT (req->rq_phase == expected_phase); - n++; - } - - LASSERT (set->set_remaining == 0 || set->set_remaining == n); - - list_for_each_safe(tmp, next, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); - list_del_init(&req->rq_set_chain); - - LASSERT (req->rq_phase == expected_phase); - - if (req->rq_phase == RQ_PHASE_NEW) { - - if (req->rq_interpret_reply != NULL) { - int (*interpreter)(struct ptlrpc_request *, - void *, int) = - req->rq_interpret_reply; - - /* higher level (i.e. LOV) failed; - * let the sub reqs clean up */ - req->rq_status = -EBADR; - interpreter(req, &req->rq_async_args, req->rq_status); - } - set->set_remaining--; - } - - req->rq_set = NULL; - ptlrpc_req_finished (req); - } - - LASSERT(set->set_remaining == 0); - - OBD_FREE(set, sizeof(*set)); - EXIT; -} - -void ptlrpc_set_add_req(struct ptlrpc_request_set *set, - struct ptlrpc_request *req) -{ - /* The set takes over the caller's request reference */ - list_add_tail(&req->rq_set_chain, &set->set_requests); - req->rq_set = set; - set->set_remaining++; -} - -static int ptlrpc_check_reply(struct ptlrpc_request *req) -{ - unsigned long flags; - int rc = 0; - ENTRY; - - /* serialise with network callback */ - spin_lock_irqsave (&req->rq_lock, flags); - - if (req->rq_replied) { - DEBUG_REQ(D_NET, req, "REPLIED:"); - GOTO(out, rc = 1); - } - - if (req->rq_err) { - DEBUG_REQ(D_ERROR, req, "ABORTED:"); - GOTO(out, rc = 1); - } - - if (req->rq_resend) { - DEBUG_REQ(D_ERROR, req, "RESEND:"); - GOTO(out, rc = 1); - } - - if (req->rq_restart) { - DEBUG_REQ(D_ERROR, req, "RESTART:"); - GOTO(out, rc = 1); - } - EXIT; - out: - spin_unlock_irqrestore (&req->rq_lock, flags); - DEBUG_REQ(D_NET, req, "rc = %d for", rc); - return rc; -} - -static int ptlrpc_check_status(struct 
ptlrpc_request *req) -{ - int err; - ENTRY; - - err = req->rq_repmsg->status; - if (req->rq_repmsg->type == PTL_RPC_MSG_ERR) { - DEBUG_REQ(D_ERROR, req, "type == PTL_RPC_MSG_ERR (%d)", err); - if (err >= 0) - CERROR("Error Reply has >= zero status\n"); - RETURN(err < 0 ? err : -EINVAL); - } - - if (err < 0) { - DEBUG_REQ(D_INFO, req, "status is %d", err); - } else if (err > 0) { - /* XXX: translate this error from net to host */ - DEBUG_REQ(D_INFO, req, "status is %d", err); - } - - RETURN(err); -} - -#warning this needs to change after robert fixes eviction handling -static int after_reply(struct ptlrpc_request *req, int *restartp) -{ - unsigned long flags; - struct obd_import *imp = req->rq_import; - int rc; - ENTRY; - - LASSERT (!req->rq_receiving_reply); - LASSERT (req->rq_replied); - - if (restartp != NULL) - *restartp = 0; - - /* NB Until this point, the whole of the incoming message, - * including buflens, status etc is in the sender's byte order. */ - -#if SWAB_PARANOIA - /* Clear reply swab mask; this is a new reply in sender's byte order */ - req->rq_rep_swab_mask = 0; -#endif - rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen); - if (rc) { - CERROR("unpack_rep failed: %d\n", rc); - RETURN (-EPROTO); - } - - if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY && - req->rq_repmsg->type != PTL_RPC_MSG_ERR) { - CERROR("invalid packet type received (type=%u)\n", - req->rq_repmsg->type); - RETURN (-EPROTO); - } - - /* Store transno in reqmsg for replay. */ - req->rq_reqmsg->transno = req->rq_transno = req->rq_repmsg->transno; - - rc = ptlrpc_check_status(req); - - /* Either we've been evicted, or the server has failed for - * some reason. Try to reconnect, and if that fails, punt to the - * upcall. 
*/ - if (rc == -ENOTCONN) { - if (req->rq_level < LUSTRE_CONN_FULL || req->rq_no_recov || - imp->imp_obd->obd_no_recov || imp->imp_dlm_fake) { - RETURN(-ENOTCONN); - } - - ptlrpc_request_handle_eviction(req); - - if (req->rq_err) - RETURN(-EIO); - - if (req->rq_resend) { - if (restartp == NULL) - LBUG(); /* async resend not supported yet */ - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_resend = 0; - spin_unlock_irqrestore (&req->rq_lock, flags); - *restartp = 1; - lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); - DEBUG_REQ(D_HA, req, "resending: "); - RETURN (0); - } - - CERROR("request should be err or resend: %p\n", req); - LBUG(); - } - - if (req->rq_import->imp_replayable) { - spin_lock_irqsave(&imp->imp_lock, flags); - if ((req->rq_replay || req->rq_transno != 0) && rc >= 0) - ptlrpc_retain_replayable_request(req, imp); - - if (req->rq_transno > imp->imp_max_transno) - imp->imp_max_transno = req->rq_transno; - - /* Replay-enabled imports return commit-status information. */ - if (req->rq_repmsg->last_committed) { - imp->imp_peer_committed_transno = - req->rq_repmsg->last_committed; - } - ptlrpc_free_committed(imp); - spin_unlock_irqrestore(&imp->imp_lock, flags); - } - - RETURN(rc); -} - -int ptlrpc_check_set(struct ptlrpc_request_set *set) -{ - unsigned long flags; - struct list_head *tmp; - ENTRY; - - if (set->set_remaining == 0) - RETURN(1); - - list_for_each(tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); - struct obd_import *imp = req->rq_import; - int rc = 0; - - if (!(req->rq_phase == RQ_PHASE_RPC || - req->rq_phase == RQ_PHASE_BULK || - req->rq_phase == RQ_PHASE_INTERPRET || - req->rq_phase == RQ_PHASE_COMPLETE)) { - DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase); - LBUG(); - } - - if (req->rq_phase == RQ_PHASE_COMPLETE) - continue; - - if (req->rq_phase == RQ_PHASE_INTERPRET) - GOTO (interpret, req->rq_status); - - if (req->rq_err) { - ptlrpc_unregister_reply(req); - 
if (req->rq_status == 0) - req->rq_status = -EIO; - req->rq_phase = RQ_PHASE_INTERPRET; - - spin_lock_irqsave(&imp->imp_lock, flags); - list_del_init(&req->rq_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - GOTO (interpret, req->rq_status); - } - - if (req->rq_intr) { - /* NB could be on delayed list */ - ptlrpc_unregister_reply(req); - req->rq_status = -EINTR; - req->rq_phase = RQ_PHASE_INTERPRET; - - spin_lock_irqsave(&imp->imp_lock, flags); - list_del_init(&req->rq_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - GOTO (interpret, req->rq_status); - } - - if (req->rq_phase == RQ_PHASE_RPC) { - int do_restart = 0; - if (req->rq_waiting || req->rq_resend) { - spin_lock_irqsave(&imp->imp_lock, flags); - - if (req->rq_level > imp->imp_level) { - spin_unlock_irqrestore(&imp->imp_lock, - flags); - continue; - } - - list_del(&req->rq_list); - list_add_tail(&req->rq_list, - &imp->imp_sending_list); - - if (req->rq_import_generation < - imp->imp_generation) { - req->rq_status = -EIO; - req->rq_phase = RQ_PHASE_INTERPRET; - spin_unlock_irqrestore(&imp->imp_lock, - flags); - GOTO (interpret, req->rq_status); - } - spin_unlock_irqrestore(&imp->imp_lock, flags); - - req->rq_waiting = 0; - if (req->rq_resend) { - lustre_msg_add_flags(req->rq_reqmsg, - MSG_RESENT); - spin_lock_irqsave(&req->rq_lock, flags); - req->rq_resend = 0; - spin_unlock_irqrestore(&req->rq_lock, - flags); - ptlrpc_unregister_reply(req); - if (req->rq_bulk) - ptlrpc_unregister_bulk(req); - } - - rc = ptl_send_rpc(req); - if (rc) { - req->rq_status = rc; - req->rq_phase = RQ_PHASE_INTERPRET; - GOTO (interpret, req->rq_status); - } - - } - - /* Ensure the network callback returned */ - spin_lock_irqsave (&req->rq_lock, flags); - if (!req->rq_replied) { - spin_unlock_irqrestore (&req->rq_lock, flags); - continue; - } - spin_unlock_irqrestore (&req->rq_lock, flags); - - spin_lock_irqsave(&imp->imp_lock, flags); - list_del_init(&req->rq_list); - spin_unlock_irqrestore(&imp->imp_lock, 
flags); - - req->rq_status = after_reply(req, &do_restart); - if (do_restart) { - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_resend = 1; /* ugh */ - spin_unlock_irqrestore (&req->rq_lock, flags); - continue; - } - - /* If there is no bulk associated with this request, - * then we're done and should let the interpreter - * process the reply. Similarly if the RPC returned - * an error, and therefore the bulk will never arrive. - */ - if (req->rq_bulk == NULL || req->rq_status != 0) { - req->rq_phase = RQ_PHASE_INTERPRET; - GOTO (interpret, req->rq_status); - } - - req->rq_phase = RQ_PHASE_BULK; - } - - LASSERT (req->rq_phase == RQ_PHASE_BULK); - if (!ptlrpc_bulk_complete (req->rq_bulk)) - continue; - - req->rq_phase = RQ_PHASE_INTERPRET; - - interpret: - LASSERT (req->rq_phase == RQ_PHASE_INTERPRET); - LASSERT (!req->rq_receiving_reply); - - ptlrpc_unregister_reply(req); - if (req->rq_bulk != NULL) - ptlrpc_unregister_bulk (req); - - if (req->rq_interpret_reply != NULL) { - int (*interpreter)(struct ptlrpc_request *,void *,int) = - req->rq_interpret_reply; - req->rq_status = interpreter(req, &req->rq_async_args, - req->rq_status); - } - - CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:" - "opc %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm, - imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, - req->rq_xid, - imp->imp_connection->c_peer.peer_ni->pni_name, - imp->imp_connection->c_peer.peer_nid, - req->rq_reqmsg->opc); - - req->rq_phase = RQ_PHASE_COMPLETE; - set->set_remaining--; - } - - RETURN (set->set_remaining == 0); -} - -int ptlrpc_expire_one_request(struct ptlrpc_request *req) -{ - unsigned long flags; - struct obd_import *imp = req->rq_import; - ENTRY; - - DEBUG_REQ(D_ERROR, req, "timeout"); - - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_timedout = 1; - spin_unlock_irqrestore (&req->rq_lock, flags); - - ptlrpc_unregister_reply (req); - - if (imp == NULL) { - DEBUG_REQ(D_HA, req, "NULL import: already cleaned up?"); - 
RETURN(1); - } - - /* The DLM server doesn't want recovery run on its imports. */ - if (imp->imp_dlm_fake) - RETURN(1); - - /* If this request is for recovery or other primordial tasks, - * don't go back to sleep, and don't start recovery again.. */ - if (req->rq_level < LUSTRE_CONN_FULL || req->rq_no_recov || - imp->imp_obd->obd_no_recov) - RETURN(1); - - ptlrpc_fail_import(imp, req->rq_import_generation); - - RETURN(0); -} - -static int expired_set(void *data) -{ - struct ptlrpc_request_set *set = data; - struct list_head *tmp; - time_t now = LTIME_S (CURRENT_TIME); - ENTRY; - - LASSERT (set != NULL); - - /* A timeout expired; see which reqs it applies to... */ - list_for_each (tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); - - /* request in-flight? */ - if (!((req->rq_phase == RQ_PHASE_RPC && !req->rq_waiting) || - (req->rq_phase == RQ_PHASE_BULK))) - continue; - - if (req->rq_timedout || /* already dealt with */ - req->rq_sent + req->rq_timeout > now) /* not expired */ - continue; - - /* deal with this guy */ - ptlrpc_expire_one_request (req); - } - - /* When waiting for a whole set, we always to break out of the - * sleep so we can recalculate the timeout, or enable interrupts - * iff everyone's timed out. 
- */ - RETURN(1); -} - -static void interrupted_set(void *data) -{ - struct ptlrpc_request_set *set = data; - struct list_head *tmp; - unsigned long flags; - - LASSERT (set != NULL); - CERROR("INTERRUPTED SET %p\n", set); - - list_for_each(tmp, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_set_chain); - - if (req->rq_phase != RQ_PHASE_RPC) - continue; - - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_intr = 1; - spin_unlock_irqrestore (&req->rq_lock, flags); - } -} - -int ptlrpc_set_wait(struct ptlrpc_request_set *set) -{ - struct list_head *tmp; - struct obd_import *imp; - struct ptlrpc_request *req; - struct l_wait_info lwi; - unsigned long flags; - int rc; - time_t now; - time_t deadline; - int timeout; - ENTRY; - - LASSERT(!list_empty(&set->set_requests)); - list_for_each(tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - - LASSERT (req->rq_level == LUSTRE_CONN_FULL); - LASSERT (req->rq_phase == RQ_PHASE_NEW); - req->rq_phase = RQ_PHASE_RPC; - - imp = req->rq_import; - spin_lock_irqsave(&imp->imp_lock, flags); - - if (imp->imp_invalid) { - spin_unlock_irqrestore(&imp->imp_lock, flags); - req->rq_status = -EIO; - req->rq_phase = RQ_PHASE_INTERPRET; - continue; - } - - req->rq_import_generation = imp->imp_generation; - - if (req->rq_level > imp->imp_level) { - if (req->rq_no_recov || imp->imp_obd->obd_no_recov || - imp->imp_dlm_fake) { - spin_unlock_irqrestore(&imp->imp_lock, flags); - req->rq_status = -EWOULDBLOCK; - req->rq_phase = RQ_PHASE_INTERPRET; - continue; - } - - spin_lock (&req->rq_lock); - req->rq_waiting = 1; - spin_unlock (&req->rq_lock); - LASSERT (list_empty (&req->rq_list)); - // list_del(&req->rq_list); - list_add_tail(&req->rq_list, &imp->imp_delayed_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - continue; - } - - /* XXX this is the same as ptlrpc_queue_wait */ - LASSERT(list_empty(&req->rq_list)); - list_add_tail(&req->rq_list, 
&imp->imp_sending_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc" - " %s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm, - imp->imp_obd->obd_uuid.uuid, req->rq_reqmsg->status, - req->rq_xid, - imp->imp_connection->c_peer.peer_ni->pni_name, - imp->imp_connection->c_peer.peer_nid, - req->rq_reqmsg->opc); - - rc = ptl_send_rpc(req); - if (rc) { - req->rq_status = rc; - req->rq_phase = RQ_PHASE_INTERPRET; - } - } - - do { - now = LTIME_S (CURRENT_TIME); - timeout = 0; - list_for_each (tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - - /* request in-flight? */ - if (!((req->rq_phase == RQ_PHASE_RPC && - !req->rq_waiting) || - (req->rq_phase == RQ_PHASE_BULK))) - continue; - - if (req->rq_timedout) /* already timed out */ - continue; - - deadline = req->rq_sent + req->rq_timeout; - if (deadline <= now) /* actually expired already */ - timeout = 1; /* ASAP */ - else if (timeout == 0 || timeout > deadline - now) - timeout = deadline - now; - } - - /* wait until all complete, interrupted, or an in-flight - * req times out */ - CDEBUG(D_HA, "set %p going to sleep for %d seconds\n", - set, timeout); - lwi = LWI_TIMEOUT_INTR((timeout ? timeout : 1) * HZ, - expired_set, interrupted_set, set); - rc = l_wait_event(set->set_waitq, ptlrpc_check_set(set), &lwi); - - LASSERT (rc == 0 || rc == -EINTR || rc == -ETIMEDOUT); - - /* -EINTR => all requests have been flagged rq_intr so next - * check completes. - * -ETIMEOUTD => someone timed out. When all reqs have - * timed out, signals are enabled allowing completion with - * EINTR. - * I don't really care if we go once more round the loop in - * the error cases -eeb. 
*/ - } while (rc != 0); - - LASSERT (set->set_remaining == 0); - - rc = 0; - list_for_each(tmp, &set->set_requests) { - req = list_entry(tmp, struct ptlrpc_request, rq_set_chain); - - LASSERT (req->rq_phase == RQ_PHASE_COMPLETE); - if (req->rq_status != 0) - rc = req->rq_status; - } - - if (set->set_interpret != NULL) { - int (*interpreter)(struct ptlrpc_request_set *set,void *,int) = - set->set_interpret; - rc = interpreter (set, &set->set_args, rc); - } - - RETURN(rc); -} - -static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked) -{ - ENTRY; - if (request == NULL) { - EXIT; - return; - } - - LASSERT (!request->rq_receiving_reply); - - /* We must take it off the imp_replay_list first. Otherwise, we'll set - * request->rq_reqmsg to NULL while osc_close is dereferencing it. */ - if (request->rq_import != NULL) { - unsigned long flags = 0; - if (!locked) - spin_lock_irqsave(&request->rq_import->imp_lock, flags); - list_del_init(&request->rq_list); - if (!locked) - spin_unlock_irqrestore(&request->rq_import->imp_lock, - flags); - } - - if (atomic_read(&request->rq_refcount) != 0) { - DEBUG_REQ(D_ERROR, request, - "freeing request with nonzero refcount"); - LBUG(); - } - - if (request->rq_repmsg != NULL) { - OBD_FREE(request->rq_repmsg, request->rq_replen); - request->rq_repmsg = NULL; - } - if (request->rq_reqmsg != NULL) { - OBD_FREE(request->rq_reqmsg, request->rq_reqlen); - request->rq_reqmsg = NULL; - } - if (request->rq_export != NULL) { - class_export_put(request->rq_export); - request->rq_export = NULL; - } - if (request->rq_import != NULL) { - class_import_put(request->rq_import); - request->rq_import = NULL; - } - if (request->rq_bulk != NULL) - ptlrpc_free_bulk(request->rq_bulk); - - ptlrpc_put_connection(request->rq_connection); - OBD_FREE(request, sizeof(*request)); - EXIT; -} - -void ptlrpc_free_req(struct ptlrpc_request *request) -{ - __ptlrpc_free_req(request, 0); -} - -static int __ptlrpc_req_finished(struct ptlrpc_request *request, 
int locked) -{ - ENTRY; - if (request == NULL) - RETURN(1); - - if (request == (void *)(long)(0x5a5a5a5a5a5a5a5a) || - request->rq_obd == (void *)(long)(0x5a5a5a5a5a5a5a5a)) { - CERROR("dereferencing freed request (bug 575)\n"); - LBUG(); - RETURN(1); - } - - DEBUG_REQ(D_INFO, request, "refcount now %u", - atomic_read(&request->rq_refcount) - 1); - - if (atomic_dec_and_test(&request->rq_refcount)) { - __ptlrpc_free_req(request, locked); - RETURN(1); - } - - RETURN(0); -} - -void ptlrpc_req_finished(struct ptlrpc_request *request) -{ - __ptlrpc_req_finished(request, 0); -} - -static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request) -{ - OBD_FREE(request->rq_reqmsg, request->rq_reqlen); - request->rq_reqmsg = NULL; - request->rq_reqlen = 0; -} - -/* Disengage the client's reply buffer from the network - * NB does _NOT_ unregister any client-side bulk. - * IDEMPOTENT, but _not_ safe against concurrent callers. - * The request owner (i.e. the thread doing the I/O) must call... 
- */ -void ptlrpc_unregister_reply (struct ptlrpc_request *request) -{ - unsigned long flags; - int rc; - ENTRY; - - LASSERT (!in_interrupt ()); /* might sleep */ - - spin_lock_irqsave (&request->rq_lock, flags); - if (!request->rq_receiving_reply) { /* not waiting for a reply */ - spin_unlock_irqrestore (&request->rq_lock, flags); - EXIT; - /* NB reply buffer not freed here */ - return; - } - - LASSERT (!request->rq_replied); /* callback hasn't completed */ - spin_unlock_irqrestore (&request->rq_lock, flags); - - rc = PtlMDUnlink (request->rq_reply_md_h); - switch (rc) { - default: - LBUG (); - - case PTL_OK: /* unlinked before completion */ - LASSERT (request->rq_receiving_reply); - LASSERT (!request->rq_replied); - spin_lock_irqsave (&request->rq_lock, flags); - request->rq_receiving_reply = 0; - spin_unlock_irqrestore (&request->rq_lock, flags); - OBD_FREE(request->rq_repmsg, request->rq_replen); - request->rq_repmsg = NULL; - EXIT; - return; - - case PTL_MD_INUSE: /* callback in progress */ - for (;;) { - /* Network access will complete in finite time but - * the timeout lets us CERROR for visibility */ - struct l_wait_info lwi = LWI_TIMEOUT(10*HZ, NULL, NULL); - - rc = l_wait_event (request->rq_wait_for_rep, - request->rq_replied, &lwi); - LASSERT (rc == 0 || rc == -ETIMEDOUT); - if (rc == 0) { - spin_lock_irqsave (&request->rq_lock, flags); - /* Ensure the callback has completed scheduling - * me and taken its hands off the request */ - spin_unlock_irqrestore(&request->rq_lock,flags); - break; - } - - CERROR ("Unexpectedly long timeout: req %p\n", request); - } - /* fall through */ - - case PTL_INV_MD: /* callback completed */ - LASSERT (!request->rq_receiving_reply); - LASSERT (request->rq_replied); - EXIT; - return; - } - /* Not Reached */ -} - -/* caller must hold imp->imp_lock */ -void ptlrpc_free_committed(struct obd_import *imp) -{ - struct list_head *tmp, *saved; - struct ptlrpc_request *req; - struct ptlrpc_request *last_req = NULL; /* temporary fire 
escape */ - ENTRY; - - LASSERT(imp != NULL); - -#ifdef CONFIG_SMP - LASSERT(spin_is_locked(&imp->imp_lock)); -#endif - - CDEBUG(D_HA, "%s: committing for last_committed "LPU64"\n", - imp->imp_obd->obd_name, imp->imp_peer_committed_transno); - - list_for_each_safe(tmp, saved, &imp->imp_replay_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - - /* XXX ok to remove when 1357 resolved - rread 05/29/03 */ - LASSERT (req != last_req); - last_req = req; - - if (req->rq_import_generation < imp->imp_generation) { - DEBUG_REQ(D_HA, req, "freeing request with old gen"); - GOTO(free_req, 0); - } - - if (req->rq_replay) { - DEBUG_REQ(D_HA, req, "keeping (FL_REPLAY)"); - continue; - } - - /* not yet committed */ - if (req->rq_transno > imp->imp_peer_committed_transno) { - DEBUG_REQ(D_HA, req, "stopping search"); - break; - } - - DEBUG_REQ(D_HA, req, "committing (last_committed "LPU64")", - imp->imp_peer_committed_transno); -free_req: - list_del_init(&req->rq_list); - __ptlrpc_req_finished(req, 1); - } - - EXIT; - return; -} - -void ptlrpc_cleanup_client(struct obd_import *imp) -{ - ENTRY; - EXIT; - return; -} - -void ptlrpc_resend_req(struct ptlrpc_request *req) -{ - unsigned long flags; - - DEBUG_REQ(D_HA, req, "resending"); - req->rq_reqmsg->handle.cookie = 0; - ptlrpc_put_connection(req->rq_connection); - req->rq_connection = - ptlrpc_connection_addref(req->rq_import->imp_connection); - req->rq_status = -EAGAIN; - - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_resend = 1; - req->rq_timedout = 0; - if (req->rq_set != NULL) - wake_up (&req->rq_set->set_waitq); - else - wake_up(&req->rq_wait_for_rep); - spin_unlock_irqrestore (&req->rq_lock, flags); -} - -/* XXX: this function and rq_status are currently unused */ -void ptlrpc_restart_req(struct ptlrpc_request *req) -{ - unsigned long flags; - - DEBUG_REQ(D_HA, req, "restarting (possibly-)completed request"); - req->rq_status = -ERESTARTSYS; - - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_restart 
= 1; - req->rq_timedout = 0; - if (req->rq_set != NULL) - wake_up (&req->rq_set->set_waitq); - else - wake_up(&req->rq_wait_for_rep); - spin_unlock_irqrestore (&req->rq_lock, flags); -} - -static int expired_request(void *data) -{ - struct ptlrpc_request *req = data; - ENTRY; - - RETURN(ptlrpc_expire_one_request(req)); -} - -static void interrupted_request(void *data) -{ - unsigned long flags; - - struct ptlrpc_request *req = data; - DEBUG_REQ(D_HA, req, "request interrupted"); - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_intr = 1; - spin_unlock_irqrestore (&req->rq_lock, flags); -} - -struct ptlrpc_request *ptlrpc_request_addref(struct ptlrpc_request *req) -{ - ENTRY; - atomic_inc(&req->rq_refcount); - RETURN(req); -} - -void ptlrpc_retain_replayable_request(struct ptlrpc_request *req, - struct obd_import *imp) -{ - struct list_head *tmp; - -#ifdef CONFIG_SMP - LASSERT(spin_is_locked(&imp->imp_lock)); -#endif - - LASSERT(imp->imp_replayable); - /* Balanced in ptlrpc_free_committed, usually. */ - ptlrpc_request_addref(req); - list_for_each_prev(tmp, &imp->imp_replay_list) { - struct ptlrpc_request *iter = - list_entry(tmp, struct ptlrpc_request, rq_list); - - /* We may have duplicate transnos if we create and then - * open a file, or for closes retained if to match creating - * opens, so use req->rq_xid as a secondary key. - * (See bugs 684, 685, and 428.) - * XXX no longer needed, but all opens need transnos! 
- */ - if (iter->rq_transno > req->rq_transno) - continue; - - if (iter->rq_transno == req->rq_transno) { - LASSERT(iter->rq_xid != req->rq_xid); - if (iter->rq_xid > req->rq_xid) - continue; - } - - list_add(&req->rq_list, &iter->rq_list); - return; - } - - list_add_tail(&req->rq_list, &imp->imp_replay_list); -} - -int ptlrpc_queue_wait(struct ptlrpc_request *req) -{ - int rc = 0; - int brc; - struct l_wait_info lwi; - struct obd_import *imp = req->rq_import; - struct obd_device *obd = imp->imp_obd; - struct ptlrpc_connection *conn = imp->imp_connection; - unsigned int flags; - int do_restart = 0; - int timeout = 0; - ENTRY; - - LASSERT (req->rq_set == NULL); - LASSERT (!req->rq_receiving_reply); - - /* for distributed debugging */ - req->rq_reqmsg->status = current->pid; - LASSERT(imp->imp_obd != NULL); - CDEBUG(D_RPCTRACE, "Sending RPC pname:cluuid:pid:xid:ni:nid:opc " - "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm, - imp->imp_obd->obd_uuid.uuid, - req->rq_reqmsg->status, req->rq_xid, - conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid, - req->rq_reqmsg->opc); - - /* Mark phase here for a little debug help */ - req->rq_phase = RQ_PHASE_RPC; - - spin_lock_irqsave(&imp->imp_lock, flags); - req->rq_import_generation = imp->imp_generation; -restart: - /* - * If the import has been invalidated (such as by an OST failure), the - * request must fail with -EIO. Recovery requests are allowed to go - * through, though, so that they have a chance to revalidate the - * import. 
- */ - if (req->rq_import->imp_invalid && req->rq_level == LUSTRE_CONN_FULL) { - DEBUG_REQ(D_ERROR, req, "IMP_INVALID:"); - spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc = -EIO); - } - - if (req->rq_import_generation < imp->imp_generation) { - DEBUG_REQ(D_ERROR, req, "req old gen:"); - spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc = -EIO); - } - - if (req->rq_level > imp->imp_level) { - list_del(&req->rq_list); - if (req->rq_no_recov || obd->obd_no_recov || - imp->imp_dlm_fake) { - spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc = -EWOULDBLOCK); - } - - list_add_tail(&req->rq_list, &imp->imp_delayed_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - DEBUG_REQ(D_HA, req, "\"%s\" waiting for recovery: (%d > %d)", - current->comm, req->rq_level, imp->imp_level); - lwi = LWI_INTR(NULL, NULL); - rc = l_wait_event(req->rq_wait_for_rep, - (req->rq_level <= imp->imp_level || - req->rq_err), - &lwi); - DEBUG_REQ(D_HA, req, "\"%s\" awake: (%d > %d)", - current->comm, req->rq_level, imp->imp_level); - - spin_lock_irqsave(&imp->imp_lock, flags); - list_del_init(&req->rq_list); - - if (req->rq_err || - req->rq_import_generation < imp->imp_generation) - rc = -EIO; - - - if (rc) { - spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO (out, rc); - } - - CERROR("process %d resumed\n", current->pid); - } - - /* XXX this is the same as ptlrpc_set_wait */ - LASSERT(list_empty(&req->rq_list)); - list_add_tail(&req->rq_list, &imp->imp_sending_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - rc = ptl_send_rpc(req); - if (rc) { - /* The DLM's fake imports want to avoid all forms of - * recovery. 
*/ - if (imp->imp_dlm_fake) { - spin_lock_irqsave(&imp->imp_lock, flags); - list_del_init(&req->rq_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO(out, rc); - } - - DEBUG_REQ(D_ERROR, req, "send failed (%d); recovering", rc); - - ptlrpc_fail_import(imp, req->rq_import_generation); - - /* If we've been told to not wait, we're done. */ - if (req->rq_level < LUSTRE_CONN_FULL || req->rq_no_recov || - obd->obd_no_recov) { - spin_lock_irqsave(&imp->imp_lock, flags); - list_del_init(&req->rq_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - GOTO(out, rc); - } - - /* If we errored, allow the user to interrupt immediately */ - timeout = 1; - } else { - timeout = req->rq_timeout * HZ; - DEBUG_REQ(D_NET, req, "-- sleeping"); - } -#ifdef __KERNEL__ - lwi = LWI_TIMEOUT_INTR(timeout, expired_request, interrupted_request, - req); - l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi); -#else - { - extern int reply_in_callback(ptl_event_t *ev); - ptl_event_t reply_ev; - PtlEQWait(req->rq_connection->c_peer.peer_ni->pni_reply_in_eq_h, - &reply_ev); - reply_in_callback(&reply_ev); - - LASSERT (reply_ev.mem_desc.user_ptr == (void *)req); - // ptlrpc_check_reply(req); - // not required now it only tests - } -#endif - - DEBUG_REQ(D_NET, req, "-- done sleeping"); - - CDEBUG(D_RPCTRACE, "Completed RPC pname:cluuid:pid:xid:ni:nid:opc " - "%s:%s:%d:"LPU64":%s:"LPX64":%d\n", current->comm, - imp->imp_obd->obd_uuid.uuid, - req->rq_reqmsg->status, req->rq_xid, - conn->c_peer.peer_ni->pni_name, conn->c_peer.peer_nid, - req->rq_reqmsg->opc); - - spin_lock_irqsave(&imp->imp_lock, flags); - list_del_init(&req->rq_list); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - /* If the reply was received normally, this just grabs the spinlock - * (ensuring the reply callback has returned), sees that - * req->rq_receiving_reply is clear and returns. 
*/ - ptlrpc_unregister_reply (req); - - if (req->rq_err) - GOTO(out, rc = -EIO); - - /* Resend if we need to, unless we were interrupted. */ - if (req->rq_resend && !req->rq_intr) { - /* ...unless we were specifically told otherwise. */ - if (req->rq_no_resend) { - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_no_resend = 0; - spin_unlock_irqrestore (&req->rq_lock, flags); - GOTO(out, rc = -ETIMEDOUT); - } - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_resend = 0; - spin_unlock_irqrestore (&req->rq_lock, flags); - lustre_msg_add_flags(req->rq_reqmsg, MSG_RESENT); - - if (req->rq_bulk != NULL) - ptlrpc_unregister_bulk (req); - - DEBUG_REQ(D_HA, req, "resending: "); - spin_lock_irqsave(&imp->imp_lock, flags); - goto restart; - } - - if (req->rq_intr) { - /* Should only be interrupted if we timed out. */ - if (!req->rq_timedout) - DEBUG_REQ(D_ERROR, req, - "rq_intr set but rq_timedout not"); - GOTO(out, rc = -EINTR); - } - - if (req->rq_timedout) { /* non-recoverable timeout */ - GOTO(out, rc = -ETIMEDOUT); - } - - if (!req->rq_replied) { - /* How can this be? 
-eeb */ - DEBUG_REQ(D_ERROR, req, "!rq_replied: "); - LBUG(); - GOTO(out, rc = req->rq_status); - } - - rc = after_reply (req, &do_restart); - /* NB may return +ve success rc */ - if (do_restart) { - if (req->rq_bulk != NULL) - ptlrpc_unregister_bulk (req); - DEBUG_REQ(D_HA, req, "resending: "); - spin_lock_irqsave(&imp->imp_lock, flags); - goto restart; - } - - out: - if (req->rq_bulk != NULL) { - if (rc >= 0) { /* success so far */ - lwi = LWI_TIMEOUT(timeout, NULL, NULL); - brc = l_wait_event(req->rq_wait_for_rep, - ptlrpc_bulk_complete(req->rq_bulk), - &lwi); - if (brc != 0) { - LASSERT (brc == -ETIMEDOUT); - CERROR ("Timed out waiting for bulk\n"); - rc = brc; - } - } - if (rc < 0) { - /* MDS blocks for put ACKs before replying */ - /* OSC sets rq_no_resend for the time being */ - LASSERT (req->rq_no_resend); - ptlrpc_unregister_bulk (req); - } - } - - LASSERT (!req->rq_receiving_reply); - req->rq_phase = RQ_PHASE_INTERPRET; - RETURN (rc); -} - -int ptlrpc_replay_req(struct ptlrpc_request *req) -{ - int rc = 0, old_level, old_status = 0; - // struct ptlrpc_client *cli = req->rq_import->imp_client; - struct l_wait_info lwi; - ENTRY; - - /* I don't touch rq_phase here, so the debug log can show what - * state it was left in */ - - /* Not handling automatic bulk replay yet (or ever?) */ - LASSERT (req->rq_bulk == NULL); - - DEBUG_REQ(D_NET, req, "about to replay"); - - /* Update request's state, since we might have a new connection. 
*/ - ptlrpc_put_connection(req->rq_connection); - req->rq_connection = - ptlrpc_connection_addref(req->rq_import->imp_connection); - - /* temporarily set request to RECOVD level (reset at out:) */ - old_level = req->rq_level; - if (req->rq_replied) - old_status = req->rq_repmsg->status; - req->rq_level = LUSTRE_CONN_RECOVER; - rc = ptl_send_rpc(req); - if (rc) { - CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc); - ptlrpc_cleanup_request_buf(req); - // up(&cli->cli_rpc_sem); - GOTO(out, rc = -rc); - } - - CDEBUG(D_OTHER, "-- sleeping\n"); - lwi = LWI_INTR(NULL, NULL); /* XXX needs timeout, nested recovery */ - l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi); - CDEBUG(D_OTHER, "-- done\n"); - - // up(&cli->cli_rpc_sem); - - /* If the reply was received normally, this just grabs the spinlock - * (ensuring the reply callback has returned), sees that - * req->rq_receiving_reply is clear and returns. */ - ptlrpc_unregister_reply (req); - - if (!req->rq_replied) { - CERROR("Unknown reason for wakeup\n"); - /* XXX Phil - I end up here when I kill obdctl */ - /* ...that's because signals aren't all masked in - * l_wait_event() -eeb */ - GOTO(out, rc = -EINTR); - } - -#if SWAB_PARANOIA - /* Clear reply swab mask; this is a new reply in sender's byte order */ - req->rq_rep_swab_mask = 0; -#endif - rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen); - if (rc) { - CERROR("unpack_rep failed: %d\n", rc); - GOTO(out, rc = -EPROTO); - } -#if 0 - /* FIXME: Enable when BlueArc makes new release */ - if (req->rq_repmsg->type != PTL_RPC_MSG_REPLY && - req->rq_repmsg->type != PTL_RPC_MSG_ERR) { - CERROR("invalid packet type received (type=%u)\n", - req->rq_repmsg->type); - GOTO(out, rc = -EPROTO); - } -#endif - - /* The transno had better not change over replay. 
*/ - LASSERT(req->rq_reqmsg->transno == req->rq_repmsg->transno); - - CDEBUG(D_NET, "got rep "LPD64"\n", req->rq_xid); - - /* let the callback do fixups, possibly including in the request */ - if (req->rq_replay_cb) - req->rq_replay_cb(req); - - if (req->rq_replied && req->rq_repmsg->status != old_status) { - DEBUG_REQ(D_HA, req, "status %d, old was %d", - req->rq_repmsg->status, old_status); - } - - out: - req->rq_level = old_level; - RETURN(rc); -} - -void ptlrpc_abort_inflight(struct obd_import *imp) -{ - unsigned long flags; - struct list_head *tmp, *n; - ENTRY; - - /* Make sure that no new requests get processed for this import. - * ptlrpc_{queue,set}_wait must (and does) hold imp_lock while testing - * this flag and then putting requests on sending_list or delayed_list. - */ - spin_lock_irqsave(&imp->imp_lock, flags); - - /* XXX locking? Maybe we should remove each request with the list - * locked? Also, how do we know if the requests on the list are - * being freed at this time? - */ - list_for_each_safe(tmp, n, &imp->imp_sending_list) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_list); - - DEBUG_REQ(D_HA, req, "inflight"); - - spin_lock (&req->rq_lock); - if (req->rq_import_generation < imp->imp_generation) { - req->rq_err = 1; - if (req->rq_set != NULL) - wake_up(&req->rq_set->set_waitq); - else - wake_up(&req->rq_wait_for_rep); - } - spin_unlock (&req->rq_lock); - } - - list_for_each_safe(tmp, n, &imp->imp_delayed_list) { - struct ptlrpc_request *req = - list_entry(tmp, struct ptlrpc_request, rq_list); - - DEBUG_REQ(D_HA, req, "aborting waiting req"); - - spin_lock (&req->rq_lock); - if (req->rq_import_generation < imp->imp_generation) { - req->rq_err = 1; - if (req->rq_set != NULL) - wake_up(&req->rq_set->set_waitq); - else - wake_up(&req->rq_wait_for_rep); - spin_unlock (&req->rq_lock); - } - } - - /* Last chance to free reqs left on the replay list, but we - * will still leak reqs that haven't comitted. 
*/ - if (imp->imp_replayable) - ptlrpc_free_committed(imp); - - spin_unlock_irqrestore(&imp->imp_lock, flags); - - EXIT; -} - -static __u64 ptlrpc_last_xid = 0; -static spinlock_t ptlrpc_last_xid_lock = SPIN_LOCK_UNLOCKED; - -__u64 ptlrpc_next_xid(void) -{ - __u64 tmp; - spin_lock(&ptlrpc_last_xid_lock); - tmp = ++ptlrpc_last_xid; - spin_unlock(&ptlrpc_last_xid_lock); - return tmp; -} - - diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c deleted file mode 100644 index 6b7690b..0000000 --- a/lustre/ptlrpc/connection.c +++ /dev/null @@ -1,186 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_RPC -#ifdef __KERNEL__ -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_net.h> -#else -#include <liblustre.h> -#endif - -#include "ptlrpc_internal.h" - -static spinlock_t conn_lock; -static struct list_head conn_list; -static struct list_head conn_unused_list; - -void ptlrpc_dump_connections(void) -{ - struct list_head *tmp; - struct ptlrpc_connection *c; - ENTRY; - - list_for_each(tmp, &conn_list) { - c = list_entry(tmp, struct ptlrpc_connection, c_link); - CERROR("Connection %p/%s has refcount %d (nid="LPX64" on %s)\n", - c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount), - c->c_peer.peer_nid, c->c_peer.peer_ni->pni_name); - } - EXIT; -} - -struct ptlrpc_connection *ptlrpc_get_connection(struct ptlrpc_peer *peer, - struct obd_uuid *uuid) -{ - struct list_head *tmp, *pos; - struct ptlrpc_connection *c; - ENTRY; - - - CDEBUG(D_INFO, "peer is "LPX64" on %s\n", - peer->peer_nid, peer->peer_ni->pni_name); - - spin_lock(&conn_lock); - if (list_empty(&conn_list)) { - if (!ptlrpc_get_ldlm_hooks()) { - spin_unlock(&conn_lock); - RETURN(NULL); - } - } - - list_for_each(tmp, &conn_list) { - c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (peer->peer_nid == c->c_peer.peer_nid && - peer->peer_ni == c->c_peer.peer_ni) { - ptlrpc_connection_addref(c); - GOTO(out, c); - } - } - - list_for_each_safe(tmp, pos, &conn_unused_list) { - c = list_entry(tmp, struct ptlrpc_connection, c_link); - if (peer->peer_nid == c->c_peer.peer_nid && - peer->peer_ni == c->c_peer.peer_ni) { - ptlrpc_connection_addref(c); - list_del(&c->c_link); - list_add(&c->c_link, &conn_list); - GOTO(out, c); - } - } - - /* FIXME: this should be a slab once we can validate slab addresses - * without OOPSing */ - OBD_ALLOC(c, sizeof(*c)); - if (c == NULL) - GOTO(out, c); - - c->c_generation = 1; - c->c_epoch = 1; - c->c_bootcount = 0; - c->c_flags = 0; - if (uuid && uuid->uuid) /* XXX ???? 
*/ - obd_str2uuid(&c->c_remote_uuid, uuid->uuid); - atomic_set(&c->c_refcount, 0); - memcpy(&c->c_peer, peer, sizeof(c->c_peer)); - spin_lock_init(&c->c_lock); - - ptlrpc_connection_addref(c); - - list_add(&c->c_link, &conn_list); - - EXIT; - out: - spin_unlock(&conn_lock); - return c; -} - -int ptlrpc_put_connection(struct ptlrpc_connection *c) -{ - int rc = 0; - ENTRY; - - if (c == NULL) { - CERROR("NULL connection\n"); - RETURN(0); - } - - CDEBUG (D_INFO, "connection=%p refcount %d to "LPX64" on %s\n", - c, atomic_read(&c->c_refcount) - 1, c->c_peer.peer_nid, - c->c_peer.peer_ni->pni_name); - - if (atomic_dec_and_test(&c->c_refcount)) { - spin_lock(&conn_lock); - list_del(&c->c_link); - list_add(&c->c_link, &conn_unused_list); - if (list_empty(&conn_list)) { - ptlrpc_put_ldlm_hooks(); - } - spin_unlock(&conn_lock); - rc = 1; - } - if (atomic_read(&c->c_refcount) < 0) - CERROR("connection %p refcount %d!\n", - c, atomic_read(&c->c_refcount)); - - RETURN(rc); -} - -struct ptlrpc_connection *ptlrpc_connection_addref(struct ptlrpc_connection *c) -{ - ENTRY; - atomic_inc(&c->c_refcount); - CDEBUG (D_INFO, "connection=%p refcount %d to "LPX64" on %s\n", - c, atomic_read(&c->c_refcount), c->c_peer.peer_nid, - c->c_peer.peer_ni->pni_name); - RETURN(c); -} - -void ptlrpc_init_connection(void) -{ - INIT_LIST_HEAD(&conn_list); - INIT_LIST_HEAD(&conn_unused_list); - conn_lock = SPIN_LOCK_UNLOCKED; -} - -void ptlrpc_cleanup_connection(void) -{ - struct list_head *tmp, *pos; - struct ptlrpc_connection *c; - - spin_lock(&conn_lock); - list_for_each_safe(tmp, pos, &conn_unused_list) { - c = list_entry(tmp, struct ptlrpc_connection, c_link); - list_del(&c->c_link); - OBD_FREE(c, sizeof(*c)); - } - list_for_each_safe(tmp, pos, &conn_list) { - c = list_entry(tmp, struct ptlrpc_connection, c_link); - CERROR("Connection %p/%s has refcount %d (nid="LPX64" on %s)\n", - c, c->c_remote_uuid.uuid, atomic_read(&c->c_refcount), - c->c_peer.peer_nid, c->c_peer.peer_ni->pni_name); - 
list_del(&c->c_link); - OBD_FREE(c, sizeof(*c)); - } - spin_unlock(&conn_lock); -} diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c deleted file mode 100644 index 167898a..0000000 --- a/lustre/ptlrpc/events.c +++ /dev/null @@ -1,497 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_RPC - -#ifdef __KERNEL__ -#include <linux/module.h> -#else -#include <liblustre.h> -#endif -#include <linux/obd_class.h> -#include <linux/lustre_net.h> - -struct ptlrpc_ni ptlrpc_interfaces[NAL_MAX_NR]; -int ptlrpc_ninterfaces; - -/* - * Free the packet when it has gone out - */ -static int request_out_callback(ptl_event_t *ev) -{ - struct ptlrpc_request *req = ev->mem_desc.user_ptr; - ENTRY; - - /* requests always contiguous */ - LASSERT((ev->mem_desc.options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0); - - if (ev->type != PTL_EVENT_SENT) { - // XXX make sure we understand all events, including ACK's - CERROR("Unknown event %d\n", ev->type); - LBUG(); - } - - /* this balances the atomic_inc in ptl_send_rpc() */ - ptlrpc_req_finished(req); - RETURN(1); -} - -/* - * Free the packet when it has gone out - */ -static int reply_out_callback(ptl_event_t *ev) -{ - struct ptlrpc_request *req = ev->mem_desc.user_ptr; - unsigned long flags; - ENTRY; - - /* replies always contiguous */ - LASSERT((ev->mem_desc.options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0); - - if (ev->type == PTL_EVENT_SENT) { - /* NB don't even know if this is the current reply! In fact - * we can't touch any state in the request, since the - * service handler zeros it on each incoming request. */ - OBD_FREE(ev->mem_desc.start, ev->mem_desc.length); - } else if (ev->type == PTL_EVENT_ACK) { - LASSERT(req->rq_want_ack); - spin_lock_irqsave(&req->rq_lock, flags); - req->rq_want_ack = 0; - wake_up(&req->rq_wait_for_rep); - spin_unlock_irqrestore(&req->rq_lock, flags); - } else { - // XXX make sure we understand all events - CERROR("Unknown event %d\n", ev->type); - LBUG(); - } - - RETURN(1); -} - -/* - * Wake up the thread waiting for the reply once it comes in. 
- */ -int reply_in_callback(ptl_event_t *ev) -{ - struct ptlrpc_request *req = ev->mem_desc.user_ptr; - unsigned long flags; - ENTRY; - - /* replies always contiguous */ - LASSERT((ev->mem_desc.options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0); - - if (req->rq_xid == 0x5a5a5a5a5a5a5a5a) { - CERROR("Reply received for freed request! Probably a missing " - "ptlrpc_abort()\n"); - LBUG(); - } - - if (req->rq_xid != ev->match_bits) { - CERROR("Reply packet for wrong request\n"); - LBUG(); - } - - if (ev->type == PTL_EVENT_PUT) { - /* Bug 1190: should handle non-zero offset as a protocol - * error */ - LASSERT (ev->offset == 0); - - spin_lock_irqsave (&req->rq_lock, flags); - LASSERT (req->rq_receiving_reply); - req->rq_receiving_reply = 0; - req->rq_replied = 1; - if (req->rq_set != NULL) - wake_up(&req->rq_set->set_waitq); - else - wake_up(&req->rq_wait_for_rep); - spin_unlock_irqrestore (&req->rq_lock, flags); - } else { - // XXX make sure we understand all events, including ACKs - CERROR("Unknown event %d\n", ev->type); - LBUG(); - } - - RETURN(1); -} - -int request_in_callback(ptl_event_t *ev) -{ - struct ptlrpc_request_buffer_desc *rqbd = ev->mem_desc.user_ptr; - struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni; - struct ptlrpc_service *service = srv_ni->sni_service; - - /* requests always contiguous */ - LASSERT((ev->mem_desc.options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0); - /* we only enable puts */ - LASSERT(ev->type == PTL_EVENT_PUT); - LASSERT(atomic_read(&srv_ni->sni_nrqbds_receiving) > 0); - LASSERT(atomic_read(&rqbd->rqbd_refcount) > 0); - - if (ev->rlength != ev->mlength) - CERROR("Warning: Possibly truncated rpc (%d/%d)\n", - ev->mlength, ev->rlength); - - if (!PtlHandleEqual (ev->unlinked_me, PTL_HANDLE_NONE)) { - /* This is the last request to be received into this - * request buffer. We don't bump the refcount, since the - * thread servicing this event is effectively taking over - * portals' reference. 
- */ - /* NB ev->unlinked_me.nal_idx is not set properly in a callback */ - LASSERT(ev->unlinked_me.cookie==rqbd->rqbd_me_h.cookie); - - /* we're off the air */ - /* we'll probably start dropping packets in portals soon */ - if (atomic_dec_and_test(&srv_ni->sni_nrqbds_receiving)) - CERROR("All request buffers busy\n"); - } else { - /* +1 ref for service thread */ - atomic_inc(&rqbd->rqbd_refcount); - } - - wake_up(&service->srv_waitq); - - return 0; -} - -static int bulk_put_source_callback(ptl_event_t *ev) -{ - unsigned long flags; - struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr; - ENTRY; - - CDEBUG(D_NET, "got %s event %d\n", - (ev->type == PTL_EVENT_SENT) ? "SENT" : - (ev->type == PTL_EVENT_ACK) ? "ACK" : "UNEXPECTED", ev->type); - - LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_ACK); - - /* 1 fragment for each page always */ - LASSERT(ev->mem_desc.niov == desc->bd_page_count); - - spin_lock_irqsave (&desc->bd_lock, flags); - - LASSERT(desc->bd_callback_count > 0 && - desc->bd_callback_count <= 2); - - if (--desc->bd_callback_count == 0) { - desc->bd_network_rw = 0; - desc->bd_complete = 1; - wake_up(&desc->bd_waitq); - } - - spin_unlock_irqrestore (&desc->bd_lock, flags); - RETURN(0); -} - -struct ptlrpc_bulk_desc ptlrpc_bad_desc; -ptl_event_t ptlrpc_bad_event; - -static int bulk_put_sink_callback(ptl_event_t *ev) -{ - struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr; - unsigned long flags; - ENTRY; - - LASSERT(ev->type == PTL_EVENT_PUT); - - /* used iovs */ - LASSERT((ev->mem_desc.options & (PTL_MD_IOV | PTL_MD_KIOV)) == - PTL_MD_KIOV); - /* Honestly, it's best to find out early. 
*/ - if (desc->bd_page_count == 0x5a5a5a5a5a || - desc->bd_page_count != ev->mem_desc.niov || - ev->mem_desc.start != &desc->bd_iov) { - /* not guaranteed (don't LASSERT) but good for this bug hunt */ - ptlrpc_bad_event = *ev; - ptlrpc_bad_desc = *desc; - CERROR ("XXX ev %p type %d portal %d match "LPX64", seq %ld\n", - ev, ev->type, ev->portal, ev->match_bits, ev->sequence); - CERROR ("XXX desc %p, export %p import %p gen %d " - " portal %d\n", - desc, desc->bd_export, - desc->bd_import, desc->bd_import_generation, - desc->bd_portal); - RETURN (0); - } - - LASSERT(desc->bd_page_count != 0x5a5a5a5a); - /* 1 fragment for each page always */ - LASSERT(ev->mem_desc.niov == desc->bd_page_count); - LASSERT(ev->match_bits == desc->bd_req->rq_xid); - - /* peer must put with zero offset */ - if (ev->offset != 0) { - /* Bug 1190: handle this as a protocol failure */ - CERROR ("Bad offset %d\n", ev->offset); - LBUG (); - } - - /* No check for total # bytes; this could be a short read */ - - spin_lock_irqsave (&desc->bd_lock, flags); - desc->bd_network_rw = 0; - desc->bd_complete = 1; - if (desc->bd_req->rq_set != NULL) - wake_up (&desc->bd_req->rq_set->set_waitq); - else - wake_up (&desc->bd_req->rq_wait_for_rep); - spin_unlock_irqrestore (&desc->bd_lock, flags); - - RETURN(1); -} - -static int bulk_get_source_callback(ptl_event_t *ev) -{ - struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr; - struct ptlrpc_bulk_page *bulk; - struct list_head *tmp; - unsigned long flags; - ptl_size_t total = 0; - ENTRY; - - LASSERT(ev->type == PTL_EVENT_GET); - - /* used iovs */ - LASSERT((ev->mem_desc.options & (PTL_MD_IOV | PTL_MD_KIOV)) == - PTL_MD_KIOV); - /* 1 fragment for each page always */ - LASSERT(ev->mem_desc.niov == desc->bd_page_count); - LASSERT(ev->match_bits == desc->bd_req->rq_xid); - - /* peer must get with zero offset */ - if (ev->offset != 0) { - /* Bug 1190: handle this as a protocol failure */ - CERROR ("Bad offset %d\n", ev->offset); - LBUG (); - } - - list_for_each 
(tmp, &desc->bd_page_list) { - bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); - - total += bulk->bp_buflen; - } - - /* peer must get everything */ - if (ev->mem_desc.length != total) { - /* Bug 1190: handle this as a protocol failure */ - CERROR ("Bad length/total %d/%d\n", ev->mem_desc.length, total); - LBUG (); - } - - spin_lock_irqsave (&desc->bd_lock, flags); - desc->bd_network_rw = 0; - desc->bd_complete = 1; - if (desc->bd_req->rq_set != NULL) - wake_up (&desc->bd_req->rq_set->set_waitq); - else - wake_up (&desc->bd_req->rq_wait_for_rep); - spin_unlock_irqrestore (&desc->bd_lock, flags); - - RETURN(1); -} - -static int bulk_get_sink_callback(ptl_event_t *ev) -{ - struct ptlrpc_bulk_desc *desc = ev->mem_desc.user_ptr; - unsigned long flags; - ENTRY; - - CDEBUG(D_NET, "got %s event %d\n", - (ev->type == PTL_EVENT_SENT) ? "SENT" : - (ev->type == PTL_EVENT_REPLY) ? "REPLY" : "UNEXPECTED", - ev->type); - - LASSERT(ev->type == PTL_EVENT_SENT || ev->type == PTL_EVENT_REPLY); - - /* 1 fragment for each page always */ - LASSERT(ev->mem_desc.niov == desc->bd_page_count); - - spin_lock_irqsave (&desc->bd_lock, flags); - LASSERT(desc->bd_callback_count > 0 && - desc->bd_callback_count <= 2); - - if (--desc->bd_callback_count == 0) { - desc->bd_network_rw = 0; - desc->bd_complete = 1; - wake_up(&desc->bd_waitq); - } - spin_unlock_irqrestore (&desc->bd_lock, flags); - - RETURN(0); -} - -int ptlrpc_uuid_to_peer (struct obd_uuid *uuid, struct ptlrpc_peer *peer) -{ - struct ptlrpc_ni *pni; - struct lustre_peer lpeer; - int i; - int rc = lustre_uuid_to_peer (uuid->uuid, &lpeer); - - if (rc != 0) - RETURN (rc); - - for (i = 0; i < ptlrpc_ninterfaces; i++) { - pni = &ptlrpc_interfaces[i]; - - if (!memcmp(&lpeer.peer_ni, &pni->pni_ni_h, - sizeof (lpeer.peer_ni))) { - peer->peer_nid = lpeer.peer_nid; - peer->peer_ni = pni; - return (0); - } - } - - CERROR("Can't find ptlrpc interface for "LPX64" ni handle %08lx."LPX64"\n", - lpeer.peer_nid, lpeer.peer_ni.nal_idx, 
lpeer.peer_ni.cookie); - return (-ENOENT); -} - -void ptlrpc_ni_fini(struct ptlrpc_ni *pni) -{ - PtlEQFree(pni->pni_request_out_eq_h); - PtlEQFree(pni->pni_reply_out_eq_h); - PtlEQFree(pni->pni_reply_in_eq_h); - PtlEQFree(pni->pni_bulk_put_source_eq_h); - PtlEQFree(pni->pni_bulk_put_sink_eq_h); - PtlEQFree(pni->pni_bulk_get_source_eq_h); - PtlEQFree(pni->pni_bulk_get_sink_eq_h); - - kportal_put_ni (pni->pni_number); -} - -int ptlrpc_ni_init(int number, char *name, struct ptlrpc_ni *pni) -{ - int rc; - ptl_handle_ni_t *nip = kportal_get_ni (number); - - if (nip == NULL) { - CDEBUG (D_NET, "Network interface %s not loaded\n", name); - return (-ENOENT); - } - - CDEBUG (D_NET, "init %d %s: nal_idx %ld\n", number, name, nip->nal_idx); - - pni->pni_name = name; - pni->pni_number = number; - pni->pni_ni_h = *nip; - - pni->pni_request_out_eq_h = PTL_HANDLE_NONE; - pni->pni_reply_out_eq_h = PTL_HANDLE_NONE; - pni->pni_reply_in_eq_h = PTL_HANDLE_NONE; - pni->pni_bulk_put_source_eq_h = PTL_HANDLE_NONE; - pni->pni_bulk_put_sink_eq_h = PTL_HANDLE_NONE; - pni->pni_bulk_get_source_eq_h = PTL_HANDLE_NONE; - pni->pni_bulk_get_sink_eq_h = PTL_HANDLE_NONE; - - /* NB We never actually PtlEQGet() out of these events queues since - * we're only interested in the event callback, so we can just let - * them wrap. Their sizes aren't a big deal, apart from providing - * a little history for debugging... 
*/ - - rc = PtlEQAlloc(pni->pni_ni_h, 1024, request_out_callback, - &pni->pni_request_out_eq_h); - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_out_callback, - &pni->pni_reply_out_eq_h); - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - rc = PtlEQAlloc(pni->pni_ni_h, 1024, reply_in_callback, - &pni->pni_reply_in_eq_h); - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_source_callback, - &pni->pni_bulk_put_source_eq_h); - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_put_sink_callback, - &pni->pni_bulk_put_sink_eq_h); - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_source_callback, - &pni->pni_bulk_get_source_eq_h); - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - rc = PtlEQAlloc(pni->pni_ni_h, 1024, bulk_get_sink_callback, - &pni->pni_bulk_get_sink_eq_h); - if (rc != PTL_OK) - GOTO (fail, rc = -ENOMEM); - - return (0); - fail: - CERROR ("Failed to initialise network interface %s: %d\n", - name, rc); - - /* OK to do complete teardown since we invalidated the handles above */ - ptlrpc_ni_fini (pni); - return (rc); -} - -int ptlrpc_init_portals(void) -{ - /* Add new portals network interfaces here. - * Order is irrelevent! 
*/ - static struct { - int number; - char *name; - } ptl_nis[] = { - {QSWNAL, "qswnal"}, - {SOCKNAL, "socknal"}, - {GMNAL, "gmnal"}, - {TOENAL, "toenal"}, - {TCPNAL, "tcpnal"}, - {SCIMACNAL, "scimacnal"}}; - int rc; - int i; - - LASSERT(ptlrpc_ninterfaces == 0); - - for (i = 0; i < sizeof (ptl_nis) / sizeof (ptl_nis[0]); i++) { - LASSERT(ptlrpc_ninterfaces < (sizeof(ptlrpc_interfaces) / - sizeof(ptlrpc_interfaces[0]))); - - rc = ptlrpc_ni_init(ptl_nis[i].number, ptl_nis[i].name, - &ptlrpc_interfaces[ptlrpc_ninterfaces]); - if (rc == 0) - ptlrpc_ninterfaces++; - } - - if (ptlrpc_ninterfaces == 0) { - CERROR("network initialisation failed: is a NAL module " - "loaded?\n"); - return -EIO; - } - return 0; -} - -void ptlrpc_exit_portals(void) -{ - while (ptlrpc_ninterfaces > 0) - ptlrpc_ni_fini (&ptlrpc_interfaces[--ptlrpc_ninterfaces]); -} diff --git a/lustre/ptlrpc/lproc_ptlrpc.c b/lustre/ptlrpc/lproc_ptlrpc.c deleted file mode 100644 index 07be1af..0000000 --- a/lustre/ptlrpc/lproc_ptlrpc.c +++ /dev/null @@ -1,159 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#define DEBUG_SUBSYSTEM S_CLASS - -#include <linux/obd_support.h> -#include <linux/obd.h> -#include <linux/lprocfs_status.h> -#include <linux/lustre_idl.h> -#include <linux/lustre_net.h> -#include "ptlrpc_internal.h" - - -struct ll_rpc_opcode { - __u32 opcode; - const char *opname; -} ll_rpc_opcode_table[LUSTRE_MAX_OPCODES] = { - { OST_REPLY, "ost_reply" }, - { OST_GETATTR, "ost_getattr" }, - { OST_SETATTR, "ost_setattr" }, - { OST_READ, "ost_read" }, - { OST_WRITE, "ost_write" }, - { OST_CREATE , "ost_create" }, - { OST_DESTROY, "ost_destroy" }, - { OST_GET_INFO, "ost_get_info" }, - { OST_CONNECT, "ost_connect" }, - { OST_DISCONNECT, "ost_disconnect" }, - { OST_PUNCH, "ost_punch" }, - { OST_OPEN, "ost_open" }, - { OST_CLOSE, "ost_close" }, - { OST_STATFS, "ost_statfs" }, - { OST_SAN_READ, "ost_san_read" }, - { OST_SAN_WRITE, "ost_san_write" }, - { OST_SYNCFS, "ost_syncfs" }, - { MDS_GETATTR, "mds_getattr" }, - { MDS_GETATTR_NAME, "mds_getattr_name" }, - { MDS_CLOSE, "mds_close" }, - { MDS_REINT, "mds_reint" }, - { MDS_READPAGE, "mds_readpage" }, - { MDS_CONNECT, "mds_connect" }, - { MDS_DISCONNECT, "mds_disconnect" }, - { MDS_GETSTATUS, "mds_getstatus" }, - { MDS_STATFS, "mds_statfs" }, - { MDS_GETLOVINFO, "mds_getlovinfo" }, - { LDLM_ENQUEUE, "ldlm_enqueue" }, - { LDLM_CONVERT, "ldlm_convert" }, - { LDLM_CANCEL, "ldlm_cancel" }, - { LDLM_BL_CALLBACK, "ldlm_bl_callback" }, - { LDLM_CP_CALLBACK, "ldlm_cp_callback" }, - { PTLBD_QUERY, "ptlbd_query" }, - { PTLBD_READ, "ptlbd_read" }, - { PTLBD_WRITE, "ptlbd_write" }, - { PTLBD_FLUSH, "ptlbd_flush" }, - { PTLBD_CONNECT, "ptlbd_connect" }, - { PTLBD_DISCONNECT, "ptlbd_disconnect" }, - { OBD_PING, "obd_ping" } -}; - -const char* ll_opcode2str(__u32 opcode) -{ - /* When one of the assertions below fail, chances are that: - * 1) A new opcode was added in lustre_idl.h, but was - * is missing from the table above. 
- * or 2) The opcode space was renumbered or rearranged, - * and the opcode_offset() function in - * ptlrpc_internals.h needs to be modified. - */ - __u32 offset = opcode_offset(opcode); - LASSERT(offset < LUSTRE_MAX_OPCODES); - LASSERT(ll_rpc_opcode_table[offset].opcode == opcode); - return ll_rpc_opcode_table[offset].opname; -} - -#ifndef LPROCFS -void ptlrpc_lprocfs_register_service(struct obd_device *obddev, - struct ptlrpc_service *svc) { return ; } -void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) { return; } -#else - -void ptlrpc_lprocfs_register_service(struct obd_device *obddev, - struct ptlrpc_service *svc) -{ - struct proc_dir_entry *svc_procroot; - struct lprocfs_stats *svc_stats; - int i, rc; - unsigned int svc_counter_config = LPROCFS_CNTR_EXTERNALLOCK | - LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV; - - LASSERT(svc->svc_procroot == NULL); - LASSERT(svc->svc_stats == NULL); - - svc_procroot = lprocfs_register(svc->srv_name, obddev->obd_proc_entry, - NULL, NULL); - if (svc_procroot == NULL) - return; - - svc_stats = lprocfs_alloc_stats(PTLRPC_LAST_CNTR + LUSTRE_MAX_OPCODES); - if (svc_stats == NULL) { - lprocfs_remove(svc_procroot); - return; - } - - lprocfs_counter_init(svc_stats, PTLRPC_REQWAIT_CNTR, - svc_counter_config, "req_waittime", "cycles"); - /* Wait for b_eq branch - lprocfs_counter_init(svc_stats, PTLRPC_SVCEQDEPTH_CNTR, - svc_counter_config, "svc_eqdepth", "reqs"); - */ - /* no stddev on idletime */ - lprocfs_counter_init(svc_stats, PTLRPC_SVCIDLETIME_CNTR, - (LPROCFS_CNTR_EXTERNALLOCK|LPROCFS_CNTR_AVGMINMAX), - "svc_idletime", "cycles"); - for (i = 0; i < LUSTRE_MAX_OPCODES; i++) { - __u32 opcode = ll_rpc_opcode_table[i].opcode; - lprocfs_counter_init(svc_stats, PTLRPC_LAST_CNTR + i, - svc_counter_config, ll_opcode2str(opcode), - "cycles"); - } - - rc = lprocfs_register_stats(svc_procroot, "stats", svc_stats); - if (rc < 0) { - lprocfs_remove(svc_procroot); - lprocfs_free_stats(svc_stats); - } else { - svc->svc_procroot 
= svc_procroot; - svc->svc_stats = svc_stats; - } -} - -void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc) -{ - if (svc->svc_procroot) { - lprocfs_remove(svc->svc_procroot); - svc->svc_procroot = NULL; - } - if (svc->svc_stats) { - lprocfs_free_stats(svc->svc_stats); - svc->svc_stats = NULL; - } -} -#endif /* LPROCFS */ diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c deleted file mode 100644 index c0ccb4d..0000000 --- a/lustre/ptlrpc/niobuf.c +++ /dev/null @@ -1,745 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_RPC -#ifndef __KERNEL__ -#include <liblustre.h> -#include <portals/lib-types.h> -#endif -#include <linux/obd_support.h> -#include <linux/lustre_net.h> -#include <linux/lustre_lib.h> -#include <linux/obd.h> -#include "ptlrpc_internal.h" - -static int ptl_send_buf(struct ptlrpc_request *request, - struct ptlrpc_connection *conn, int portal) -{ - int rc; - int rc2; - ptl_process_id_t remote_id; - ptl_handle_md_t md_h; - ptl_ack_req_t ack_req; - - LASSERT (portal != 0); - LASSERT (conn != NULL); - CDEBUG (D_INFO, "conn=%p ni %s nid "LPX64" on %s\n", - conn, conn->c_peer.peer_ni->pni_name, - conn->c_peer.peer_nid, conn->c_peer.peer_ni->pni_name); - - request->rq_req_md.user_ptr = request; - - switch (request->rq_type) { - case PTL_RPC_MSG_REQUEST: - request->rq_reqmsg->type = request->rq_type; - request->rq_req_md.start = request->rq_reqmsg; - request->rq_req_md.length = request->rq_reqlen; - request->rq_req_md.eventq = - conn->c_peer.peer_ni->pni_request_out_eq_h; - LASSERT (!request->rq_want_ack); - break; - case PTL_RPC_MSG_ERR: - case PTL_RPC_MSG_REPLY: - request->rq_repmsg->type = request->rq_type; - request->rq_req_md.start = request->rq_repmsg; - request->rq_req_md.length = request->rq_replen; - request->rq_req_md.eventq = - conn->c_peer.peer_ni->pni_reply_out_eq_h; - break; - default: - LBUG(); - return -1; /* notreached */ - } - if (request->rq_want_ack) { - request->rq_req_md.threshold = 2; /* SENT and ACK */ - ack_req = PTL_ACK_REQ; - } else { - request->rq_req_md.threshold = 1; - ack_req = PTL_NOACK_REQ; - } - request->rq_req_md.options = PTL_MD_OP_PUT; - request->rq_req_md.user_ptr = request; - - if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_ACK | OBD_FAIL_ONCE)) { - request->rq_req_md.options |= PTL_MD_ACK_DISABLE; - obd_fail_loc |= OBD_FAIL_ONCE | OBD_FAILED; - } - - /* NB if the send fails, we back out of the send and return - * failure; it's down to the caller to handle missing callbacks */ - - rc = 
PtlMDBind(conn->c_peer.peer_ni->pni_ni_h, request->rq_req_md, - &md_h); - if (rc != PTL_OK) { - CERROR("PtlMDBind failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - RETURN (-ENOMEM); - } - if (request->rq_type != PTL_RPC_MSG_REQUEST) - memcpy(&request->rq_reply_md_h, &md_h, sizeof(md_h)); - - remote_id.nid = conn->c_peer.peer_nid; - remote_id.pid = 0; - - CDEBUG(D_NET, "Sending %d bytes to portal %d, xid "LPD64"\n", - request->rq_req_md.length, portal, request->rq_xid); - - rc = PtlPut(md_h, ack_req, remote_id, portal, 0, request->rq_xid, 0, 0); - if (rc != PTL_OK) { - CERROR("PtlPut("LPU64", %d, "LPD64") failed: %d\n", - remote_id.nid, portal, request->rq_xid, rc); - rc2 = PtlMDUnlink(md_h); - LASSERT (rc2 == PTL_OK); - RETURN ((rc == PTL_NOSPACE) ? -ENOMEM : -ECOMM); - } - - return 0; -} - -static inline ptl_kiov_t * -ptlrpc_get_bulk_iov (struct ptlrpc_bulk_desc *desc) -{ - ptl_kiov_t *iov; - - if (desc->bd_page_count <= sizeof (desc->bd_iov)/sizeof (*iov)) - return (desc->bd_iov); - - OBD_ALLOC (iov, desc->bd_page_count * sizeof (*iov)); - if (iov == NULL) - LBUG(); - - return (iov); -} - -static inline void -ptlrpc_put_bulk_iov (struct ptlrpc_bulk_desc *desc, ptl_kiov_t *iov) -{ - if (desc->bd_page_count <= sizeof (desc->bd_iov)/sizeof (*iov)) - return; - - OBD_FREE (iov, desc->bd_page_count * sizeof (*iov)); -} - -int ptlrpc_bulk_put(struct ptlrpc_bulk_desc *desc) -{ - int rc; - int rc2; - struct ptlrpc_peer *peer; - struct list_head *tmp, *next; - ptl_process_id_t remote_id; - ptl_kiov_t *iov; - __u64 xid; - ENTRY; - - /* NB no locking required until desc is on the network */ - LASSERT (!desc->bd_network_rw); - LASSERT (desc->bd_type == BULK_PUT_SOURCE); - desc->bd_complete = 0; - - iov = ptlrpc_get_bulk_iov (desc); - if (iov == NULL) - RETURN (-ENOMEM); - - peer = &desc->bd_export->exp_connection->c_peer; - - desc->bd_md.start = iov; - desc->bd_md.niov = 0; - desc->bd_md.length = 0; - desc->bd_md.eventq = peer->peer_ni->pni_bulk_put_source_eq_h; - 
desc->bd_md.threshold = 2; /* SENT and ACK */ - desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_KIOV; - desc->bd_md.user_ptr = desc; - - desc->bd_callback_count = 2; - - list_for_each_safe(tmp, next, &desc->bd_page_list) { - struct ptlrpc_bulk_page *bulk; - bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); - - LASSERT(desc->bd_md.niov < desc->bd_page_count); - - iov[desc->bd_md.niov].kiov_page = bulk->bp_page; - iov[desc->bd_md.niov].kiov_offset = bulk->bp_pageoffset; - iov[desc->bd_md.niov].kiov_len = bulk->bp_buflen; - - LASSERT (iov[desc->bd_md.niov].kiov_offset + - iov[desc->bd_md.niov].kiov_len <= PAGE_SIZE); - desc->bd_md.niov++; - desc->bd_md.length += bulk->bp_buflen; - } - - /* NB total length may be 0 for a read past EOF, so we send a 0 - * length bulk, since the client expects a bulk event. */ - LASSERT(desc->bd_md.niov == desc->bd_page_count); - - rc = PtlMDBind(peer->peer_ni->pni_ni_h, desc->bd_md, - &desc->bd_md_h); - - ptlrpc_put_bulk_iov (desc, iov); /*move down to reduce latency to send*/ - - if (rc != PTL_OK) { - CERROR("PtlMDBind failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - RETURN(-ENOMEM); - } - - /* Client's bulk and reply matchbits are the same */ - xid = desc->bd_req->rq_xid; - remote_id.nid = peer->peer_nid; - remote_id.pid = 0; - - CDEBUG(D_NET, "Sending %u pages %u bytes to portal %d on %s " - "nid "LPX64" pid %d xid "LPX64"\n", - desc->bd_md.niov, desc->bd_md.length, - desc->bd_portal, peer->peer_ni->pni_name, - remote_id.nid, remote_id.pid, xid); - - desc->bd_network_rw = 1; - rc = PtlPut(desc->bd_md_h, PTL_ACK_REQ, remote_id, - desc->bd_portal, 0, xid, 0, 0); - if (rc != PTL_OK) { - desc->bd_network_rw = 0; - CERROR("PtlPut("LPU64", %d, "LPX64") failed: %d\n", - remote_id.nid, desc->bd_portal, xid, rc); - rc2 = PtlMDUnlink(desc->bd_md_h); - LASSERT (rc2 == PTL_OK); - RETURN((rc == PTL_NOSPACE) ? 
-ENOMEM : -ECOMM); - } - - RETURN(0); -} - -int ptlrpc_bulk_get(struct ptlrpc_bulk_desc *desc) -{ - int rc; - int rc2; - struct ptlrpc_peer *peer; - struct list_head *tmp, *next; - ptl_process_id_t remote_id; - ptl_kiov_t *iov; - __u64 xid; - ENTRY; - - /* NB no locking required until desc is on the network */ - LASSERT (!desc->bd_network_rw); - LASSERT (desc->bd_type == BULK_GET_SINK); - desc->bd_complete = 0; - - iov = ptlrpc_get_bulk_iov (desc); - if (iov == NULL) - RETURN(-ENOMEM); - - peer = &desc->bd_export->exp_connection->c_peer; - - desc->bd_md.start = iov; - desc->bd_md.niov = 0; - desc->bd_md.length = 0; - desc->bd_md.eventq = peer->peer_ni->pni_bulk_get_sink_eq_h; - desc->bd_md.threshold = 2; /* SENT and REPLY */ - desc->bd_md.options = PTL_MD_OP_GET | PTL_MD_KIOV; - desc->bd_md.user_ptr = desc; - - desc->bd_callback_count = 2; - - list_for_each_safe(tmp, next, &desc->bd_page_list) { - struct ptlrpc_bulk_page *bulk; - bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); - - LASSERT(desc->bd_md.niov < desc->bd_page_count); - - iov[desc->bd_md.niov].kiov_page = bulk->bp_page; - iov[desc->bd_md.niov].kiov_len = bulk->bp_buflen; - iov[desc->bd_md.niov].kiov_offset = bulk->bp_pageoffset; - - LASSERT (iov[desc->bd_md.niov].kiov_offset + - iov[desc->bd_md.niov].kiov_len <= PAGE_SIZE); - desc->bd_md.niov++; - desc->bd_md.length += bulk->bp_buflen; - } - - LASSERT(desc->bd_md.niov == desc->bd_page_count); - LASSERT(desc->bd_md.niov != 0); - - rc = PtlMDBind(peer->peer_ni->pni_ni_h, desc->bd_md, &desc->bd_md_h); - - ptlrpc_put_bulk_iov(desc, iov); /*move down to reduce latency to send*/ - - if (rc != PTL_OK) { - CERROR("PtlMDBind failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - RETURN(-ENOMEM); - } - - /* Client's bulk and reply matchbits are the same */ - xid = desc->bd_req->rq_xid; - remote_id.nid = desc->bd_export->exp_connection->c_peer.peer_nid; - remote_id.pid = 0; - - CDEBUG(D_NET, "Fetching %u pages %u bytes from portal %d on %s " - "nid "LPX64" 
pid %d xid "LPX64"\n", - desc->bd_md.niov, desc->bd_md.length, desc->bd_portal, - peer->peer_ni->pni_name, remote_id.nid, remote_id.pid, - xid); - - desc->bd_network_rw = 1; - rc = PtlGet(desc->bd_md_h, remote_id, desc->bd_portal, 0, - xid, 0); - if (rc != PTL_OK) { - desc->bd_network_rw = 0; - CERROR("PtlGet("LPU64", %d, "LPX64") failed: %d\n", - remote_id.nid, desc->bd_portal, xid, rc); - rc2 = PtlMDUnlink(desc->bd_md_h); - LASSERT (rc2 == PTL_OK); - RETURN((rc == PTL_NOSPACE) ? -ENOMEM : -ECOMM); - } - - RETURN(0); -} - -void ptlrpc_abort_bulk (struct ptlrpc_bulk_desc *desc) -{ - /* Server side bulk abort. Idempotent. Not thread-safe (i.e. only - * serialises with completion callback) */ - unsigned long flags; - struct l_wait_info lwi; - int callback_count; - int rc; - - LASSERT (!in_interrupt ()); /* might sleep */ - - /* NB. server-side bulk gets 2 events, so we have to keep trying to - * unlink the MD until all callbacks have happened, or - * PtlMDUnlink() returns OK or INVALID */ - again: - spin_lock_irqsave (&desc->bd_lock, flags); - if (!desc->bd_network_rw) { - /* completed or never even registered. NB holding bd_lock - * guarantees callback has completed if it ran. 
*/ - spin_unlock_irqrestore (&desc->bd_lock, flags); - return; - } - - /* sample callback count while we have the lock */ - callback_count = desc->bd_callback_count; - spin_unlock_irqrestore (&desc->bd_lock, flags); - - rc = PtlMDUnlink (desc->bd_md_h); - switch (rc) { - default: - CERROR("PtlMDUnlink returned %d\n", rc); - LBUG (); - case PTL_OK: /* Won the race with the network */ - LASSERT (!desc->bd_complete); /* Not all callbacks ran */ - desc->bd_network_rw = 0; - return; - - case PTL_MD_INUSE: /* MD is being accessed right now */ - for (;;) { - /* Network access will complete in finite time but the - * timeout lets us CERROR for visibility */ - lwi = LWI_TIMEOUT (10 * HZ, NULL, NULL); - rc = l_wait_event(desc->bd_waitq, - desc->bd_callback_count != - callback_count, &lwi); - if (rc == -ETIMEDOUT) { - CERROR("Unexpectedly long timeout: desc %p\n", - desc); - continue; - } - LASSERT (rc == 0); - break; - } - /* go back and try again... */ - goto again; - - case PTL_INV_MD: /* Lost the race with completion */ - LASSERT (desc->bd_complete); /* Callbacks all ran */ - LASSERT (!desc->bd_network_rw); - return; - } -} - -int ptlrpc_register_bulk (struct ptlrpc_request *req) -{ - struct ptlrpc_bulk_desc *desc = req->rq_bulk; - struct ptlrpc_peer *peer; - struct list_head *tmp, *next; - int rc; - int rc2; - ptl_kiov_t *iov; - ptl_process_id_t source_id; - ENTRY; - - /* NB no locking required until desc is on the network */ - LASSERT (!desc->bd_network_rw); - LASSERT (desc->bd_page_count <= PTL_MD_MAX_IOV); - LASSERT (desc->bd_req != NULL); - LASSERT (desc->bd_type == BULK_PUT_SINK || - desc->bd_type == BULK_GET_SOURCE); - - desc->bd_complete = 0; - - iov = ptlrpc_get_bulk_iov (desc); - if (iov == NULL) - return (-ENOMEM); - - peer = &desc->bd_import->imp_connection->c_peer; - - desc->bd_md.start = iov; - desc->bd_md.niov = 0; - desc->bd_md.length = 0; - desc->bd_md.threshold = 1; - desc->bd_md.user_ptr = desc; - - if (desc->bd_type == BULK_GET_SOURCE) { - 
desc->bd_md.options = PTL_MD_OP_GET | PTL_MD_KIOV; - desc->bd_md.eventq = peer->peer_ni->pni_bulk_get_source_eq_h; - } else { - desc->bd_md.options = PTL_MD_OP_PUT | PTL_MD_KIOV; - desc->bd_md.eventq = peer->peer_ni->pni_bulk_put_sink_eq_h; - } - - list_for_each_safe(tmp, next, &desc->bd_page_list) { - struct ptlrpc_bulk_page *bulk; - bulk = list_entry(tmp, struct ptlrpc_bulk_page, bp_link); - - LASSERT(desc->bd_md.niov < desc->bd_page_count); - - iov[desc->bd_md.niov].kiov_page = bulk->bp_page; - iov[desc->bd_md.niov].kiov_len = bulk->bp_buflen; - iov[desc->bd_md.niov].kiov_offset = bulk->bp_pageoffset; - - LASSERT (bulk->bp_pageoffset + bulk->bp_buflen <= PAGE_SIZE); - desc->bd_md.niov++; - desc->bd_md.length += bulk->bp_buflen; - } - - LASSERT(desc->bd_md.niov == desc->bd_page_count); - LASSERT(desc->bd_md.niov != 0); - - /* XXX Registering the same xid on retried bulk makes my head - * explode trying to understand how the original request's bulk - * might interfere with the retried request -eeb */ - LASSERT (!desc->bd_registered || req->rq_xid != desc->bd_last_xid); - desc->bd_registered = 1; - desc->bd_last_xid = desc->bd_last_xid; - - source_id.nid = desc->bd_import->imp_connection->c_peer.peer_nid; - source_id.pid = PTL_PID_ANY; - - rc = PtlMEAttach(peer->peer_ni->pni_ni_h, - desc->bd_portal, source_id, req->rq_xid, 0, - PTL_UNLINK, PTL_INS_AFTER, &desc->bd_me_h); - - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - GOTO(out, rc = -ENOMEM); - } - - /* About to let the network at it... 
*/ - desc->bd_network_rw = 1; - rc = PtlMDAttach(desc->bd_me_h, desc->bd_md, PTL_UNLINK, - &desc->bd_md_h); - if (rc != PTL_OK) { - CERROR("PtlMDAttach failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - desc->bd_network_rw = 0; - rc2 = PtlMEUnlink (desc->bd_me_h); - LASSERT (rc2 == PTL_OK); - GOTO(out, rc = -ENOMEM); - } - rc = 0; - - CDEBUG(D_NET, "Setup bulk %s buffers: %u pages %u bytes, xid "LPX64", " - "portal %u on %s\n", - desc->bd_type == BULK_GET_SOURCE ? "get-source" : "put-sink", - desc->bd_md.niov, desc->bd_md.length, - req->rq_xid, desc->bd_portal, peer->peer_ni->pni_name); - - out: - ptlrpc_put_bulk_iov (desc, iov); - RETURN(rc); -} - -void ptlrpc_unregister_bulk (struct ptlrpc_request *req) -{ - /* Disconnect a bulk desc from the network. Idempotent. Not - * thread-safe (i.e. only interlocks with completion callback). */ - struct ptlrpc_bulk_desc *desc = req->rq_bulk; - wait_queue_head_t *wq; - unsigned long flags; - struct l_wait_info lwi; - int rc; - - LASSERT (!in_interrupt ()); /* might sleep */ - - spin_lock_irqsave (&desc->bd_lock, flags); - if (!desc->bd_network_rw) { /* completed or never even registered */ - spin_unlock_irqrestore (&desc->bd_lock, flags); - return; - } - spin_unlock_irqrestore (&desc->bd_lock, flags); - - LASSERT (desc->bd_req == req); /* NB bd_req NULL until registered */ - - /* NB... - * 1. If the MD unlink is successful, the ME gets unlinked too. - * 2. Since client-side bulk only gets a single event and a - * .. threshold of 1. If the MD was inuse at the first link - * .. attempt, the callback is due any minute, and the MD/ME will - * .. unlink themselves. 
- */ - rc = PtlMDUnlink (desc->bd_md_h); - switch (rc) { - default: - CERROR("PtlMDUnlink returned %d\n", rc); - LBUG (); - case PTL_OK: /* Won the race with completion */ - LASSERT (!desc->bd_complete); /* Callback hasn't happened */ - desc->bd_network_rw = 0; - return; - case PTL_MD_INUSE: /* MD is being accessed right now */ - for (;;) { - /* Network access will complete in finite time but the - * timeout lets us CERROR for visibility */ - if (desc->bd_req->rq_set != NULL) - wq = &req->rq_set->set_waitq; - else - wq = &req->rq_wait_for_rep; - lwi = LWI_TIMEOUT (10 * HZ, NULL, NULL); - rc = l_wait_event(*wq, ptlrpc_bulk_complete(desc), &lwi); - LASSERT (rc == 0 || rc == -ETIMEDOUT); - if (rc == 0) - break; - CERROR ("Unexpectedly long timeout: desc %p\n", desc); - LBUG(); - } - /* Fall through */ - case PTL_INV_MD: /* Lost the race with completion */ - LASSERT (desc->bd_complete);/* Callback has run to completion */ - LASSERT (!desc->bd_network_rw); - return; - } -} - -int ptlrpc_reply(struct ptlrpc_request *req) -{ - unsigned long flags; - int rc; - - /* We must already have a reply buffer (only ptlrpc_error() may be - * called without one). We must also have a request buffer which - * is either the actual (swabbed) incoming request, or a saved copy - * if this is a req saved in target_queue_final_reply(). 
*/ - LASSERT (req->rq_repmsg != NULL); - LASSERT (req->rq_reqmsg != NULL); - - /* FIXME: we need to increment the count of handled events */ - if (req->rq_type != PTL_RPC_MSG_ERR) - req->rq_type = PTL_RPC_MSG_REPLY; - - req->rq_repmsg->status = req->rq_status; - req->rq_repmsg->opc = req->rq_reqmsg->opc; - - init_waitqueue_head(&req->rq_wait_for_rep); - rc = ptl_send_buf(req, req->rq_connection, req->rq_svc->srv_rep_portal); - if (rc != 0) { - /* Do what the callback handler would have done */ - OBD_FREE (req->rq_repmsg, req->rq_replen); - - spin_lock_irqsave (&req->rq_lock, flags); - req->rq_want_ack = 0; - spin_unlock_irqrestore (&req->rq_lock, flags); - } - return rc; -} - -int ptlrpc_error(struct ptlrpc_request *req) -{ - int rc; - ENTRY; - - if (!req->rq_repmsg) { - rc = lustre_pack_msg(0, NULL, NULL, &req->rq_replen, - &req->rq_repmsg); - if (rc) - RETURN(rc); - } - - - req->rq_type = PTL_RPC_MSG_ERR; - - rc = ptlrpc_reply(req); - RETURN(rc); -} - -int ptl_send_rpc(struct ptlrpc_request *request) -{ - int rc; - int rc2; - unsigned long flags; - ptl_process_id_t source_id; - ptl_handle_me_t reply_me_h; - ENTRY; - - LASSERT (request->rq_type == PTL_RPC_MSG_REQUEST); - - /* If this is a re-transmit, we're required to have disengaged - * cleanly from the previous attempt */ - LASSERT (!request->rq_receiving_reply); - - if (request->rq_bulk != NULL) { - rc = ptlrpc_register_bulk (request); - if (rc != 0) - RETURN(rc); - } - - request->rq_reqmsg->handle = request->rq_import->imp_remote_handle; - - source_id.nid = request->rq_connection->c_peer.peer_nid; - source_id.pid = PTL_PID_ANY; - - LASSERT (request->rq_replen != 0); - OBD_ALLOC(request->rq_repmsg, request->rq_replen); - if (request->rq_repmsg == NULL) { - LBUG(); - RETURN(-ENOMEM); - } - - rc = PtlMEAttach(request->rq_connection->c_peer.peer_ni->pni_ni_h, - request->rq_reply_portal, /* XXX FIXME bug 249 */ - source_id, request->rq_xid, 0, PTL_UNLINK, - PTL_INS_AFTER, &reply_me_h); - if (rc != PTL_OK) { - 
CERROR("PtlMEAttach failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - LBUG(); - GOTO(cleanup, rc = -ENOMEM); - } - - request->rq_reply_md.start = request->rq_repmsg; - request->rq_reply_md.length = request->rq_replen; - request->rq_reply_md.threshold = 1; - request->rq_reply_md.options = PTL_MD_OP_PUT; - request->rq_reply_md.user_ptr = request; - request->rq_reply_md.eventq = - request->rq_connection->c_peer.peer_ni->pni_reply_in_eq_h; - - rc = PtlMDAttach(reply_me_h, request->rq_reply_md, - PTL_UNLINK, &request->rq_reply_md_h); - if (rc != PTL_OK) { - CERROR("PtlMDAttach failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - LBUG(); - GOTO(cleanup2, rc -ENOMEM); - } - - CDEBUG(D_NET, "Setup reply buffer: %u bytes, xid "LPU64 - ", portal %u on %s\n", - request->rq_replen, request->rq_xid, - request->rq_reply_portal, - request->rq_connection->c_peer.peer_ni->pni_name); - - ptlrpc_request_addref(request); /* 1 ref for the SENT callback */ - - spin_lock_irqsave (&request->rq_lock, flags); - request->rq_receiving_reply = 1; - /* Clear any flags that may be present from previous sends. */ - request->rq_replied = 0; - request->rq_err = 0; - request->rq_timedout = 0; - request->rq_resend = 0; - request->rq_restart = 0; - spin_unlock_irqrestore (&request->rq_lock, flags); - - request->rq_sent = LTIME_S(CURRENT_TIME); - ptlrpc_pinger_sending_on_import(request->rq_import); - rc = ptl_send_buf(request, request->rq_connection, - request->rq_request_portal); - if (rc == 0) - RETURN(rc); - - spin_lock_irqsave (&request->rq_lock, flags); - request->rq_receiving_reply = 0; - spin_unlock_irqrestore (&request->rq_lock, flags); - ptlrpc_req_finished (request); /* drop callback ref */ - cleanup2: - /* MEUnlink is safe; the PUT didn't even get off the ground, and - * nobody apart from the PUT's target has the right nid+XID to - * access the reply buffer. 
*/ - rc2 = PtlMEUnlink(reply_me_h); - LASSERT (rc2 == PTL_OK); - cleanup: - OBD_FREE(request->rq_repmsg, request->rq_replen); - request->rq_repmsg = NULL; - return rc; -} - -void ptlrpc_link_svc_me(struct ptlrpc_request_buffer_desc *rqbd) -{ - struct ptlrpc_srv_ni *srv_ni = rqbd->rqbd_srv_ni; - struct ptlrpc_service *service = srv_ni->sni_service; - static ptl_process_id_t match_id = {PTL_NID_ANY, PTL_PID_ANY}; - int rc; - ptl_md_t dummy; - ptl_handle_md_t md_h; - - LASSERT(atomic_read(&rqbd->rqbd_refcount) == 0); - - CDEBUG(D_NET, "PtlMEAttach: portal %d on %s h %lx."LPX64"\n", - service->srv_req_portal, srv_ni->sni_ni->pni_name, - srv_ni->sni_ni->pni_ni_h.nal_idx, - srv_ni->sni_ni->pni_ni_h.cookie); - - /* Attach the leading ME on which we build the ring */ - rc = PtlMEAttach(srv_ni->sni_ni->pni_ni_h, service->srv_req_portal, - match_id, 0, ~0, - PTL_UNLINK, PTL_INS_AFTER, &rqbd->rqbd_me_h); - if (rc != PTL_OK) { - CERROR("PtlMEAttach failed: %d\n", rc); - /* BUG 1191 */ - LBUG(); - } - - dummy.start = rqbd->rqbd_buffer; - dummy.length = service->srv_buf_size; - dummy.max_size = service->srv_max_req_size; - dummy.threshold = PTL_MD_THRESH_INF; - dummy.options = PTL_MD_OP_PUT | PTL_MD_MAX_SIZE | PTL_MD_AUTO_UNLINK; - dummy.user_ptr = rqbd; - dummy.eventq = srv_ni->sni_eq_h; - - atomic_inc(&srv_ni->sni_nrqbds_receiving); - atomic_set(&rqbd->rqbd_refcount, 1); /* 1 ref for portals */ - - rc = PtlMDAttach(rqbd->rqbd_me_h, dummy, PTL_UNLINK, &md_h); - if (rc != PTL_OK) { - CERROR("PtlMDAttach failed: %d\n", rc); - LASSERT (rc == PTL_NOSPACE); - LBUG(); - /* BUG 1191 */ - PtlMEUnlink (rqbd->rqbd_me_h); - atomic_set(&rqbd->rqbd_refcount, 0); - atomic_dec(&srv_ni->sni_nrqbds_receiving); - } -} diff --git a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c deleted file mode 100644 index 3811d2a..0000000 --- a/lustre/ptlrpc/pack_generic.c +++ /dev/null @@ -1,1092 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * 
vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001-2003 Cluster File Systems, Inc. - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Eric Barton <eeb@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * (Un)packing of OST requests - * - */ - -#define DEBUG_SUBSYSTEM S_RPC -#ifndef __KERNEL__ -#include <liblustre.h> -#endif - -#include <linux/obd_support.h> -#include <linux/lustre_net.h> - - -#define HDR_SIZE(count) \ - size_round(offsetof (struct lustre_msg, buflens[(count)])) - -int lustre_pack_msg(int count, int *lens, char **bufs, int *len, - struct lustre_msg **msg) -{ - char *ptr; - struct lustre_msg *m; - int size = 0, i; - - size = HDR_SIZE (count); - for (i = 0; i < count; i++) - size += size_round(lens[i]); - - *len = size; - - OBD_ALLOC(*msg, *len); - if (!*msg) - RETURN(-ENOMEM); - - m = *msg; - m->magic = PTLRPC_MSG_MAGIC; - m->version = PTLRPC_MSG_VERSION; - m->bufcount = count; - for (i = 0; i < count; i++) - m->buflens[i] = lens[i]; - - ptr = (char *)m + HDR_SIZE(count); - for (i = 0; i < count; i++) { - char *tmp = NULL; - if (bufs) - tmp = bufs[i]; - LOGL(tmp, lens[i], ptr); - - } - - return 0; -} - -/* This returns the size of the buffer that is required to hold a lustre_msg - * with the given sub-buffer lengths. 
*/ -int lustre_msg_size(int count, int *lengths) -{ - int size; - int i; - - size = HDR_SIZE (count); - for (i = 0; i < count; i++) - size += size_round(lengths[i]); - - return size; -} - -int lustre_unpack_msg(struct lustre_msg *m, int len) -{ - int flipped; - int required_len; - int i; - ENTRY; - - /* We can provide a slightly better error log, if we check the - * message magic and version first. In the future, struct - * lustre_msg may grow, and we'd like to log a version mismatch, - * rather than a short message. - * - */ - required_len = MAX (offsetof (struct lustre_msg, version) + - sizeof (m->version), - offsetof (struct lustre_msg, magic) + - sizeof (m->magic)); - if (len < required_len) { - /* can't even look inside the message */ - CERROR ("message length %d too small for magic/version check\n", - len); - RETURN (-EINVAL); - } - - flipped = lustre_msg_swabbed(m); - if (flipped) - __swab32s (&m->version); - else if (m->magic != PTLRPC_MSG_MAGIC) { - CERROR("wrong lustre_msg magic %#08x\n", m->magic); - RETURN (-EINVAL); - } - - if (m->version != PTLRPC_MSG_VERSION) { - CERROR("wrong lustre_msg version %#08x\n", m->version); - RETURN (-EINVAL); - } - - /* Now we know the sender speaks my language (but possibly flipped)...*/ - required_len = HDR_SIZE(0); - if (len < required_len) { - /* can't even look inside the message */ - CERROR ("message length %d too small for lustre_msg\n", len); - RETURN (-EINVAL); - } - - if (flipped) { - __swab32s (&m->type); - __swab32s (&m->opc); - __swab64s (&m->last_xid); - __swab64s (&m->last_committed); - __swab64s (&m->transno); - __swab32s (&m->status); - __swab32s (&m->bufcount); - __swab32s (&m->flags); - } - - required_len = HDR_SIZE(m->bufcount); - - if (len < required_len) { - /* didn't receive all the buffer lengths */ - CERROR ("message length %d too small for %d buflens\n", - len, m->bufcount); - RETURN(-EINVAL); - } - - for (i = 0; i < m->bufcount; i++) { - if (flipped) - __swab32s (&m->buflens[i]); - required_len 
+= size_round(m->buflens[i]); - } - - if (len < required_len) { - CERROR("len: %d, required_len %d\n", len, required_len); - CERROR("bufcount: %d\n", m->bufcount); - for (i = 0; i < m->bufcount; i++) - CERROR("buffer %d length %d\n", i, m->buflens[i]); - RETURN(-EINVAL); - } - - RETURN(0); -} - -void *lustre_msg_buf(struct lustre_msg *m, int n, int min_size) -{ - int i; - int offset; - int buflen; - int bufcount; - - LASSERT (m != NULL); - LASSERT (n >= 0); - - bufcount = m->bufcount; - if (n >= bufcount) { - CDEBUG(D_INFO, "msg %p buffer[%d] not present (count %d)\n", - m, n, bufcount); - return NULL; - } - - buflen = m->buflens[n]; - if (buflen == 0) { - CERROR("msg %p buffer[%d] is zero length\n", m, n); - return NULL; - } - - if (buflen < min_size) { - CERROR("msg %p buffer[%d] size %d too small (required %d)\n", - m, n, buflen, min_size); - return NULL; - } - - offset = HDR_SIZE(bufcount); - for (i = 0; i < n; i++) - offset += size_round(m->buflens[i]); - - return (char *)m + offset; -} - -char *lustre_msg_string (struct lustre_msg *m, int index, int max_len) -{ - /* max_len == 0 means the string should fill the buffer */ - char *str = lustre_msg_buf (m, index, 0); - int slen; - int blen; - - if (str == NULL) { - CERROR ("can't unpack string in msg %p buffer[%d]\n", m, index); - return (NULL); - } - - blen = m->buflens[index]; - slen = strnlen (str, blen); - - if (slen == blen) { /* not NULL terminated */ - CERROR ("can't unpack non-NULL terminated string in " - "msg %p buffer[%d] len %d\n", m, index, blen); - return (NULL); - } - - if (max_len == 0) { - if (slen != blen - 1) { - CERROR ("can't unpack short string in msg %p " - "buffer[%d] len %d: strlen %d\n", - m, index, blen, slen); - return (NULL); - } - } else if (slen > max_len) { - CERROR ("can't unpack oversized string in msg %p " - "buffer[%d] len %d strlen %d: max %d expected\n", - m, index, blen, slen, max_len); - return (NULL); - } - - return (str); -} - -/* Wrap up the normal fixed length case */ 
-void *lustre_swab_reqbuf (struct ptlrpc_request *req, int index, int min_size, - void *swabber) -{ - void *ptr; - - LASSERT_REQSWAB (req, index); - - ptr = lustre_msg_buf(req->rq_reqmsg, index, min_size); - if (ptr == NULL) - return (NULL); - - if (swabber != NULL && - lustre_msg_swabbed (req->rq_reqmsg)) - ((void (*)(void *))swabber)(ptr); - - return (ptr); -} - -/* Wrap up the normal fixed length case */ -void *lustre_swab_repbuf (struct ptlrpc_request *req, int index, int min_size, - void *swabber) -{ - void *ptr; - - LASSERT_REPSWAB (req, index); - - ptr = lustre_msg_buf (req->rq_repmsg, index, min_size); - if (ptr == NULL) - return (NULL); - - if (swabber != NULL && - lustre_msg_swabbed (req->rq_repmsg)) - ((void (*)(void *))swabber)(ptr); - - return (ptr); -} - -/* byte flipping routines for all wire types declared in - * lustre_idl.h implemented here. - */ - -void lustre_swab_obdo (struct obdo *o) -{ - __swab64s (&o->o_id); - __swab64s (&o->o_gr); - __swab64s (&o->o_atime); - __swab64s (&o->o_mtime); - __swab64s (&o->o_ctime); - __swab64s (&o->o_size); - __swab64s (&o->o_blocks); - __swab64s (&o->o_rdev); - __swab32s (&o->o_blksize); - __swab32s (&o->o_mode); - __swab32s (&o->o_uid); - __swab32s (&o->o_gid); - __swab32s (&o->o_flags); - __swab32s (&o->o_nlink); - __swab32s (&o->o_generation); - __swab32s (&o->o_valid); - __swab32s (&o->o_obdflags); - __swab32s (&o->o_easize); - /* o_inline is opaque */ -} - -void lustre_swab_obd_statfs (struct obd_statfs *os) -{ - __swab64s (&os->os_type); - __swab64s (&os->os_blocks); - __swab64s (&os->os_bfree); - __swab64s (&os->os_bavail); - __swab64s (&os->os_ffree); - /* no need to swap os_fsid */ - __swab32s (&os->os_bsize); - __swab32s (&os->os_namelen); - /* no need to swap os_spare */ -} - -void lustre_swab_obd_ioobj (struct obd_ioobj *ioo) -{ - __swab64s (&ioo->ioo_id); - __swab64s (&ioo->ioo_gr); - __swab32s (&ioo->ioo_type); - __swab32s (&ioo->ioo_bufcnt); -} - -void lustre_swab_niobuf_remote (struct 
niobuf_remote *nbr) -{ - __swab64s (&nbr->offset); - __swab32s (&nbr->len); - __swab32s (&nbr->flags); -} - -void lustre_swab_ost_body (struct ost_body *b) -{ - lustre_swab_obdo (&b->oa); -} - -void lustre_swab_ll_fid (struct ll_fid *fid) -{ - __swab64s (&fid->id); - __swab32s (&fid->generation); - __swab32s (&fid->f_type); -} - -void lustre_swab_mds_status_req (struct mds_status_req *r) -{ - __swab32s (&r->flags); - __swab32s (&r->repbuf); -} - -void lustre_swab_mds_fileh_body (struct mds_fileh_body *f) -{ - lustre_swab_ll_fid (&f->f_fid); -} - -void lustre_swab_mds_body (struct mds_body *b) -{ - lustre_swab_ll_fid (&b->fid1); - lustre_swab_ll_fid (&b->fid2); - /* handle is opaque */ - __swab64s (&b->size); - __swab64s (&b->blocks); - __swab32s (&b->ino); - __swab32s (&b->valid); - __swab32s (&b->fsuid); - __swab32s (&b->fsgid); - __swab32s (&b->capability); - __swab32s (&b->mode); - __swab32s (&b->uid); - __swab32s (&b->gid); - __swab32s (&b->mtime); - __swab32s (&b->ctime); - __swab32s (&b->atime); - __swab32s (&b->flags); - __swab32s (&b->rdev); - __swab32s (&b->nlink); - __swab32s (&b->generation); - __swab32s (&b->suppgid); - __swab32s (&b->eadatasize); -} - -void lustre_swab_mds_rec_setattr (struct mds_rec_setattr *sa) -{ - __swab32s (&sa->sa_opcode); - __swab32s (&sa->sa_fsuid); - __swab32s (&sa->sa_fsgid); - __swab32s (&sa->sa_cap); - __swab32s (&sa->sa_reserved); - __swab32s (&sa->sa_valid); - lustre_swab_ll_fid (&sa->sa_fid); - __swab32s (&sa->sa_mode); - __swab32s (&sa->sa_uid); - __swab32s (&sa->sa_gid); - __swab32s (&sa->sa_attr_flags); - __swab64s (&sa->sa_size); - __swab64s (&sa->sa_atime); - __swab64s (&sa->sa_mtime); - __swab64s (&sa->sa_ctime); - __swab32s (&sa->sa_suppgid); -} - -void lustre_swab_mds_rec_create (struct mds_rec_create *cr) -{ - __swab32s (&cr->cr_opcode); - __swab32s (&cr->cr_fsuid); - __swab32s (&cr->cr_fsgid); - __swab32s (&cr->cr_cap); - __swab32s (&cr->cr_flags); /* for use with open */ - __swab32s (&cr->cr_mode); - 
lustre_swab_ll_fid (&cr->cr_fid); - lustre_swab_ll_fid (&cr->cr_replayfid); - __swab32s (&cr->cr_uid); - __swab32s (&cr->cr_gid); - __swab64s (&cr->cr_time); - __swab64s (&cr->cr_rdev); - __swab32s (&cr->cr_suppgid); -} - -void lustre_swab_mds_rec_link (struct mds_rec_link *lk) -{ - __swab32s (&lk->lk_opcode); - __swab32s (&lk->lk_fsuid); - __swab32s (&lk->lk_fsgid); - __swab32s (&lk->lk_cap); - __swab32s (&lk->lk_suppgid1); - __swab32s (&lk->lk_suppgid2); - lustre_swab_ll_fid (&lk->lk_fid1); - lustre_swab_ll_fid (&lk->lk_fid2); -} - -void lustre_swab_mds_rec_unlink (struct mds_rec_unlink *ul) -{ - __swab32s (&ul->ul_opcode); - __swab32s (&ul->ul_fsuid); - __swab32s (&ul->ul_fsgid); - __swab32s (&ul->ul_cap); - __swab32s (&ul->ul_reserved); - __swab32s (&ul->ul_mode); - __swab32s (&ul->ul_suppgid); - lustre_swab_ll_fid (&ul->ul_fid1); - lustre_swab_ll_fid (&ul->ul_fid2); -} - -void lustre_swab_mds_rec_rename (struct mds_rec_rename *rn) -{ - __swab32s (&rn->rn_opcode); - __swab32s (&rn->rn_fsuid); - __swab32s (&rn->rn_fsgid); - __swab32s (&rn->rn_cap); - __swab32s (&rn->rn_suppgid1); - __swab32s (&rn->rn_suppgid2); - lustre_swab_ll_fid (&rn->rn_fid1); - lustre_swab_ll_fid (&rn->rn_fid2); -} - -void lustre_swab_lov_desc (struct lov_desc *ld) -{ - __swab32s (&ld->ld_tgt_count); - __swab32s (&ld->ld_active_tgt_count); - __swab32s (&ld->ld_default_stripe_count); - __swab64s (&ld->ld_default_stripe_size); - __swab64s (&ld->ld_default_stripe_offset); - __swab32s (&ld->ld_pattern); - /* uuid endian insensitive */ -} - -void lustre_swab_ldlm_res_id (struct ldlm_res_id *id) -{ - int i; - - for (i = 0; i < RES_NAME_SIZE; i++) - __swab64s (&id->name[i]); -} - -void lustre_swab_ldlm_extent (struct ldlm_extent *e) -{ - __swab64s (&e->start); - __swab64s (&e->end); -} - -void lustre_swab_ldlm_intent (struct ldlm_intent *i) -{ - __swab64s (&i->opc); -} - -void lustre_swab_ldlm_resource_desc (struct ldlm_resource_desc *r) -{ - int i; - - __swab32s (&r->lr_type); - 
lustre_swab_ldlm_res_id (&r->lr_name); - for (i = 0; i < RES_VERSION_SIZE; i++) - __swab32s (&r->lr_version[i]); -} - -void lustre_swab_ldlm_lock_desc (struct ldlm_lock_desc *l) -{ - int i; - - lustre_swab_ldlm_resource_desc (&l->l_resource); - __swab32s (&l->l_req_mode); - __swab32s (&l->l_granted_mode); - lustre_swab_ldlm_extent (&l->l_extent); - for (i = 0; i < RES_VERSION_SIZE; i++) - __swab32s (&l->l_version[i]); -} - -void lustre_swab_ldlm_request (struct ldlm_request *rq) -{ - __swab32s (&rq->lock_flags); - lustre_swab_ldlm_lock_desc (&rq->lock_desc); - /* lock_handle1 opaque */ - /* lock_handle2 opaque */ -} - -void lustre_swab_ldlm_reply (struct ldlm_reply *r) -{ - __swab32s (&r->lock_flags); - __swab32s (&r->lock_mode); - lustre_swab_ldlm_res_id (&r->lock_resource_name); - /* lock_handle opaque */ - lustre_swab_ldlm_extent (&r->lock_extent); - __swab64s (&r->lock_policy_res1); - __swab64s (&r->lock_policy_res2); -} - -void lustre_swab_ptlbd_op (struct ptlbd_op *op) -{ - __swab16s (&op->op_cmd); - __swab16s (&op->op_lun); - __swab16s (&op->op_niob_cnt); - /* ignore op__padding */ - __swab32s (&op->op_block_cnt); -} - -void lustre_swab_ptlbd_niob (struct ptlbd_niob *n) -{ - __swab64s (&n->n_xid); - __swab64s (&n->n_block_nr); - __swab32s (&n->n_offset); - __swab32s (&n->n_length); -} - -void lustre_swab_ptlbd_rsp (struct ptlbd_rsp *r) -{ - __swab16s (&r->r_status); - __swab16s (&r->r_error_cnt); -} - -void lustre_assert_wire_constants (void) -{ -#if BUG_1343 - /* Wire protocol assertions generated by 'wirecheck' */ - - /* Constants... 
*/ - LASSERT (PTLRPC_MSG_MAGIC == 0x0BD00BD0); - LASSERT (PTLRPC_MSG_VERSION == 0x00040002); - LASSERT (PTL_RPC_MSG_REQUEST == 4711); - LASSERT (PTL_RPC_MSG_ERR == 4712); - LASSERT (PTL_RPC_MSG_REPLY == 4713); - LASSERT (MSG_LAST_REPLAY == 1); - LASSERT (MSG_RESENT == 2); - LASSERT (MSG_CONNECT_RECOVERING == 1); - LASSERT (MSG_CONNECT_RECONNECT == 2); - LASSERT (MSG_CONNECT_REPLAYABLE == 4); - LASSERT (OST_REPLY == 0); - LASSERT (OST_GETATTR == 1); - LASSERT (OST_SETATTR == 2); - LASSERT (OST_READ == 3); - LASSERT (OST_WRITE == 4); - LASSERT (OST_CREATE == 5); - LASSERT (OST_DESTROY == 6); - LASSERT (OST_GET_INFO == 7); - LASSERT (OST_CONNECT == 8); - LASSERT (OST_DISCONNECT == 9); - LASSERT (OST_PUNCH == 10); - LASSERT (OST_OPEN == 11); - LASSERT (OST_CLOSE == 12); - LASSERT (OST_STATFS == 13); - LASSERT (OST_SAN_READ == 14); - LASSERT (OST_SAN_WRITE == 15); - LASSERT (OST_SYNCFS == 16); - LASSERT (OST_LAST_OPC == 17); - LASSERT (OST_FIRST_OPC == 0); - LASSERT (OBD_FL_INLINEDATA == 1); - LASSERT (OBD_FL_OBDMDEXISTS == 2); - LASSERT (LOV_MAGIC == 198183888); - LASSERT (OBD_MD_FLALL == -1); - LASSERT (OBD_MD_FLID == 1); - LASSERT (OBD_MD_FLATIME == 2); - LASSERT (OBD_MD_FLMTIME == 4); - LASSERT (OBD_MD_FLCTIME == 8); - LASSERT (OBD_MD_FLSIZE == 16); - LASSERT (OBD_MD_FLBLOCKS == 32); - LASSERT (OBD_MD_FLBLKSZ == 64); - LASSERT (OBD_MD_FLMODE == 128); - LASSERT (OBD_MD_FLTYPE == 256); - LASSERT (OBD_MD_FLUID == 512); - LASSERT (OBD_MD_FLGID == 1024); - LASSERT (OBD_MD_FLFLAGS == 2048); - LASSERT (OBD_MD_FLOBDFLG == 4096); - LASSERT (OBD_MD_FLNLINK == 8192); - LASSERT (OBD_MD_FLGENER == 16384); - LASSERT (OBD_MD_FLINLINE == 32768); - LASSERT (OBD_MD_FLRDEV == 65536); - LASSERT (OBD_MD_FLEASIZE == 131072); - LASSERT (OBD_MD_LINKNAME == 262144); - LASSERT (OBD_MD_FLHANDLE == 524288); - LASSERT (OBD_MD_FLCKSUM == 1048576); - LASSERT (OBD_BRW_READ == 1); - LASSERT (OBD_BRW_WRITE == 2); - LASSERT (OBD_BRW_CREATE == 4); - LASSERT (OBD_BRW_SYNC == 8); - LASSERT 
(OBD_OBJECT_EOF == 0xffffffffffffffffULL); - LASSERT (OST_REQ_HAS_OA1 == 1); - LASSERT (MDS_GETATTR == 33); - LASSERT (MDS_GETATTR_NAME == 34); - LASSERT (MDS_CLOSE == 35); - LASSERT (MDS_REINT == 36); - LASSERT (MDS_READPAGE == 37); - LASSERT (MDS_CONNECT == 38); - LASSERT (MDS_DISCONNECT == 39); - LASSERT (MDS_GETSTATUS == 40); - LASSERT (MDS_STATFS == 41); - LASSERT (MDS_GETLOVINFO == 42); - LASSERT (MDS_LAST_OPC == 43); - LASSERT (MDS_FIRST_OPC == 33); - LASSERT (REINT_SETATTR == 1); - LASSERT (REINT_CREATE == 2); - LASSERT (REINT_LINK == 3); - LASSERT (REINT_UNLINK == 4); - LASSERT (REINT_RENAME == 5); - LASSERT (REINT_OPEN == 6); - LASSERT (REINT_MAX == 6); - LASSERT (IT_INTENT_EXEC == 1); - LASSERT (IT_OPEN_LOOKUP == 2); - LASSERT (IT_OPEN_NEG == 4); - LASSERT (IT_OPEN_POS == 8); - LASSERT (IT_OPEN_CREATE == 16); - LASSERT (IT_OPEN_OPEN == 32); - LASSERT (MDS_STATUS_CONN == 1); - LASSERT (MDS_STATUS_LOV == 2); - LASSERT (MDS_OPEN_HAS_EA == 1); - LASSERT (LOV_RAID0 == 0); - LASSERT (LOV_RAIDRR == 1); - LASSERT (LDLM_ENQUEUE == 101); - LASSERT (LDLM_CONVERT == 102); - LASSERT (LDLM_CANCEL == 103); - LASSERT (LDLM_BL_CALLBACK == 104); - LASSERT (LDLM_CP_CALLBACK == 105); - LASSERT (LDLM_LAST_OPC == 106); - LASSERT (LDLM_FIRST_OPC == 101); - LASSERT (PTLBD_QUERY == 200); - LASSERT (PTLBD_READ == 201); - LASSERT (PTLBD_WRITE == 202); - LASSERT (PTLBD_FLUSH == 203); - LASSERT (PTLBD_CONNECT == 204); - LASSERT (PTLBD_DISCONNECT == 205); - LASSERT (PTLBD_LAST_OPC == 204); - LASSERT (PTLBD_FIRST_OPC == 200); - LASSERT (OBD_PING == 400); - /* Sizes and Offsets */ - - - /* Checks for struct lustre_handle */ - LASSERT (sizeof (struct lustre_handle) == 8); - LASSERT (offsetof (struct lustre_handle, cookie) == 0); - LASSERT (sizeof (((struct lustre_handle *)0)->cookie) == 8); - - /* Checks for struct lustre_msg */ - LASSERT (sizeof (struct lustre_msg) == 60); - LASSERT (offsetof (struct lustre_msg, handle) == 0); - LASSERT (sizeof (((struct lustre_msg *)0)->handle) == 8); 
- LASSERT (offsetof (struct lustre_msg, magic) == 8); - LASSERT (sizeof (((struct lustre_msg *)0)->magic) == 4); - LASSERT (offsetof (struct lustre_msg, type) == 12); - LASSERT (sizeof (((struct lustre_msg *)0)->type) == 4); - LASSERT (offsetof (struct lustre_msg, version) == 16); - LASSERT (sizeof (((struct lustre_msg *)0)->version) == 4); - LASSERT (offsetof (struct lustre_msg, opc) == 20); - LASSERT (sizeof (((struct lustre_msg *)0)->opc) == 4); - LASSERT (offsetof (struct lustre_msg, last_xid) == 24); - LASSERT (sizeof (((struct lustre_msg *)0)->last_xid) == 8); - LASSERT (offsetof (struct lustre_msg, last_committed) == 32); - LASSERT (sizeof (((struct lustre_msg *)0)->last_committed) == 8); - LASSERT (offsetof (struct lustre_msg, transno) == 40); - LASSERT (sizeof (((struct lustre_msg *)0)->transno) == 8); - LASSERT (offsetof (struct lustre_msg, status) == 48); - LASSERT (sizeof (((struct lustre_msg *)0)->status) == 4); - LASSERT (offsetof (struct lustre_msg, flags) == 52); - LASSERT (sizeof (((struct lustre_msg *)0)->flags) == 4); - LASSERT (offsetof (struct lustre_msg, bufcount) == 56); - LASSERT (sizeof (((struct lustre_msg *)0)->bufcount) == 4); - LASSERT (offsetof (struct lustre_msg, buflens[7]) == 88); - LASSERT (sizeof (((struct lustre_msg *)0)->buflens[7]) == 4); - - /* Checks for struct obdo */ - LASSERT (sizeof (struct obdo) == 164); - LASSERT (offsetof (struct obdo, o_id) == 0); - LASSERT (sizeof (((struct obdo *)0)->o_id) == 8); - LASSERT (offsetof (struct obdo, o_gr) == 8); - LASSERT (sizeof (((struct obdo *)0)->o_gr) == 8); - LASSERT (offsetof (struct obdo, o_atime) == 16); - LASSERT (sizeof (((struct obdo *)0)->o_atime) == 8); - LASSERT (offsetof (struct obdo, o_mtime) == 24); - LASSERT (sizeof (((struct obdo *)0)->o_mtime) == 8); - LASSERT (offsetof (struct obdo, o_ctime) == 32); - LASSERT (sizeof (((struct obdo *)0)->o_ctime) == 8); - LASSERT (offsetof (struct obdo, o_size) == 40); - LASSERT (sizeof (((struct obdo *)0)->o_size) == 8); - 
LASSERT (offsetof (struct obdo, o_blocks) == 48); - LASSERT (sizeof (((struct obdo *)0)->o_blocks) == 8); - LASSERT (offsetof (struct obdo, o_rdev) == 56); - LASSERT (sizeof (((struct obdo *)0)->o_rdev) == 8); - LASSERT (offsetof (struct obdo, o_blksize) == 64); - LASSERT (sizeof (((struct obdo *)0)->o_blksize) == 4); - LASSERT (offsetof (struct obdo, o_mode) == 68); - LASSERT (sizeof (((struct obdo *)0)->o_mode) == 4); - LASSERT (offsetof (struct obdo, o_uid) == 72); - LASSERT (sizeof (((struct obdo *)0)->o_uid) == 4); - LASSERT (offsetof (struct obdo, o_gid) == 76); - LASSERT (sizeof (((struct obdo *)0)->o_gid) == 4); - LASSERT (offsetof (struct obdo, o_flags) == 80); - LASSERT (sizeof (((struct obdo *)0)->o_flags) == 4); - LASSERT (offsetof (struct obdo, o_nlink) == 84); - LASSERT (sizeof (((struct obdo *)0)->o_nlink) == 4); - LASSERT (offsetof (struct obdo, o_generation) == 88); - LASSERT (sizeof (((struct obdo *)0)->o_generation) == 4); - LASSERT (offsetof (struct obdo, o_valid) == 92); - LASSERT (sizeof (((struct obdo *)0)->o_valid) == 4); - LASSERT (offsetof (struct obdo, o_obdflags) == 96); - LASSERT (sizeof (((struct obdo *)0)->o_obdflags) == 4); - LASSERT (offsetof (struct obdo, o_easize) == 100); - LASSERT (sizeof (((struct obdo *)0)->o_easize) == 4); - LASSERT (offsetof (struct obdo, o_inline) == 104); - LASSERT (sizeof (((struct obdo *)0)->o_inline) == 60); - - /* Checks for struct obd_statfs */ - LASSERT (sizeof (struct obd_statfs) == 144); - LASSERT (offsetof (struct obd_statfs, os_type) == 0); - LASSERT (sizeof (((struct obd_statfs *)0)->os_type) == 8); - LASSERT (offsetof (struct obd_statfs, os_blocks) == 8); - LASSERT (sizeof (((struct obd_statfs *)0)->os_blocks) == 8); - LASSERT (offsetof (struct obd_statfs, os_bfree) == 16); - LASSERT (sizeof (((struct obd_statfs *)0)->os_bfree) == 8); - LASSERT (offsetof (struct obd_statfs, os_bavail) == 24); - LASSERT (sizeof (((struct obd_statfs *)0)->os_bavail) == 8); - LASSERT (offsetof (struct obd_statfs, 
os_ffree) == 40); - LASSERT (sizeof (((struct obd_statfs *)0)->os_ffree) == 8); - LASSERT (offsetof (struct obd_statfs, os_fsid) == 48); - LASSERT (sizeof (((struct obd_statfs *)0)->os_fsid) == 40); - LASSERT (offsetof (struct obd_statfs, os_bsize) == 88); - LASSERT (sizeof (((struct obd_statfs *)0)->os_bsize) == 4); - LASSERT (offsetof (struct obd_statfs, os_namelen) == 92); - LASSERT (sizeof (((struct obd_statfs *)0)->os_namelen) == 4); - - /* Checks for struct obd_ioobj */ - LASSERT (sizeof (struct obd_ioobj) == 24); - LASSERT (offsetof (struct obd_ioobj, ioo_id) == 0); - LASSERT (sizeof (((struct obd_ioobj *)0)->ioo_id) == 8); - LASSERT (offsetof (struct obd_ioobj, ioo_gr) == 8); - LASSERT (sizeof (((struct obd_ioobj *)0)->ioo_gr) == 8); - LASSERT (offsetof (struct obd_ioobj, ioo_type) == 16); - LASSERT (sizeof (((struct obd_ioobj *)0)->ioo_type) == 4); - LASSERT (offsetof (struct obd_ioobj, ioo_bufcnt) == 20); - LASSERT (sizeof (((struct obd_ioobj *)0)->ioo_bufcnt) == 4); - - /* Checks for struct niobuf_remote */ - LASSERT (sizeof (struct niobuf_remote) == 16); - LASSERT (offsetof (struct niobuf_remote, offset) == 0); - LASSERT (sizeof (((struct niobuf_remote *)0)->offset) == 8); - LASSERT (offsetof (struct niobuf_remote, len) == 8); - LASSERT (sizeof (((struct niobuf_remote *)0)->len) == 4); - LASSERT (offsetof (struct niobuf_remote, flags) == 12); - LASSERT (sizeof (((struct niobuf_remote *)0)->flags) == 4); - - /* Checks for struct ost_body */ - LASSERT (sizeof (struct ost_body) == 164); - LASSERT (offsetof (struct ost_body, oa) == 0); - LASSERT (sizeof (((struct ost_body *)0)->oa) == 164); - - /* Checks for struct ll_fid */ - LASSERT (sizeof (struct ll_fid) == 16); - LASSERT (offsetof (struct ll_fid, id) == 0); - LASSERT (sizeof (((struct ll_fid *)0)->id) == 8); - LASSERT (offsetof (struct ll_fid, generation) == 8); - LASSERT (sizeof (((struct ll_fid *)0)->generation) == 4); - LASSERT (offsetof (struct ll_fid, f_type) == 12); - LASSERT (sizeof (((struct 
ll_fid *)0)->f_type) == 4); - - /* Checks for struct mds_status_req */ - LASSERT (sizeof (struct mds_status_req) == 8); - LASSERT (offsetof (struct mds_status_req, flags) == 0); - LASSERT (sizeof (((struct mds_status_req *)0)->flags) == 4); - LASSERT (offsetof (struct mds_status_req, repbuf) == 4); - LASSERT (sizeof (((struct mds_status_req *)0)->repbuf) == 4); - - /* Checks for struct mds_fileh_body */ - LASSERT (sizeof (struct mds_fileh_body) == 24); - LASSERT (offsetof (struct mds_fileh_body, f_fid) == 0); - LASSERT (sizeof (((struct mds_fileh_body *)0)->f_fid) == 16); - - /* Checks for struct mds_body */ - LASSERT (sizeof (struct mds_body) == 124); - LASSERT (offsetof (struct mds_body, fid1) == 0); - LASSERT (sizeof (((struct mds_body *)0)->fid1) == 16); - LASSERT (offsetof (struct mds_body, fid2) == 16); - LASSERT (sizeof (((struct mds_body *)0)->fid2) == 16); - LASSERT (offsetof (struct mds_body, handle) == 32); - LASSERT (sizeof (((struct mds_body *)0)->handle) == 8); - LASSERT (offsetof (struct mds_body, size) == 40); - LASSERT (sizeof (((struct mds_body *)0)->size) == 8); - LASSERT (offsetof (struct mds_body, blocks) == 48); - LASSERT (sizeof (((struct mds_body *)0)->blocks) == 8); - LASSERT (offsetof (struct mds_body, ino) == 56); - LASSERT (sizeof (((struct mds_body *)0)->ino) == 4); - LASSERT (offsetof (struct mds_body, valid) == 60); - LASSERT (sizeof (((struct mds_body *)0)->valid) == 4); - LASSERT (offsetof (struct mds_body, fsuid) == 64); - LASSERT (sizeof (((struct mds_body *)0)->fsuid) == 4); - LASSERT (offsetof (struct mds_body, fsgid) == 68); - LASSERT (sizeof (((struct mds_body *)0)->fsgid) == 4); - LASSERT (offsetof (struct mds_body, capability) == 72); - LASSERT (sizeof (((struct mds_body *)0)->capability) == 4); - LASSERT (offsetof (struct mds_body, mode) == 76); - LASSERT (sizeof (((struct mds_body *)0)->mode) == 4); - LASSERT (offsetof (struct mds_body, uid) == 80); - LASSERT (sizeof (((struct mds_body *)0)->uid) == 4); - LASSERT (offsetof 
(struct mds_body, gid) == 84); - LASSERT (sizeof (((struct mds_body *)0)->gid) == 4); - LASSERT (offsetof (struct mds_body, mtime) == 88); - LASSERT (sizeof (((struct mds_body *)0)->mtime) == 4); - LASSERT (offsetof (struct mds_body, ctime) == 92); - LASSERT (sizeof (((struct mds_body *)0)->ctime) == 4); - LASSERT (offsetof (struct mds_body, atime) == 96); - LASSERT (sizeof (((struct mds_body *)0)->atime) == 4); - LASSERT (offsetof (struct mds_body, flags) == 100); - LASSERT (sizeof (((struct mds_body *)0)->flags) == 4); - LASSERT (offsetof (struct mds_body, rdev) == 104); - LASSERT (sizeof (((struct mds_body *)0)->rdev) == 4); - LASSERT (offsetof (struct mds_body, nlink) == 108); - LASSERT (sizeof (((struct mds_body *)0)->nlink) == 4); - LASSERT (offsetof (struct mds_body, generation) == 112); - LASSERT (sizeof (((struct mds_body *)0)->generation) == 4); - LASSERT (offsetof (struct mds_body, suppgid) == 116); - LASSERT (sizeof (((struct mds_body *)0)->suppgid) == 4); - - /* Checks for struct mds_rec_setattr */ - LASSERT (sizeof (struct mds_rec_setattr) == 92); - LASSERT (offsetof (struct mds_rec_setattr, sa_opcode) == 0); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_opcode) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_fsuid) == 4); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_fsuid) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_fsgid) == 8); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_fsgid) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_cap) == 12); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_cap) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_reserved) == 16); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_reserved) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_valid) == 20); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_valid) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_fid) == 24); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_fid) == 16); - LASSERT 
(offsetof (struct mds_rec_setattr, sa_mode) == 40); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_mode) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_uid) == 44); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_uid) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_gid) == 48); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_gid) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_attr_flags) == 52); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_attr_flags) == 4); - LASSERT (offsetof (struct mds_rec_setattr, sa_size) == 56); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_size) == 8); - LASSERT (offsetof (struct mds_rec_setattr, sa_atime) == 64); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_atime) == 8); - LASSERT (offsetof (struct mds_rec_setattr, sa_mtime) == 72); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_mtime) == 8); - LASSERT (offsetof (struct mds_rec_setattr, sa_ctime) == 80); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_ctime) == 8); - LASSERT (offsetof (struct mds_rec_setattr, sa_suppgid) == 88); - LASSERT (sizeof (((struct mds_rec_setattr *)0)->sa_suppgid) == 4); - - /* Checks for struct mds_rec_create */ - LASSERT (sizeof (struct mds_rec_create) == 84); - LASSERT (offsetof (struct mds_rec_create, cr_opcode) == 0); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_opcode) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_fsuid) == 4); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_fsuid) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_fsgid) == 8); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_fsgid) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_cap) == 12); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_cap) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_flags) == 16); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_flags) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_mode) == 20); - LASSERT (sizeof (((struct mds_rec_create 
*)0)->cr_mode) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_fid) == 24); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_fid) == 16); - LASSERT (offsetof (struct mds_rec_create, cr_replayfid) == 40); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_replayfid) == 16); - LASSERT (offsetof (struct mds_rec_create, cr_uid) == 56); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_uid) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_gid) == 60); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_gid) == 4); - LASSERT (offsetof (struct mds_rec_create, cr_time) == 64); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_time) == 8); - LASSERT (offsetof (struct mds_rec_create, cr_rdev) == 72); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_rdev) == 8); - LASSERT (offsetof (struct mds_rec_create, cr_suppgid) == 80); - LASSERT (sizeof (((struct mds_rec_create *)0)->cr_suppgid) == 4); - - /* Checks for struct mds_rec_link */ - LASSERT (sizeof (struct mds_rec_link) == 56); - LASSERT (offsetof (struct mds_rec_link, lk_opcode) == 0); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_opcode) == 4); - LASSERT (offsetof (struct mds_rec_link, lk_fsuid) == 4); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_fsuid) == 4); - LASSERT (offsetof (struct mds_rec_link, lk_fsgid) == 8); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_fsgid) == 4); - LASSERT (offsetof (struct mds_rec_link, lk_cap) == 12); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_cap) == 4); - LASSERT (offsetof (struct mds_rec_link, lk_suppgid1) == 16); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_suppgid1) == 4); - LASSERT (offsetof (struct mds_rec_link, lk_suppgid2) == 20); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_suppgid2) == 4); - LASSERT (offsetof (struct mds_rec_link, lk_fid1) == 24); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_fid1) == 16); - LASSERT (offsetof (struct mds_rec_link, lk_fid2) == 40); - LASSERT (sizeof (((struct mds_rec_link *)0)->lk_fid2) == 
16); - - /* Checks for struct mds_rec_unlink */ - LASSERT (sizeof (struct mds_rec_unlink) == 60); - LASSERT (offsetof (struct mds_rec_unlink, ul_opcode) == 0); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_opcode) == 4); - LASSERT (offsetof (struct mds_rec_unlink, ul_fsuid) == 4); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_fsuid) == 4); - LASSERT (offsetof (struct mds_rec_unlink, ul_fsgid) == 8); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_fsgid) == 4); - LASSERT (offsetof (struct mds_rec_unlink, ul_cap) == 12); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_cap) == 4); - LASSERT (offsetof (struct mds_rec_unlink, ul_reserved) == 16); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_reserved) == 4); - LASSERT (offsetof (struct mds_rec_unlink, ul_mode) == 20); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_mode) == 4); - LASSERT (offsetof (struct mds_rec_unlink, ul_suppgid) == 24); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_suppgid) == 4); - LASSERT (offsetof (struct mds_rec_unlink, ul_fid1) == 28); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_fid1) == 16); - LASSERT (offsetof (struct mds_rec_unlink, ul_fid2) == 44); - LASSERT (sizeof (((struct mds_rec_unlink *)0)->ul_fid2) == 16); - - /* Checks for struct mds_rec_rename */ - LASSERT (sizeof (struct mds_rec_rename) == 56); - LASSERT (offsetof (struct mds_rec_rename, rn_opcode) == 0); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_opcode) == 4); - LASSERT (offsetof (struct mds_rec_rename, rn_fsuid) == 4); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_fsuid) == 4); - LASSERT (offsetof (struct mds_rec_rename, rn_fsgid) == 8); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_fsgid) == 4); - LASSERT (offsetof (struct mds_rec_rename, rn_cap) == 12); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_cap) == 4); - LASSERT (offsetof (struct mds_rec_rename, rn_suppgid1) == 16); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_suppgid1) == 4); - LASSERT 
(offsetof (struct mds_rec_rename, rn_suppgid2) == 20); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_suppgid2) == 4); - LASSERT (offsetof (struct mds_rec_rename, rn_fid1) == 24); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_fid1) == 16); - LASSERT (offsetof (struct mds_rec_rename, rn_fid2) == 40); - LASSERT (sizeof (((struct mds_rec_rename *)0)->rn_fid2) == 16); - - /* Checks for struct lov_desc */ - LASSERT (sizeof (struct lov_desc) == 72); - LASSERT (offsetof (struct lov_desc, ld_tgt_count) == 0); - LASSERT (sizeof (((struct lov_desc *)0)->ld_tgt_count) == 4); - LASSERT (offsetof (struct lov_desc, ld_active_tgt_count) == 4); - LASSERT (sizeof (((struct lov_desc *)0)->ld_active_tgt_count) == 4); - LASSERT (offsetof (struct lov_desc, ld_default_stripe_count) == 8); - LASSERT (sizeof (((struct lov_desc *)0)->ld_default_stripe_count) == 4); - LASSERT (offsetof (struct lov_desc, ld_default_stripe_size) == 12); - LASSERT (sizeof (((struct lov_desc *)0)->ld_default_stripe_size) == 8); - LASSERT (offsetof (struct lov_desc, ld_default_stripe_offset) == 20); - LASSERT (sizeof (((struct lov_desc *)0)->ld_default_stripe_offset) == 8); - LASSERT (offsetof (struct lov_desc, ld_pattern) == 28); - LASSERT (sizeof (((struct lov_desc *)0)->ld_pattern) == 4); - LASSERT (offsetof (struct lov_desc, ld_uuid) == 32); - LASSERT (sizeof (((struct lov_desc *)0)->ld_uuid) == 37); - - /* Checks for struct ldlm_res_id */ - LASSERT (sizeof (struct ldlm_res_id) == 24); - LASSERT (offsetof (struct ldlm_res_id, name[3]) == 24); - LASSERT (sizeof (((struct ldlm_res_id *)0)->name[3]) == 8); - - /* Checks for struct ldlm_extent */ - LASSERT (sizeof (struct ldlm_extent) == 16); - LASSERT (offsetof (struct ldlm_extent, start) == 0); - LASSERT (sizeof (((struct ldlm_extent *)0)->start) == 8); - LASSERT (offsetof (struct ldlm_extent, end) == 8); - LASSERT (sizeof (((struct ldlm_extent *)0)->end) == 8); - - /* Checks for struct ldlm_intent */ - LASSERT (sizeof (struct ldlm_intent) == 8); - 
LASSERT (offsetof (struct ldlm_intent, opc) == 0); - LASSERT (sizeof (((struct ldlm_intent *)0)->opc) == 8); - - /* Checks for struct ldlm_resource_desc */ - LASSERT (sizeof (struct ldlm_resource_desc) == 44); - LASSERT (offsetof (struct ldlm_resource_desc, lr_type) == 0); - LASSERT (sizeof (((struct ldlm_resource_desc *)0)->lr_type) == 4); - LASSERT (offsetof (struct ldlm_resource_desc, lr_name) == 4); - LASSERT (sizeof (((struct ldlm_resource_desc *)0)->lr_name) == 24); - LASSERT (offsetof (struct ldlm_resource_desc, lr_version[4]) == 44); - LASSERT (sizeof (((struct ldlm_resource_desc *)0)->lr_version[4]) == 4); - - /* Checks for struct ldlm_lock_desc */ - LASSERT (sizeof (struct ldlm_lock_desc) == 84); - LASSERT (offsetof (struct ldlm_lock_desc, l_resource) == 0); - LASSERT (sizeof (((struct ldlm_lock_desc *)0)->l_resource) == 44); - LASSERT (offsetof (struct ldlm_lock_desc, l_req_mode) == 44); - LASSERT (sizeof (((struct ldlm_lock_desc *)0)->l_req_mode) == 4); - LASSERT (offsetof (struct ldlm_lock_desc, l_granted_mode) == 48); - LASSERT (sizeof (((struct ldlm_lock_desc *)0)->l_granted_mode) == 4); - LASSERT (offsetof (struct ldlm_lock_desc, l_extent) == 52); - LASSERT (sizeof (((struct ldlm_lock_desc *)0)->l_extent) == 16); - LASSERT (offsetof (struct ldlm_lock_desc, l_version[4]) == 84); - LASSERT (sizeof (((struct ldlm_lock_desc *)0)->l_version[4]) == 4); - - /* Checks for struct ldlm_request */ - LASSERT (sizeof (struct ldlm_request) == 104); - LASSERT (offsetof (struct ldlm_request, lock_flags) == 0); - LASSERT (sizeof (((struct ldlm_request *)0)->lock_flags) == 4); - LASSERT (offsetof (struct ldlm_request, lock_desc) == 4); - LASSERT (sizeof (((struct ldlm_request *)0)->lock_desc) == 84); - LASSERT (offsetof (struct ldlm_request, lock_handle1) == 88); - LASSERT (sizeof (((struct ldlm_request *)0)->lock_handle1) == 8); - LASSERT (offsetof (struct ldlm_request, lock_handle2) == 96); - LASSERT (sizeof (((struct ldlm_request *)0)->lock_handle2) == 8); - - /* 
Checks for struct ldlm_reply */ - LASSERT (sizeof (struct ldlm_reply) == 72); - LASSERT (offsetof (struct ldlm_reply, lock_flags) == 0); - LASSERT (sizeof (((struct ldlm_reply *)0)->lock_flags) == 4); - LASSERT (offsetof (struct ldlm_reply, lock_mode) == 4); - LASSERT (sizeof (((struct ldlm_reply *)0)->lock_mode) == 4); - LASSERT (offsetof (struct ldlm_reply, lock_resource_name) == 8); - LASSERT (sizeof (((struct ldlm_reply *)0)->lock_resource_name) == 24); - LASSERT (offsetof (struct ldlm_reply, lock_handle) == 32); - LASSERT (sizeof (((struct ldlm_reply *)0)->lock_handle) == 8); - LASSERT (offsetof (struct ldlm_reply, lock_extent) == 40); - LASSERT (sizeof (((struct ldlm_reply *)0)->lock_extent) == 16); - LASSERT (offsetof (struct ldlm_reply, lock_policy_res1) == 56); - LASSERT (sizeof (((struct ldlm_reply *)0)->lock_policy_res1) == 8); - LASSERT (offsetof (struct ldlm_reply, lock_policy_res2) == 64); - LASSERT (sizeof (((struct ldlm_reply *)0)->lock_policy_res2) == 8); - - /* Checks for struct ptlbd_op */ - LASSERT (sizeof (struct ptlbd_op) == 12); - LASSERT (offsetof (struct ptlbd_op, op_cmd) == 0); - LASSERT (sizeof (((struct ptlbd_op *)0)->op_cmd) == 2); - LASSERT (offsetof (struct ptlbd_op, op_lun) == 2); - LASSERT (sizeof (((struct ptlbd_op *)0)->op_lun) == 2); - LASSERT (offsetof (struct ptlbd_op, op_niob_cnt) == 4); - LASSERT (sizeof (((struct ptlbd_op *)0)->op_niob_cnt) == 2); - LASSERT (offsetof (struct ptlbd_op, op__padding) == 6); - LASSERT (sizeof (((struct ptlbd_op *)0)->op__padding) == 2); - LASSERT (offsetof (struct ptlbd_op, op_block_cnt) == 8); - LASSERT (sizeof (((struct ptlbd_op *)0)->op_block_cnt) == 4); - - /* Checks for struct ptlbd_niob */ - LASSERT (sizeof (struct ptlbd_niob) == 24); - LASSERT (offsetof (struct ptlbd_niob, n_xid) == 0); - LASSERT (sizeof (((struct ptlbd_niob *)0)->n_xid) == 8); - LASSERT (offsetof (struct ptlbd_niob, n_block_nr) == 8); - LASSERT (sizeof (((struct ptlbd_niob *)0)->n_block_nr) == 8); - LASSERT (offsetof 
(struct ptlbd_niob, n_offset) == 16); - LASSERT (sizeof (((struct ptlbd_niob *)0)->n_offset) == 4); - LASSERT (offsetof (struct ptlbd_niob, n_length) == 20); - LASSERT (sizeof (((struct ptlbd_niob *)0)->n_length) == 4); - - /* Checks for struct ptlbd_rsp */ - LASSERT (sizeof (struct ptlbd_rsp) == 4); - LASSERT (offsetof (struct ptlbd_rsp, r_status) == 0); - LASSERT (sizeof (((struct ptlbd_rsp *)0)->r_status) == 2); - LASSERT (offsetof (struct ptlbd_rsp, r_error_cnt) == 2); - LASSERT (sizeof (((struct ptlbd_rsp *)0)->r_error_cnt) == 2); -#endif -} diff --git a/lustre/ptlrpc/pinger.c b/lustre/ptlrpc/pinger.c deleted file mode 100644 index ebc69e1..0000000 --- a/lustre/ptlrpc/pinger.c +++ /dev/null @@ -1,316 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Portal-RPC reconnection and replay operations, for use in recovery. - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * Authors: Phil Schwan <phil@clusterfs.com> - * Mike Shaver <shaver@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include <linux/version.h> -#include <asm/semaphore.h> - -#define DEBUG_SUBSYSTEM S_RPC -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include "ptlrpc_internal.h" - -static struct ptlrpc_thread *pinger_thread = NULL; -static DECLARE_MUTEX(pinger_sem); -static struct list_head pinger_imports = LIST_HEAD_INIT(pinger_imports); - -int ptlrpc_start_pinger(void); -int ptlrpc_stop_pinger(void); - -void ptlrpc_pinger_sending_on_import(struct obd_import *imp) -{ - down(&pinger_sem); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - up(&pinger_sem); -} - -int ptlrpc_pinger_add_import(struct obd_import *imp) -{ - int rc; - ENTRY; - -#ifndef ENABLE_PINGER - RETURN(0); -#else - if (!list_empty(&imp->imp_pinger_chain)) - RETURN(-EALREADY); - - down(&pinger_sem); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_start_pinger(); - if (rc < 0) - RETURN(rc); - down(&pinger_sem); - } - - CDEBUG(D_HA, "adding pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - imp->imp_next_ping = jiffies + (obd_timeout * HZ); - list_add_tail(&imp->imp_pinger_chain, &pinger_imports); /* XXX sort, blah blah */ - class_import_get(imp); - up(&pinger_sem); - RETURN(0); -#endif -} - -int ptlrpc_pinger_del_import(struct obd_import *imp) -{ - int rc; - ENTRY; - -#ifndef ENABLE_PINGER - RETURN(0); -#else - if (list_empty(&imp->imp_pinger_chain)) - RETURN(-ENOENT); - - down(&pinger_sem); - list_del_init(&imp->imp_pinger_chain); - CDEBUG(D_HA, "removing pingable import %s->%s\n", - imp->imp_obd->obd_uuid.uuid, imp->imp_target_uuid.uuid); - class_import_put(imp); - if (list_empty(&pinger_imports)) { - up(&pinger_sem); - rc = ptlrpc_stop_pinger(); - if (rc) - RETURN(rc); - down(&pinger_sem); - } - up(&pinger_sem); - RETURN(0); -#endif -} - -static int ptlrpc_pinger_main(void *arg) -{ - struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; - struct ptlrpc_thread *thread = data->thread; - unsigned long flags; - ENTRY; - - 
lock_kernel(); - ptlrpc_daemonize(); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(&current->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - sprintf(current->comm, "%s|%d", data->name, - current->thread.mode.tt.extern_pid); -#else - strcpy(current->comm, data->name); -#endif - unlock_kernel(); - - /* Record that the thread is running */ - thread->t_flags = SVC_RUNNING; - wake_up(&thread->t_ctl_waitq); - - /* And now, loop forever, pinging as needed. */ - while (1) { - unsigned long this_ping = jiffies; - long time_to_next_ping; - struct l_wait_info lwi = LWI_TIMEOUT(10 * HZ, NULL, NULL); - struct ptlrpc_request_set *set; - struct ptlrpc_request *req; - struct list_head *iter; - wait_queue_t set_wait; - int rc; - - set = ptlrpc_prep_set(); - down(&pinger_sem); - list_for_each(iter, &pinger_imports) { - struct obd_import *imp = - list_entry(iter, struct obd_import, imp_pinger_chain); - int generation, level; - unsigned long flags; - - if (imp->imp_next_ping <= this_ping) { - /* Add a ping. 
*/ - spin_lock_irqsave(&imp->imp_lock, flags); - generation = imp->imp_generation; - level = imp->imp_level; - spin_unlock_irqrestore(&imp->imp_lock, flags); - - if (level != LUSTRE_CONN_FULL) { - CDEBUG(D_HA, "not pinging %s (in recovery)\n", - imp->imp_target_uuid.uuid); - continue; - } - - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL); - if (!req) { - CERROR("OOM trying to ping\n"); - break; - } - req->rq_replen = lustre_msg_size(0, NULL); - req->rq_level = LUSTRE_CONN_FULL; - req->rq_phase = RQ_PHASE_RPC; - req->rq_import_generation = generation; - ptlrpc_set_add_req(set, req); - } else { - CDEBUG(D_HA, "don't need to ping %s (%lu > %lu)\n", - imp->imp_target_uuid.uuid, imp->imp_next_ping, - this_ping); - } - } - up(&pinger_sem); - - /* Might be empty, that's OK. */ - if (set->set_remaining == 0) - CDEBUG(D_HA, "nothing to ping\n"); - list_for_each(iter, &set->set_requests) { - struct ptlrpc_request *req = - list_entry(iter, struct ptlrpc_request, rq_set_chain); - DEBUG_REQ(D_HA, req, "pinging %s->%s", - req->rq_import->imp_obd->obd_uuid.uuid, - req->rq_import->imp_target_uuid.uuid); - (void)ptl_send_rpc(req); - } - - /* Have to wait on both the thread's queue and the set's. */ - init_waitqueue_entry(&set_wait, current); - add_wait_queue(&set->set_waitq, &set_wait); - rc = l_wait_event(thread->t_ctl_waitq, - thread->t_flags & SVC_STOPPING || ptlrpc_check_set(set), - &lwi); - remove_wait_queue(&set->set_waitq, &set_wait); - CDEBUG(D_HA, "ping complete (%lu)\n", jiffies); - - if (thread->t_flags & SVC_STOPPING) { - thread->t_flags &= ~SVC_STOPPING; - list_for_each(iter, &set->set_requests) { - req = list_entry(iter, struct ptlrpc_request, - rq_set_chain); - if (!req->rq_replied) - ptlrpc_unregister_reply(req); - } - ptlrpc_set_destroy(set); - EXIT; - break; - } - - /* Expire all the requests that didn't come back. 
*/ - down(&pinger_sem); - list_for_each(iter, &set->set_requests) { - req = list_entry(iter, struct ptlrpc_request, rq_set_chain); - - if (req->rq_replied) - continue; - - req->rq_phase = RQ_PHASE_COMPLETE; - set->set_remaining--; - /* If it was disconnected, don't sweat it. */ - if (list_empty(&req->rq_import->imp_pinger_chain)) - continue; - - ptlrpc_expire_one_request(req); - } - up(&pinger_sem); - ptlrpc_set_destroy(set); - - /* Wait until the next ping time, or until we're stopped. */ - time_to_next_ping = this_ping + (obd_timeout * HZ) - jiffies; - CDEBUG(D_HA, "next ping in %lu (%lu)\n", time_to_next_ping, - this_ping + (obd_timeout * HZ)); - if (time_to_next_ping > 0) { - lwi = LWI_TIMEOUT(time_to_next_ping, NULL, NULL); - l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING, - &lwi); - if (thread->t_flags & SVC_STOPPING) { - thread->t_flags &= ~SVC_STOPPING; - EXIT; - break; - } - } - } - - thread->t_flags = SVC_STOPPED; - wake_up(&thread->t_ctl_waitq); - - CDEBUG(D_NET, "pinger thread exiting, process %d\n", current->pid); - return 0; -} - -int ptlrpc_start_pinger(void) -{ - struct l_wait_info lwi = { 0 }; - struct ptlrpc_svc_data d; - int rc; - ENTRY; - - down(&pinger_sem); - if (pinger_thread != NULL) - GOTO(out, rc = -EALREADY); - - OBD_ALLOC(pinger_thread, sizeof(*pinger_thread)); - if (pinger_thread == NULL) - GOTO(out, rc = -ENOMEM); - init_waitqueue_head(&pinger_thread->t_ctl_waitq); - - d.name = "ll_ping"; - d.thread = pinger_thread; - - /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we - * just drop the VM and FILES in ptlrpc_daemonize() right away. 
*/ - rc = kernel_thread(ptlrpc_pinger_main, &d, CLONE_VM | CLONE_FILES); - if (rc < 0) { - CERROR("cannot start thread: %d\n", rc); - OBD_FREE(pinger_thread, sizeof(*pinger_thread)); - GOTO(out, rc); - } - l_wait_event(pinger_thread->t_ctl_waitq, - pinger_thread->t_flags & SVC_RUNNING, &lwi); - - out: - up(&pinger_sem); - RETURN(rc); -} - -int ptlrpc_stop_pinger(void) -{ - struct l_wait_info lwi = { 0 }; - int rc = 0; - ENTRY; - - down(&pinger_sem); - if (pinger_thread == NULL) - GOTO(out, rc = -EALREADY); - - pinger_thread->t_flags = SVC_STOPPING; - wake_up(&pinger_thread->t_ctl_waitq); - l_wait_event(pinger_thread->t_ctl_waitq, - (pinger_thread->t_flags & SVC_STOPPED), &lwi); - - OBD_FREE(pinger_thread, sizeof(*pinger_thread)); - - out: - up(&pinger_sem); - RETURN(rc); -} diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h deleted file mode 100644 index cb96c3c..0000000 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ /dev/null @@ -1,98 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -/* Intramodule declarations for ptlrpc. 
*/ - -#ifndef PTLRPC_INTERNAL_H -#define PTLRPC_INTERNAL_H - -struct ldlm_namespace; -struct obd_import; -struct ldlm_res_id; -struct ptlrpc_request_set; - -/* ldlm hooks that we need, managed via inter_module_{get,put} */ -extern int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int); -extern int (*ptlrpc_ldlm_cli_cancel_unused)(struct ldlm_namespace *, - struct ldlm_res_id *, int); -extern int (*ptlrpc_ldlm_replay_locks)(struct obd_import *); - -int ptlrpc_get_ldlm_hooks(void); -void ptlrpc_daemonize(void); - -void ptlrpc_request_handle_eviction(struct ptlrpc_request *); -void lustre_assert_wire_constants (void); - -void ptlrpc_lprocfs_register_service(struct obd_device *obddev, - struct ptlrpc_service *svc); -void ptlrpc_lprocfs_unregister_service(struct ptlrpc_service *svc); - - -static inline int opcode_offset(__u32 opc) { - if (opc < OST_LAST_OPC) { - /* OST opcode */ - return (opc - OST_FIRST_OPC); - } else if (opc < MDS_LAST_OPC) { - /* MDS opcode */ - return (opc - MDS_FIRST_OPC + - (OST_LAST_OPC - OST_FIRST_OPC)); - } else if (opc < LDLM_LAST_OPC) { - /* LDLM Opcode */ - return (opc - LDLM_FIRST_OPC + - (MDS_LAST_OPC - MDS_FIRST_OPC) + - (OST_LAST_OPC - OST_FIRST_OPC)); - } else if (opc < PTLBD_LAST_OPC) { - /* Portals Block Device */ - return (opc - PTLBD_FIRST_OPC + - (LDLM_LAST_OPC - LDLM_FIRST_OPC) + - (MDS_LAST_OPC - MDS_FIRST_OPC) + - (OST_LAST_OPC - OST_FIRST_OPC)); - } else if (opc == OBD_PING) { - /* OBD Ping */ - return (opc - OBD_PING + - (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) + - (LDLM_LAST_OPC - LDLM_FIRST_OPC) + - (MDS_LAST_OPC - MDS_FIRST_OPC) + - (OST_LAST_OPC - OST_FIRST_OPC)); - } else { - /* Unknown Opcode */ - return -1; - } -} - -#define LUSTRE_MAX_OPCODES (1 + (PTLBD_LAST_OPC - PTLBD_FIRST_OPC) \ - + (LDLM_LAST_OPC - LDLM_FIRST_OPC) \ - + (MDS_LAST_OPC - MDS_FIRST_OPC) \ - + (OST_LAST_OPC - OST_FIRST_OPC)) - -enum { - PTLRPC_REQWAIT_CNTR = 0, - PTLRPC_SVCIDLETIME_CNTR = 1, - //PTLRPC_SVCEQDEPTH_CNTR, - PTLRPC_LAST_CNTR -}; - 
-int ptlrpc_expire_one_request(struct ptlrpc_request *req); -int ptlrpc_check_set(struct ptlrpc_request_set *set); - -void ptlrpc_pinger_sending_on_import(struct obd_import *imp); -#endif /* PTLRPC_INTERNAL_H */ diff --git a/lustre/ptlrpc/ptlrpc_lib.c b/lustre/ptlrpc/ptlrpc_lib.c deleted file mode 100644 index ccc05dc..0000000 --- a/lustre/ptlrpc/ptlrpc_lib.c +++ /dev/null @@ -1,125 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_RPC - -#ifdef __KERNEL__ -# include <linux/module.h> -#else -# include <liblustre.h> -#endif -#include <linux/obd.h> -#include <linux/obd_ost.h> -#include <linux/lustre_net.h> -#include <linux/lustre_dlm.h> - -int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct ptlrpc_connection *conn; - struct obd_ioctl_data* data = buf; - struct client_obd *cli = &obddev->u.cli; - struct obd_import *imp; - struct obd_uuid server_uuid; - int rq_portal, rp_portal, connect_op; - char *name; - ENTRY; - - if (obddev->obd_type->typ_ops->o_brw) { - rq_portal = OST_REQUEST_PORTAL; - rp_portal = OSC_REPLY_PORTAL; - name = "osc"; - connect_op = OST_CONNECT; - } else { - rq_portal = MDS_REQUEST_PORTAL; - rp_portal = MDC_REPLY_PORTAL; - name = "mdc"; - connect_op = MDS_CONNECT; - } - - if (data->ioc_inllen1 < 1) { - CERROR("requires a TARGET UUID\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1 > 37) { - CERROR("client UUID must be less than 38 characters\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen2 < 1) { - CERROR("setup requires a SERVER UUID\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen2 > 37) { - CERROR("target UUID must be less than 38 characters\n"); - RETURN(-EINVAL); - } - - sema_init(&cli->cl_sem, 1); - cli->cl_conn_count = 0; - memcpy(server_uuid.uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2, - sizeof(server_uuid))); - - init_MUTEX(&cli->cl_dirty_sem); - cli->cl_dirty = 0; - cli->cl_dirty_granted = 0; - cli->cl_ost_can_grant = 1; - - conn = ptlrpc_uuid_to_connection(&server_uuid); - if (conn == NULL) - RETURN(-ENOENT); - - ptlrpc_init_client(rq_portal, rp_portal, name, - &obddev->obd_ldlm_client); - - imp = class_new_import(); - if (imp == NULL) { - ptlrpc_put_connection(conn); - RETURN(-ENOMEM); - } - imp->imp_connection = conn; - imp->imp_client = &obddev->obd_ldlm_client; - imp->imp_obd = obddev; - imp->imp_connect_op = connect_op; - imp->imp_generation = 0; - 
INIT_LIST_HEAD(&imp->imp_pinger_chain); - memcpy(imp->imp_target_uuid.uuid, data->ioc_inlbuf1, data->ioc_inllen1); - class_import_put(imp); - - cli->cl_import = imp; - cli->cl_max_mds_easize = sizeof(struct lov_mds_md); - cli->cl_sandev = to_kdev_t(0); - - RETURN(0); -} - -int client_obd_cleanup(struct obd_device *obddev, int force, int failover) -{ - struct client_obd *client = &obddev->u.cli; - - if (!client->cl_import) - RETURN(-EINVAL); - class_destroy_import(client->cl_import); - client->cl_import = NULL; - RETURN(0); -} diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c deleted file mode 100644 index 57f3653..0000000 --- a/lustre/ptlrpc/ptlrpc_module.c +++ /dev/null @@ -1,242 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define EXPORT_SYMTAB -#define DEBUG_SUBSYSTEM S_RPC - -#ifdef __KERNEL__ -# include <linux/module.h> -# include <linux/init.h> -#else -# include <liblustre.h> -#endif - -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_net.h> - -#include "ptlrpc_internal.h" - -extern int ptlrpc_init_portals(void); -extern void ptlrpc_exit_portals(void); -static int ldlm_hooks_referenced = 0; - -int (*ptlrpc_ldlm_namespace_cleanup)(struct ldlm_namespace *, int); -int (*ptlrpc_ldlm_replay_locks)(struct obd_import *); - -#define GET_HOOK(name) \ -if (!ptlrpc_##name) { \ - if (!(ptlrpc_##name = inter_module_get(#name))) { \ - CERROR("can't i_m_g(\"" #name "\")\n"); \ - return 0; \ - } \ -} - -static int ldlm_hooks_referenced; - -/* This is called from ptlrpc_get_connection, which runs after all the modules - * are loaded, but before anything else interesting happens. - */ -int ptlrpc_get_ldlm_hooks(void) -{ - if (ldlm_hooks_referenced) - return 1; - - GET_HOOK(ldlm_namespace_cleanup); - GET_HOOK(ldlm_replay_locks); - - ldlm_hooks_referenced = 1; - RETURN(1); -} - -#undef GET_HOOK - -#define PUT_HOOK(hook) \ -if (ptlrpc_##hook) { \ - inter_module_put(#hook); \ - ptlrpc_##hook = NULL; \ -} - -void ptlrpc_put_ldlm_hooks(void) -{ - ENTRY; - if (!ldlm_hooks_referenced) - return; - - PUT_HOOK(ldlm_namespace_cleanup); - PUT_HOOK(ldlm_replay_locks); - ldlm_hooks_referenced = 0; - EXIT; -} - -#undef PUT_HOOK - -int ptlrpc_ldlm_hooks_referenced(void) -{ - return ldlm_hooks_referenced; -} - -__init int ptlrpc_init(void) -{ - int rc; - ENTRY; - - lustre_assert_wire_constants (); - - rc = ptlrpc_init_portals(); - if (rc) - RETURN(rc); - - ptlrpc_init_connection(); - - ptlrpc_put_connection_superhack = ptlrpc_put_connection; - ptlrpc_abort_inflight_superhack = ptlrpc_abort_inflight; - RETURN(0); -} - -static void __exit ptlrpc_exit(void) -{ - ptlrpc_exit_portals(); - ptlrpc_cleanup_connection(); -} - -/* connection.c */ 
-EXPORT_SYMBOL(ptlrpc_dump_connections); -EXPORT_SYMBOL(ptlrpc_readdress_connection); -EXPORT_SYMBOL(ptlrpc_get_connection); -EXPORT_SYMBOL(ptlrpc_put_connection); -EXPORT_SYMBOL(ptlrpc_connection_addref); -EXPORT_SYMBOL(ptlrpc_init_connection); -EXPORT_SYMBOL(ptlrpc_cleanup_connection); - -/* niobuf.c */ -EXPORT_SYMBOL(ptlrpc_bulk_put); -EXPORT_SYMBOL(ptlrpc_bulk_get); -EXPORT_SYMBOL(ptlrpc_abort_bulk); -EXPORT_SYMBOL(ptlrpc_register_bulk); -EXPORT_SYMBOL(ptlrpc_unregister_bulk); -EXPORT_SYMBOL(ptlrpc_reply); -EXPORT_SYMBOL(ptlrpc_error); -EXPORT_SYMBOL(ptlrpc_resend_req); -EXPORT_SYMBOL(ptl_send_rpc); -EXPORT_SYMBOL(ptlrpc_link_svc_me); - -/* client.c */ -EXPORT_SYMBOL(ptlrpc_init_client); -EXPORT_SYMBOL(ptlrpc_cleanup_client); -EXPORT_SYMBOL(ptlrpc_req_to_uuid); -EXPORT_SYMBOL(ptlrpc_uuid_to_connection); -EXPORT_SYMBOL(ptlrpc_queue_wait); -EXPORT_SYMBOL(ptlrpc_replay_req); -EXPORT_SYMBOL(ptlrpc_restart_req); -EXPORT_SYMBOL(ptlrpc_prep_req); -EXPORT_SYMBOL(ptlrpc_free_req); -EXPORT_SYMBOL(ptlrpc_unregister_reply); -EXPORT_SYMBOL(ptlrpc_req_finished); -EXPORT_SYMBOL(ptlrpc_request_addref); -EXPORT_SYMBOL(ptlrpc_prep_bulk_imp); -EXPORT_SYMBOL(ptlrpc_prep_bulk_exp); -EXPORT_SYMBOL(ptlrpc_free_bulk); -EXPORT_SYMBOL(ptlrpc_prep_bulk_page); -EXPORT_SYMBOL(ptlrpc_free_bulk_page); -EXPORT_SYMBOL(ptlrpc_abort_inflight); -EXPORT_SYMBOL(ptlrpc_retain_replayable_request); -EXPORT_SYMBOL(ptlrpc_next_xid); - -EXPORT_SYMBOL(ptlrpc_prep_set); -EXPORT_SYMBOL(ptlrpc_set_add_req); -EXPORT_SYMBOL(ptlrpc_set_destroy); -EXPORT_SYMBOL(ptlrpc_set_wait); - -/* service.c */ -EXPORT_SYMBOL(ptlrpc_init_svc); -EXPORT_SYMBOL(ptlrpc_stop_all_threads); -EXPORT_SYMBOL(ptlrpc_start_thread); -EXPORT_SYMBOL(ptlrpc_unregister_service); - -/* pack_generic.c */ -EXPORT_SYMBOL(lustre_pack_msg); -EXPORT_SYMBOL(lustre_msg_size); -EXPORT_SYMBOL(lustre_unpack_msg); -EXPORT_SYMBOL(lustre_msg_buf); -EXPORT_SYMBOL(lustre_msg_string); -EXPORT_SYMBOL(lustre_swab_reqbuf); -EXPORT_SYMBOL(lustre_swab_repbuf); 
-EXPORT_SYMBOL(lustre_swab_obdo); -EXPORT_SYMBOL(lustre_swab_obd_statfs); -EXPORT_SYMBOL(lustre_swab_obd_ioobj); -EXPORT_SYMBOL(lustre_swab_niobuf_remote); -EXPORT_SYMBOL(lustre_swab_ost_body); -EXPORT_SYMBOL(lustre_swab_ll_fid); -EXPORT_SYMBOL(lustre_swab_mds_status_req); -EXPORT_SYMBOL(lustre_swab_mds_fileh_body); -EXPORT_SYMBOL(lustre_swab_mds_body); -EXPORT_SYMBOL(lustre_swab_mds_rec_setattr); -EXPORT_SYMBOL(lustre_swab_mds_rec_create); -EXPORT_SYMBOL(lustre_swab_mds_rec_link); -EXPORT_SYMBOL(lustre_swab_mds_rec_unlink); -EXPORT_SYMBOL(lustre_swab_mds_rec_rename); -EXPORT_SYMBOL(lustre_swab_lov_desc); -EXPORT_SYMBOL(lustre_swab_ldlm_res_id); -EXPORT_SYMBOL(lustre_swab_ldlm_extent); -EXPORT_SYMBOL(lustre_swab_ldlm_intent); -EXPORT_SYMBOL(lustre_swab_ldlm_resource_desc); -EXPORT_SYMBOL(lustre_swab_ldlm_lock_desc); -EXPORT_SYMBOL(lustre_swab_ldlm_request); -EXPORT_SYMBOL(lustre_swab_ldlm_reply); -EXPORT_SYMBOL(lustre_swab_ptlbd_op); -EXPORT_SYMBOL(lustre_swab_ptlbd_niob); -EXPORT_SYMBOL(lustre_swab_ptlbd_rsp); - -/* ptlrpc_module.c */ -EXPORT_SYMBOL(ptlrpc_put_ldlm_hooks); -EXPORT_SYMBOL(ptlrpc_ldlm_hooks_referenced); - -/* recover.c */ -EXPORT_SYMBOL(ptlrpc_run_recovery_over_upcall); -EXPORT_SYMBOL(ptlrpc_run_failed_import_upcall); -EXPORT_SYMBOL(ptlrpc_reconnect_import); -EXPORT_SYMBOL(ptlrpc_replay); -EXPORT_SYMBOL(ptlrpc_resend); -EXPORT_SYMBOL(ptlrpc_wake_delayed); -EXPORT_SYMBOL(ptlrpc_set_import_active); -EXPORT_SYMBOL(ptlrpc_fail_import); -EXPORT_SYMBOL(ptlrpc_fail_export); -EXPORT_SYMBOL(ptlrpc_recover_import); - -/*ptlrpc_lib.c*/ -EXPORT_SYMBOL(client_obd_setup); -EXPORT_SYMBOL(client_obd_cleanup); - -/* pinger.c */ -EXPORT_SYMBOL(ptlrpc_pinger_add_import); -EXPORT_SYMBOL(ptlrpc_pinger_del_import); -EXPORT_SYMBOL(ptlrpc_pinger_sending_on_import); - -#ifdef __KERNEL__ -MODULE_AUTHOR("Cluster File Systems, Inc. 
<info@clusterfs.com>"); -MODULE_DESCRIPTION("Lustre Request Processor"); -MODULE_LICENSE("GPL"); - -module_init(ptlrpc_init); -module_exit(ptlrpc_exit); -#endif diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c deleted file mode 100644 index ca2afad..0000000 --- a/lustre/ptlrpc/recover.c +++ /dev/null @@ -1,596 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Portal-RPC reconnection and replay operations, for use in recovery. - * - * Copyright (c) 2002, 2003 Cluster File Systems, Inc. - * Author: Mike Shaver <shaver@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#define DEBUG_SUBSYSTEM S_RPC -#ifdef __KERNEL__ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kmod.h> -#else -#include <liblustre.h> -#endif - -#include <linux/obd_support.h> -#include <linux/lustre_ha.h> -#include <linux/lustre_net.h> -#include <linux/lustre_import.h> -#include <linux/lustre_export.h> -#include <linux/obd.h> -#include <linux/obd_class.h> -#include <linux/obd_lov.h> /* for IOC_LOV_SET_OSC_ACTIVE */ - -#include "ptlrpc_internal.h" - -enum reconnect_result { - RECON_RESULT_RECOVERING = 1, - RECON_RESULT_RECONNECTED = 2, - RECON_RESULT_EVICTED = 3, -}; - -int ptlrpc_reconnect_import(struct obd_import *imp) -{ - struct obd_device *obd = imp->imp_obd; - int rc, size[] = {sizeof(imp->imp_target_uuid), - sizeof(obd->obd_uuid), - sizeof(imp->imp_dlm_handle)}; - char *tmp[] = {imp->imp_target_uuid.uuid, - obd->obd_uuid.uuid, - (char *)&imp->imp_dlm_handle}; - struct ptlrpc_connection *conn = imp->imp_connection; - struct ptlrpc_request *req; - struct lustre_handle old_hdl; - __u64 committed_before_reconnect = imp->imp_peer_committed_transno; - - CERROR("reconnect handle "LPX64"\n", - imp->imp_dlm_handle.cookie); - - req = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp); - if (!req) - RETURN(-ENOMEM); - req->rq_level = LUSTRE_CONN_NEW; - req->rq_replen = lustre_msg_size(0, NULL); - rc = ptlrpc_queue_wait(req); - if (rc) { - /* what if rc > 0 ??*/ - CERROR("cannot connect to %s@%s: rc = %d\n", - imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid, rc); - GOTO(out_disc, rc); - } - - if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECONNECT) { - memset(&old_hdl, 0, sizeof(old_hdl)); - if (!memcmp(&old_hdl, &req->rq_repmsg->handle, - sizeof (old_hdl))) { - CERROR("%s@%s didn't like our handle "LPX64 - ", failed\n", imp->imp_target_uuid.uuid, - conn->c_remote_uuid.uuid, - imp->imp_dlm_handle.cookie); - GOTO(out_disc, rc = -ENOTCONN); - } - - if (memcmp(&imp->imp_remote_handle, &req->rq_repmsg->handle, - 
sizeof(imp->imp_remote_handle))) { - CERROR("%s@%s changed handle from "LPX64" to "LPX64 - "; copying, but this may foreshadow disaster\n", - imp->imp_target_uuid.uuid, - conn->c_remote_uuid.uuid, - imp->imp_remote_handle.cookie, - req->rq_repmsg->handle.cookie); - imp->imp_remote_handle = req->rq_repmsg->handle; - GOTO(out_disc, rc = RECON_RESULT_RECONNECTED); - } - - CERROR("reconnected to %s@%s after partition\n", - imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid); - GOTO(out_disc, rc = RECON_RESULT_RECONNECTED); - } else if (lustre_msg_get_op_flags(req->rq_repmsg) & MSG_CONNECT_RECOVERING) { - rc = RECON_RESULT_RECOVERING; - } else { - rc = RECON_RESULT_EVICTED; - } - - old_hdl = imp->imp_remote_handle; - imp->imp_remote_handle = req->rq_repmsg->handle; - CERROR("reconnected to %s@%s ("LPX64", was "LPX64")!\n", - imp->imp_target_uuid.uuid, conn->c_remote_uuid.uuid, - imp->imp_remote_handle.cookie, old_hdl.cookie); - if (req->rq_repmsg->last_committed < committed_before_reconnect) { - CERROR("%s went back in time (transno "LPD64 - " was committed, server claims "LPD64 - ")! 
is shared storage not coherent?\n", - imp->imp_target_uuid.uuid, - imp->imp_peer_committed_transno, - req->rq_repmsg->last_committed); - } - - GOTO(out_disc, rc); - - out_disc: - ptlrpc_req_finished(req); - return rc; -} - -void ptlrpc_run_recovery_over_upcall(struct obd_device *obd) -{ - char *argv[4]; - char *envp[3]; - int rc; - - ENTRY; - argv[0] = obd_lustre_upcall; - argv[1] = "RECOVERY_OVER"; - argv[2] = obd->obd_uuid.uuid; - argv[3] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error invoking recovery upcall %s %s %s: %d; check " - "/proc/sys/lustre/upcall\n", - argv[0], argv[1], argv[2], rc); - - } else { - CERROR("Invoked upcall %s %s %s", - argv[0], argv[1], argv[2]); - } -} - -void ptlrpc_run_failed_import_upcall(struct obd_import* imp) -{ - char *argv[7]; - char *envp[3]; - int rc; - - ENTRY; - argv[0] = obd_lustre_upcall; - argv[1] = "FAILED_IMPORT"; - argv[2] = imp->imp_target_uuid.uuid; - argv[3] = imp->imp_obd->obd_name; - argv[4] = imp->imp_connection->c_remote_uuid.uuid; - argv[5] = imp->imp_obd->obd_uuid.uuid; - argv[6] = NULL; - - envp[0] = "HOME=/"; - envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin"; - envp[2] = NULL; - - rc = USERMODEHELPER(argv[0], argv, envp); - if (rc < 0) { - CERROR("Error invoking recovery upcall %s %s %s %s %s: %d; check " - "/proc/sys/lustre/lustre_upcall\n", - argv[0], argv[1], argv[2], argv[3], argv[4],rc); - - } else { - CERROR("Invoked upcall %s %s %s %s %s\n", - argv[0], argv[1], argv[2], argv[3], argv[4]); - } -} - -int ptlrpc_replay(struct obd_import *imp) -{ - int rc = 0; - struct list_head *tmp, *pos; - struct ptlrpc_request *req; - unsigned long flags; - __u64 committed = imp->imp_peer_committed_transno; - ENTRY; - - /* It might have committed some after we last spoke, so make sure we - * get rid of them now. 
- */ - spin_lock_irqsave(&imp->imp_lock, flags); - ptlrpc_free_committed(imp); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - CDEBUG(D_HA, "import %p from %s has committed "LPD64"\n", - imp, imp->imp_target_uuid.uuid, committed); - - list_for_each(tmp, &imp->imp_replay_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - DEBUG_REQ(D_HA, req, "RETAINED: "); - } - - /* Do I need to hold a lock across this iteration? We shouldn't be - * racing with any additions to the list, because we're in recovery - * and are therefore not processing additional requests to add. Calls - * to ptlrpc_free_committed might commit requests, but nothing "newer" - * than the one we're replaying (it can't be committed until it's - * replayed, and we're doing that here). l_f_e_safe protects against - * problems with the current request being committed, in the unlikely - * event of that race. So, in conclusion, I think that it's safe to - * perform this list-walk without the imp_lock held. - * - * But, the {mdc,osc}_replay_open callbacks both iterate - * request lists, and have comments saying they assume the - * imp_lock is being held by ptlrpc_replay, but it's not. it's - * just a little race... - */ - list_for_each_safe(tmp, pos, &imp->imp_replay_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - - DEBUG_REQ(D_HA, req, "REPLAY:"); - - rc = ptlrpc_replay_req(req); - - if (rc) { - CERROR("recovery replay error %d for req "LPD64"\n", - rc, req->rq_xid); - RETURN(rc); - } - } - - RETURN(0); -} - -int ptlrpc_resend(struct obd_import *imp) -{ - struct list_head *tmp, *pos; - struct ptlrpc_request *req; - unsigned long flags; - - ENTRY; - - /* As long as we're in recovery, nothing should be added to the sending - * list, so we don't need to hold the lock during this iteration and - * resend process. - */ - /* Well... what if lctl recover is called twice at the same time? 
- */ - spin_lock_irqsave(&imp->imp_lock, flags); - LASSERT(imp->imp_level == LUSTRE_CONN_RECOVER); - spin_unlock_irqrestore(&imp->imp_lock, flags); - - list_for_each_safe(tmp, pos, &imp->imp_sending_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - ptlrpc_resend_req(req); - } - - RETURN(0); -} - -void ptlrpc_wake_delayed(struct obd_import *imp) -{ - unsigned long flags; - struct list_head *tmp, *pos; - struct ptlrpc_request *req; - - spin_lock_irqsave(&imp->imp_lock, flags); - list_for_each_safe(tmp, pos, &imp->imp_delayed_list) { - req = list_entry(tmp, struct ptlrpc_request, rq_list); - - ptlrpc_put_connection(req->rq_connection); - req->rq_connection = - ptlrpc_connection_addref(req->rq_import->imp_connection); - - if (req->rq_set) { - DEBUG_REQ(D_HA, req, "waking (set %p):", req->rq_set); - wake_up(&req->rq_set->set_waitq); - } else { - DEBUG_REQ(D_HA, req, "waking:"); - wake_up(&req->rq_wait_for_rep); - } - } - spin_unlock_irqrestore(&imp->imp_lock, flags); -} - -inline void ptlrpc_invalidate_import_state(struct obd_import *imp) -{ - struct ldlm_namespace *ns = imp->imp_obd->obd_namespace; - if (ptlrpc_ldlm_namespace_cleanup == NULL) - CERROR("ptlrpc/ldlm hook is NULL! 
Please tell phil\n"); - else - ptlrpc_ldlm_namespace_cleanup(ns, 1 /* no network ops */); - ptlrpc_abort_inflight(imp); -} - - -void ptlrpc_handle_failed_import(struct obd_import *imp) -{ - ENTRY; - if (!imp->imp_replayable) { - CDEBUG(D_HA, - "import %s@%s for %s not replayable, deactivating\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid, - imp->imp_obd->obd_name); - ptlrpc_set_import_active(imp, 0); - } - - ptlrpc_run_failed_import_upcall(imp); - EXIT; -} - -void ptlrpc_request_handle_eviction(struct ptlrpc_request *failed_req) -{ - int rc; - struct obd_import *imp= failed_req->rq_import; - unsigned long flags; - struct ptlrpc_request *req; - ENTRY; - - CDEBUG(D_HA, "import %s of %s@%s evicted: reconnecting\n", - imp->imp_obd->obd_name, - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); - rc = ptlrpc_recover_import(imp, NULL); - if (rc) { - ptlrpc_resend_req(failed_req); - if (rc != -EALREADY) - ptlrpc_handle_failed_import(imp); - } else { - LASSERT(failed_req->rq_import_generation < imp->imp_generation); - spin_lock_irqsave (&failed_req->rq_lock, flags); - failed_req->rq_err = 1; - spin_unlock_irqrestore (&failed_req->rq_lock, flags); - } - ptlrpc_req_finished(req); - EXIT; -} - -int ptlrpc_set_import_active(struct obd_import *imp, int active) -{ - struct obd_device *notify_obd; - unsigned long flags; - int rc; - - LASSERT(imp->imp_obd); - - notify_obd = imp->imp_obd->u.cli.cl_containing_lov; - - /* When deactivating, mark import invalid, and - abort in-flight requests. */ - if (!active) { - CDEBUG(D_ERROR, "setting import %s INVALID\n", imp->imp_target_uuid.uuid); - spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_invalid = 1; - imp->imp_generation++; - spin_unlock_irqrestore(&imp->imp_lock, flags); - ptlrpc_invalidate_import_state(imp); -// ptlrpc_abort_inflight(imp); - } - - if (notify_obd == NULL) - GOTO(out, rc = 0); - - /* How gross is _this_? 
*/ - if (!list_empty(¬ify_obd->obd_exports)) { - struct lustre_handle fakeconn; - struct obd_ioctl_data ioc_data = { 0 }; - struct obd_export *exp = - list_entry(notify_obd->obd_exports.next, - struct obd_export, exp_obd_chain); - - fakeconn.cookie = exp->exp_handle.h_cookie; - ioc_data.ioc_inlbuf1 = (char *)&imp->imp_target_uuid; - ioc_data.ioc_offset = active; - rc = obd_iocontrol(IOC_LOV_SET_OSC_ACTIVE, &fakeconn, - sizeof ioc_data, &ioc_data, NULL); - if (rc) - CERROR("error %sabling %s on LOV %p/%s: %d\n", - active ? "en" : "dis", - imp->imp_target_uuid.uuid, notify_obd, - notify_obd->obd_uuid.uuid, rc); - } else { - CDEBUG(D_HA, "No exports for obd %p/%s, can't notify about " - "%p\n", notify_obd, notify_obd->obd_uuid.uuid, - imp->imp_obd->obd_uuid.uuid); - rc = -ENOENT; - } - -out: - /* When activating, mark import valid */ - if (active) { - CDEBUG(D_ERROR, "setting import %s VALID\n", imp->imp_target_uuid.uuid); - spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_invalid = 0; - spin_unlock_irqrestore(&imp->imp_lock, flags); - } - - RETURN(rc); -} - -void ptlrpc_fail_import(struct obd_import *imp, int generation) -{ - unsigned long flags; - int in_recovery = 0; - ENTRY; - - LASSERT (!imp->imp_dlm_fake); - - spin_lock_irqsave(&imp->imp_lock, flags); - if (imp->imp_level != LUSTRE_CONN_FULL) - in_recovery = 1; - else - imp->imp_level = LUSTRE_CONN_NOTCONN; - spin_unlock_irqrestore(&imp->imp_lock, flags); - - if (in_recovery) { - EXIT; - return; - } - - ptlrpc_handle_failed_import(imp); - EXIT; -} - -static int signal_completed_replay(struct obd_import *imp) -{ - struct ptlrpc_request *req; - int rc; - ENTRY; - - req = ptlrpc_prep_req(imp, OBD_PING, 0, NULL, NULL); - if (!req) - RETURN(-ENOMEM); - - req->rq_replen = lustre_msg_size(0, NULL); - req->rq_level = LUSTRE_CONN_RECOVER; - req->rq_reqmsg->flags |= MSG_LAST_REPLAY; - - rc = ptlrpc_queue_wait(req); - - ptlrpc_req_finished(req); - RETURN(rc); -} - -int ptlrpc_recover_import(struct obd_import *imp, char 
*new_uuid) -{ - int rc; - unsigned long flags; - int in_recover = 0; - int recon_result; - ENTRY; - - spin_lock_irqsave(&imp->imp_lock, flags); - if (imp->imp_level == LUSTRE_CONN_FULL || - imp->imp_level == LUSTRE_CONN_NOTCONN) - imp->imp_level = LUSTRE_CONN_RECOVER; - else - in_recover = 1; - spin_unlock_irqrestore(&imp->imp_lock, flags); - - if (in_recover == 1) - RETURN(-EALREADY); - - if (new_uuid) { - struct ptlrpc_connection *conn; - struct obd_uuid uuid; - struct ptlrpc_peer peer; - struct obd_export *dlmexp; - - obd_str2uuid(&uuid, new_uuid); - if (ptlrpc_uuid_to_peer(&uuid, &peer)) { - CERROR("no connection found for UUID %s\n", new_uuid); - RETURN(-EINVAL); - } - - conn = ptlrpc_get_connection(&peer, &uuid); - if (!conn) - RETURN(-ENOMEM); - - CDEBUG(D_HA, "switching import %s/%s from %s to %s\n", - imp->imp_target_uuid.uuid, imp->imp_obd->obd_name, - imp->imp_connection->c_remote_uuid.uuid, - conn->c_remote_uuid.uuid); - - /* Switch the import's connection and the DLM export's - * connection (which are almost certainly the same, but we - * keep distinct refs just to make things clearer. I think. */ - if (imp->imp_connection) - ptlrpc_put_connection(imp->imp_connection); - /* We hand off the ref from ptlrpc_get_connection. 
*/ - imp->imp_connection = conn; - - dlmexp = class_conn2export(&imp->imp_dlm_handle); - if (dlmexp->exp_connection) - ptlrpc_put_connection(dlmexp->exp_connection); - dlmexp->exp_connection = ptlrpc_connection_addref(conn); - class_export_put(dlmexp); - - } - - recon_result = ptlrpc_reconnect_import(imp); - - if (recon_result < 0) { - CERROR("failed to reconnect to %s@%s: %d\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid, recon_result); - spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_level = LUSTRE_CONN_NOTCONN; - spin_unlock_irqrestore(&imp->imp_lock, flags); - RETURN(recon_result); - } - - if (recon_result == RECON_RESULT_RECOVERING) { - CDEBUG(D_HA, "replay requested by %s\n", - imp->imp_target_uuid.uuid); - rc = ptlrpc_replay(imp); - if (rc) - GOTO(out, rc); - - if (ptlrpc_ldlm_replay_locks == NULL) - CERROR("ptlrpc/ldlm hook is NULL! Please tell phil\n"); - else - rc = ptlrpc_ldlm_replay_locks(imp); - if (rc) - GOTO(out, rc); - - rc = signal_completed_replay(imp); - if (rc) - GOTO(out, rc); - } else if (recon_result == RECON_RESULT_RECONNECTED) { - CDEBUG(D_HA, "reconnected to %s@%s\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); - } else if (recon_result == RECON_RESULT_EVICTED) { - CDEBUG(D_HA, "evicted from %s@%s; invalidating\n", - imp->imp_target_uuid.uuid, - imp->imp_connection->c_remote_uuid.uuid); - ptlrpc_set_import_active(imp, 0); -// ptlrpc_invalidate_import_state(imp); - } else { - LBUG(); - } - - ptlrpc_set_import_active(imp, 1); - - rc = ptlrpc_resend(imp); - - spin_lock_irqsave(&imp->imp_lock, flags); - imp->imp_level = LUSTRE_CONN_FULL; - spin_unlock_irqrestore(&imp->imp_lock, flags); - - ptlrpc_wake_delayed(imp); - EXIT; - out: - return rc; -} - -void ptlrpc_fail_export(struct obd_export *exp) -{ - int rc, already_failed; - struct lustre_handle hdl; - unsigned long flags; - - spin_lock_irqsave(&exp->exp_lock, flags); - already_failed = exp->exp_failed; - exp->exp_failed = 1; - 
spin_unlock_irqrestore(&exp->exp_lock, flags); - - if (already_failed) { - CDEBUG(D_HA, "disconnecting dead export %p/%s; skipping\n", - exp, exp->exp_client_uuid.uuid); - return; - } - - CDEBUG(D_HA, "disconnecting export %p/%s\n", - exp, exp->exp_client_uuid.uuid); - hdl.cookie = exp->exp_handle.h_cookie; - rc = obd_disconnect(&hdl, 0); - if (rc) - CERROR("disconnecting export %p failed: %d\n", exp, rc); -} diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c deleted file mode 100644 index f2a1089..0000000 --- a/lustre/ptlrpc/service.c +++ /dev/null @@ -1,550 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#define DEBUG_SUBSYSTEM S_RPC -#ifndef __KERNEL__ -#include <liblustre.h> -#include <linux/kp30.h> -#endif -#include <linux/obd_support.h> -#include <linux/obd_class.h> -#include <linux/lustre_net.h> -#include <portals/types.h> -#include "ptlrpc_internal.h" - -extern int request_in_callback(ptl_event_t *ev); - -static int ptlrpc_check_event(struct ptlrpc_service *svc, - struct ptlrpc_thread *thread, ptl_event_t *event) -{ - struct ptlrpc_srv_ni *srv_ni; - int i; - int idx; - int rc; - ENTRY; - - spin_lock(&svc->srv_lock); - - if (thread->t_flags & SVC_STOPPING) - GOTO(out, rc = 1); - - LASSERT ((thread->t_flags & SVC_EVENT) == 0); - LASSERT (ptlrpc_ninterfaces > 0); - - for (i = 0; i < ptlrpc_ninterfaces; i++) { - idx = (svc->srv_interface_rover + i) % ptlrpc_ninterfaces; - srv_ni = &svc->srv_interfaces[idx]; - - LASSERT (!PtlHandleEqual (srv_ni->sni_eq_h, PTL_HANDLE_NONE)); - - rc = PtlEQGet(srv_ni->sni_eq_h, event); - switch (rc) { - case PTL_OK: - /* next time start with the next interface */ - svc->srv_interface_rover = (idx+1) % ptlrpc_ninterfaces; - thread->t_flags |= SVC_EVENT; - GOTO(out, rc = 1); - - case PTL_EQ_EMPTY: - continue; - - default: - CERROR("BUG: PtlEQGet returned %d\n", rc); - LBUG(); - } - } - rc = 0; - EXIT; - out: - spin_unlock(&svc->srv_lock); - return rc; -} - -struct ptlrpc_service * ptlrpc_init_svc(__u32 nevents, __u32 nbufs, - __u32 bufsize, __u32 max_req_size, - int req_portal, int rep_portal, - svc_handler_t handler, char *name, - struct obd_device *obddev) -{ - int i, j, ssize, rc; - struct ptlrpc_service *service; - struct ptlrpc_srv_ni *srv_ni; - ENTRY; - - LASSERT (ptlrpc_ninterfaces > 0); - - ssize = offsetof (struct ptlrpc_service, - srv_interfaces[ptlrpc_ninterfaces]); - OBD_ALLOC(service, ssize); - if (service == NULL) - RETURN(NULL); - - service->srv_name = name; - spin_lock_init(&service->srv_lock); - INIT_LIST_HEAD(&service->srv_threads); - init_waitqueue_head(&service->srv_waitq); - - service->srv_max_req_size 
= max_req_size; - service->srv_buf_size = bufsize; - - service->srv_rep_portal = rep_portal; - service->srv_req_portal = req_portal; - service->srv_handler = handler; - service->srv_interface_rover = 0; - - /* First initialise enough for early teardown */ - for (i = 0; i < ptlrpc_ninterfaces; i++) { - srv_ni = &service->srv_interfaces[i]; - - srv_ni->sni_service = service; - srv_ni->sni_ni = &ptlrpc_interfaces[i]; - srv_ni->sni_eq_h = PTL_HANDLE_NONE; - INIT_LIST_HEAD(&srv_ni->sni_rqbds); - srv_ni->sni_nrqbds = 0; - atomic_set(&srv_ni->sni_nrqbds_receiving, 0); - } - - /* Now allocate the event queue and request buffers, assuming all - * interfaces require the same level of buffering. */ - for (i = 0; i < ptlrpc_ninterfaces; i++) { - srv_ni = &service->srv_interfaces[i]; - CDEBUG (D_NET, "%s: initialising interface %s\n", name, - srv_ni->sni_ni->pni_name); - - rc = PtlEQAlloc(srv_ni->sni_ni->pni_ni_h, nevents, - request_in_callback, &(srv_ni->sni_eq_h)); - if (rc != PTL_OK) { - CERROR("%s.%d: PtlEQAlloc on %s failed: %d\n", - name, i, srv_ni->sni_ni->pni_name, rc); - GOTO (failed, NULL); - } - - for (j = 0; j < nbufs; j++) { - struct ptlrpc_request_buffer_desc *rqbd; - - OBD_ALLOC(rqbd, sizeof(*rqbd)); - if (rqbd == NULL) { - CERROR ("%s.%d: Can't allocate request " - "descriptor %d on %s\n", - name, i, srv_ni->sni_nrqbds, - srv_ni->sni_ni->pni_name); - GOTO(failed, NULL); - } - - rqbd->rqbd_srv_ni = srv_ni; - rqbd->rqbd_me_h = PTL_HANDLE_NONE; - atomic_set(&rqbd->rqbd_refcount, 0); - - OBD_ALLOC(rqbd->rqbd_buffer, service->srv_buf_size); - if (rqbd->rqbd_buffer == NULL) { - CERROR ("%s.%d: Can't allocate request " - "buffer %d on %s\n", - name, i, srv_ni->sni_nrqbds, - srv_ni->sni_ni->pni_name); - OBD_FREE(rqbd, sizeof(*rqbd)); - GOTO(failed, NULL); - } - list_add(&rqbd->rqbd_list, &srv_ni->sni_rqbds); - srv_ni->sni_nrqbds++; - - ptlrpc_link_svc_me(rqbd); - } - } - - ptlrpc_lprocfs_register_service(obddev, service); - - CDEBUG(D_NET, "%s: Started on %d interfaces, 
listening on portal %d\n", - service->srv_name, ptlrpc_ninterfaces, service->srv_req_portal); - - RETURN(service); -failed: - ptlrpc_unregister_service(service); - return NULL; -} - -static int handle_incoming_request(struct obd_device *obddev, - struct ptlrpc_service *svc, - ptl_event_t *event, - struct ptlrpc_request *request) -{ - struct ptlrpc_request_buffer_desc *rqbd = event->mem_desc.user_ptr; - int rc; - - /* FIXME: If we move to an event-driven model, we should put the request - * on the stack of mds_handle instead. */ - - LASSERT (atomic_read (&rqbd->rqbd_refcount) > 0); - LASSERT ((event->mem_desc.options & (PTL_MD_IOV | PTL_MD_KIOV)) == 0); - LASSERT (rqbd->rqbd_srv_ni->sni_service == svc); - LASSERT (rqbd->rqbd_buffer == event->mem_desc.start); - LASSERT (event->offset + event->mlength <= svc->srv_buf_size); - - memset(request, 0, sizeof(*request)); - spin_lock_init (&request->rq_lock); - INIT_LIST_HEAD(&request->rq_list); - request->rq_svc = svc; - request->rq_obd = obddev; - request->rq_xid = event->match_bits; - request->rq_reqmsg = event->mem_desc.start + event->offset; - request->rq_reqlen = event->mlength; - -#if SWAB_PARANOIA - /* Clear request swab mask; this is a new request */ - request->rq_req_swab_mask = 0; -#endif - rc = lustre_unpack_msg (request->rq_reqmsg, request->rq_reqlen); - if (rc != 0) { - CERROR ("error unpacking request: ptl %d from "LPX64 - " xid "LPU64"\n", svc->srv_req_portal, - event->initiator.nid, request->rq_xid); - goto out; - } - rc = -EINVAL; - if (request->rq_reqmsg->type != PTL_RPC_MSG_REQUEST) { - CERROR("wrong packet type received (type=%u)\n", - request->rq_reqmsg->type); - goto out; - } - - CDEBUG(D_NET, "got req "LPD64" (md: %p + %d)\n", request->rq_xid, - event->mem_desc.start, event->offset); - - request->rq_peer.peer_nid = event->initiator.nid; - request->rq_peer.peer_ni = rqbd->rqbd_srv_ni->sni_ni; - - request->rq_export = class_conn2export(&request->rq_reqmsg->handle); - - if (request->rq_export) { - 
request->rq_connection = request->rq_export->exp_connection; - ptlrpc_connection_addref(request->rq_connection); - request->rq_export->exp_last_request_time = - LTIME_S(CURRENT_TIME); - } else { - /* create a (hopefully temporary) connection that will be used - * to send the reply if this call doesn't create an export. - * XXX revisit this when we revamp ptlrpc */ - request->rq_connection = - ptlrpc_get_connection(&request->rq_peer, NULL); - } - - CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid:pid:xid:ni:nid:opc %s:%s:%d:" - LPU64":%s:"LPX64":%d\n", - current->comm, - (request->rq_export ? - (char *)request->rq_export->exp_client_uuid.uuid : "0"), - request->rq_reqmsg->status, request->rq_xid, - rqbd->rqbd_srv_ni->sni_ni->pni_name, event->initiator.nid, - request->rq_reqmsg->opc); - - rc = svc->srv_handler(request); - CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid:pid:xid:ni:nid:opc %s:%s:%d:" - LPU64":%s:"LPX64":%d\n", - current->comm, - (request->rq_export ? - (char *)request->rq_export->exp_client_uuid.uuid : "0"), - request->rq_reqmsg->status, request->rq_xid, - rqbd->rqbd_srv_ni->sni_ni->pni_name, event->initiator.nid, - request->rq_reqmsg->opc); - - ptlrpc_put_connection(request->rq_connection); - if (request->rq_export != NULL) - class_export_put(request->rq_export); - - out: - if (atomic_dec_and_test (&rqbd->rqbd_refcount)) /* last reference? 
*/ - ptlrpc_link_svc_me (rqbd); - - return rc; -} - -/* Don't use daemonize, it removes fs struct from new thread (bug 418) */ -void ptlrpc_daemonize(void) -{ - exit_mm(current); - - current->session = 1; - current->pgrp = 1; - current->tty = NULL; - - exit_files(current); - reparent_to_init(); -} - -static int ptlrpc_main(void *arg) -{ - struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg; - struct obd_device *obddev = data->dev; - struct ptlrpc_service *svc = data->svc; - struct ptlrpc_thread *thread = data->thread; - struct ptlrpc_request *request; - ptl_event_t *event; - int rc = 0; - unsigned long flags; - cycles_t workdone_time = -1; - cycles_t svc_workcycles = -1; - ENTRY; - - lock_kernel(); - ptlrpc_daemonize(); - - SIGNAL_MASK_LOCK(current, flags); - sigfillset(¤t->blocked); - RECALC_SIGPENDING; - SIGNAL_MASK_UNLOCK(current, flags); - -#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20)) - sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid); -#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - sprintf(current->comm, "%s|%d", data->name, - current->thread.mode.tt.extern_pid); -#else - strcpy(current->comm, data->name); -#endif - unlock_kernel(); - - OBD_ALLOC(event, sizeof(*event)); - if (!event) - GOTO(out, rc = -ENOMEM); - OBD_ALLOC(request, sizeof(*request)); - if (!request) - GOTO(out_event, rc = -ENOMEM); - - /* Record that the thread is running */ - thread->t_flags = SVC_RUNNING; - wake_up(&thread->t_ctl_waitq); - - /* XXX maintain a list of all managed devices: insert here */ - - /* And now, loop forever on requests */ - while (1) { - struct l_wait_info lwi = { 0 }; - l_wait_event(svc->srv_waitq, - ptlrpc_check_event(svc, thread, event), &lwi); - - if (thread->t_flags & SVC_STOPPING) { - spin_lock(&svc->srv_lock); - thread->t_flags &= ~SVC_STOPPING; - spin_unlock(&svc->srv_lock); - - EXIT; - break; - } - - if (thread->t_flags & SVC_EVENT) { - cycles_t workstart_time; - - 
spin_lock(&svc->srv_lock); - thread->t_flags &= ~SVC_EVENT; - /* Update Service Statistics */ - workstart_time = get_cycles(); - if (workdone_time != -1 && svc->svc_stats != NULL) { - /* Stats for req(n) are updated just before - * req(n+1) is executed. This avoids need to - * reacquire svc->srv_lock after - * call to handling_request(). - */ - int opc; - - /* req_waittime */ - lprocfs_counter_add(svc->svc_stats, - PTLRPC_REQWAIT_CNTR, - (workstart_time - - event->arrival_time)); - /* svc_eqdepth */ - /* Wait for b_eq branch - lprocfs_counter_add(svc->svc_stats, - PTLRPC_SVCEQDEPTH_CNTR, - 0); - */ - /* svc_idletime */ - lprocfs_counter_add(svc->svc_stats, - PTLRPC_SVCIDLETIME_CNTR, - (workstart_time - - workdone_time)); - /* previous request */ - opc = opcode_offset(request->rq_reqmsg->opc); - if (opc > 0) { - LASSERT(opc < LUSTRE_MAX_OPCODES); - lprocfs_counter_add(svc->svc_stats, opc, - PTLRPC_LAST_CNTR + - svc_workcycles); - } - } - spin_unlock(&svc->srv_lock); - - rc = handle_incoming_request(obddev, svc, event, - request); - workdone_time = get_cycles(); - svc_workcycles = workdone_time - workstart_time; - continue; - } - - CERROR("unknown break in service"); - LBUG(); - EXIT; - break; - } - - /* NB should wait for all SENT callbacks to complete before exiting - * here. Unfortunately at this time there is no way to track this - * state. 
- */ - OBD_FREE(request, sizeof(*request)); -out_event: - OBD_FREE(event, sizeof(*event)); -out: - thread->t_flags = SVC_STOPPED; - wake_up(&thread->t_ctl_waitq); - - CDEBUG(D_NET, "service thread exiting, process %d: rc = %d\n", - current->pid, rc); - return rc; -} - -static void ptlrpc_stop_thread(struct ptlrpc_service *svc, - struct ptlrpc_thread *thread) -{ - struct l_wait_info lwi = { 0 }; - - spin_lock(&svc->srv_lock); - thread->t_flags = SVC_STOPPING; - spin_unlock(&svc->srv_lock); - - wake_up(&svc->srv_waitq); - l_wait_event(thread->t_ctl_waitq, (thread->t_flags & SVC_STOPPED), - &lwi); -} - -void ptlrpc_stop_all_threads(struct ptlrpc_service *svc) -{ - spin_lock(&svc->srv_lock); - while (!list_empty(&svc->srv_threads)) { - struct ptlrpc_thread *thread; - thread = list_entry(svc->srv_threads.next, struct ptlrpc_thread, - t_link); - spin_unlock(&svc->srv_lock); - ptlrpc_stop_thread(svc, thread); - spin_lock(&svc->srv_lock); - list_del(&thread->t_link); - OBD_FREE(thread, sizeof(*thread)); - } - spin_unlock(&svc->srv_lock); -} - -int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc, - char *name) -{ - struct l_wait_info lwi = { 0 }; - struct ptlrpc_svc_data d; - struct ptlrpc_thread *thread; - int rc; - ENTRY; - - OBD_ALLOC(thread, sizeof(*thread)); - if (thread == NULL) - RETURN(-ENOMEM); - init_waitqueue_head(&thread->t_ctl_waitq); - - d.dev = dev; - d.svc = svc; - d.name = name; - d.thread = thread; - - spin_lock(&svc->srv_lock); - list_add(&thread->t_link, &svc->srv_threads); - spin_unlock(&svc->srv_lock); - - /* CLONE_VM and CLONE_FILES just avoid a needless copy, because we - * just drop the VM and FILES in ptlrpc_daemonize() right away. 
- */ - rc = kernel_thread(ptlrpc_main, &d, CLONE_VM | CLONE_FILES); - if (rc < 0) { - CERROR("cannot start thread: %d\n", rc); - OBD_FREE(thread, sizeof(*thread)); - RETURN(rc); - } - l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING, &lwi); - - RETURN(0); -} - -int ptlrpc_unregister_service(struct ptlrpc_service *service) -{ - int i, rc; - struct ptlrpc_srv_ni *srv_ni; - - LASSERT (list_empty (&service->srv_threads)); - - /* XXX We could reply (with failure) to all buffered requests - * _after_ unlinking _all_ the request buffers, but _before_ - * freeing them. - */ - - for (i = 0; i < ptlrpc_ninterfaces; i++) { - srv_ni = &service->srv_interfaces[i]; - CDEBUG (D_NET, "%s: tearing down interface %s\n", - service->srv_name, srv_ni->sni_ni->pni_name); - - while (!list_empty (&srv_ni->sni_rqbds)) { - struct ptlrpc_request_buffer_desc *rqbd = - list_entry (srv_ni->sni_rqbds.next, - struct ptlrpc_request_buffer_desc, - rqbd_list); - - list_del (&rqbd->rqbd_list); - - LASSERT (atomic_read (&rqbd->rqbd_refcount) > 0); - /* refcount could be anything; it's possible for - * the buffers to continued to get filled after all - * the server threads exited. But we know they - * _have_ exited. - */ - - (void) PtlMEUnlink(rqbd->rqbd_me_h); - /* The callback handler could have unlinked this ME - * already (we're racing with her) but it's safe to - * ensure it _has_ been unlinked. 
- */ - - OBD_FREE (rqbd->rqbd_buffer, service->srv_buf_size); - OBD_FREE (rqbd, sizeof (*rqbd)); - srv_ni->sni_nrqbds--; - } - - LASSERT (srv_ni->sni_nrqbds == 0); - - if (!PtlHandleEqual (srv_ni->sni_eq_h, PTL_HANDLE_NONE)) { - rc = PtlEQFree(srv_ni->sni_eq_h); - if (rc) - CERROR("%s.%d: PtlEQFree failed on %s: %d\n", - service->srv_name, i, - srv_ni->sni_ni->pni_name, rc); - } - } - - ptlrpc_lprocfs_unregister_service(service); - - OBD_FREE(service, - offsetof (struct ptlrpc_service, - srv_interfaces[ptlrpc_ninterfaces])); - return 0; -} diff --git a/lustre/scripts/.cvsignore b/lustre/scripts/.cvsignore deleted file mode 100644 index 104ddf7..0000000 --- a/lustre/scripts/.cvsignore +++ /dev/null @@ -1,9 +0,0 @@ -.Xrefs -lustre.spec -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS diff --git a/lustre/scripts/Makefile.am b/lustre/scripts/Makefile.am deleted file mode 100644 index 199f5be..0000000 --- a/lustre/scripts/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -EXTRA_DIST = license-status maketags.sh lustre.spec version_tag.pl $(initd_SCRIPTS) -initddir = $(sysconfdir)/init.d -initd_SCRIPTS = lustre -include $(top_srcdir)/Rules - diff --git a/lustre/scripts/dodiff.sh b/lustre/scripts/dodiff.sh deleted file mode 100755 index 899415d..0000000 --- a/lustre/scripts/dodiff.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -for f in `cat $1` ; do - diff -u $2-pristine/$f $2/$f -done diff --git a/lustre/scripts/license-status b/lustre/scripts/license-status deleted file mode 100755 index 5407b91..0000000 --- a/lustre/scripts/license-status +++ /dev/null @@ -1,26 +0,0 @@ -#! /bin/sh -# license-status - Display the status of files in the current directory -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution -# -# Gordon Matzigkeit <gord@fig.org>, 2001-09-27 - -for f in `find . -type f | sort`; do - case "$f" in - *~ | *.orig | *.gz | */config.* | *.o | \ - */CVS/* | */.cvsignore | */.depfiles/* | \ - */COPYING | */ChangeLog) - continue - ;; - esac - - if head -20 "$f" | egrep -e 'GNU' > /dev/null; then - echo "gpled $f" - elif head -20 "$f" | egrep -e '\([Cc]\)' > /dev/null; then - echo "copyrighted $f" - else - echo "bare $f" - fi -done | sort diff --git a/lustre/scripts/llite-group.sh b/lustre/scripts/llite-group.sh deleted file mode 100644 index ed914e8..0000000 --- a/lustre/scripts/llite-group.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh -# -# llite-group.sh : Cluster Manager service script for Lustre -# -# This must be named llite-<group>.sh, where group is the device -# group that is being managed by the cluster manager service. -# - -set -e -set -vx - -[ -f ${LUSTRE_CFG:=/etc/lustre/lustre.cfg} ] && . ${LUSTRE_CFG} - -LDAPURL=${LDAPURL:-ldap://localhost} -CONFIG=${CONFIG:-test23} - -LACTIVE=${LACTIVE:-/usr/sbin/lactive} -LCONF=${LCONF:-/usr/sbin/lconf} - -group=`basename $0 .sh| cut -d- -f2` -confopt="--ldapurl $LDAPURL --config $CONFIG" - -[ -z "$group" ] && exit 0 - -node=`hostname -s` - -[ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR - -start() { - echo -n "Starting $SERVICE: " - python2 $LACTIVE $confopt --group $group --active $node - python2 $LCONF -v $confopt - RETVAL=$? - echo done -} - -stop() { - echo -n "Shutting down $SERVICE: " - python2 $LCONF -v --cleanup --force --failover $confopt - RETVAL=$? 
- echo done -} - -status() { - RETVAL=0 -} - - -case "$1" in - start) - start - ;; - stop) - stop - ;; - restart) - restart - ;; - status) - status $SERVICE - ;; - *) - echo "Usage: $0 {start|stop|status}" - exit 1 -esac - -exit $RETVAL diff --git a/lustre/scripts/lustre b/lustre/scripts/lustre deleted file mode 100755 index 95c1d06..0000000 --- a/lustre/scripts/lustre +++ /dev/null @@ -1,95 +0,0 @@ -#!/bin/sh -# -# lustre This shell script takes care of starting and stopping Lustre -# -# chkconfig: 345 99 1 -# description: Lustre Lite network File System. \ -# This starts both Lustre client and server functions. -# processname: lconf -# config: /etc/lustre/config.xml -# pidfile: /var/run/lustre.pid - -SERVICE=lustre -LOCK=/var/lock/subsys/$SERVICE - -: ${LUSTRE_CFG:=/etc/lustre/lustre.cfg} -[ -f ${LUSTRE_CFG} ] && . ${LUSTRE_CFG} - -: ${LUSTRE_CONFIG_XML:=/etc/lustre/config.xml} -: ${LCONF:=/usr/sbin/lconf} -: ${LCONF_START_ARGS:="${LUSTRE_CONFIG_XML}"} -: ${LCONF_STOP_ARGS:="--force --cleanup ${LUSTRE_CONFIG_XML}"} - -# Source function library. -if [ -f /etc/init.d/functions ] ; then - . /etc/init.d/functions -fi - -# Source networking configuration. -if [ -f /etc/sysconfig/network ] ; then - . /etc/sysconfig/network -fi - -# Check that networking is up. -[ "${NETWORKING}" = "no" ] && exit 0 - -[ -x ${LCONF} -a -f ${LUSTRE_CONFIG_XML} ] || exit 0 - -# Create /var/lustre directory -# This is used by snmp agent for checking lustre services \ -# status online/offline/online pending/offline pending. - -[ -d ${STATUS_DIR:=/var/lustre} ] || mkdir -p $STATUS_DIR -STATUS=${STATUS_DIR}/sysStatus - -start() { - echo -n "Starting $SERVICE: " - ${LCONF} ${LCONF_START_ARGS} - RETVAL=$? - echo $SERVICE - if [ $RETVAL -eq 0 ]; then - touch $LOCK - echo "online" >$STATUS - else - echo "online pending" >$STATUS - fi -} - -stop() { - echo -n "Shutting down $SERVICE: " - ${LCONF} ${LCONF_STOP_ARGS} - RETVAL=$? 
- echo $SERVICE - rm -f $LOCK - if [ $RETVAL -eq 0 ]; then - echo "offline" >$STATUS - else - echo "offline pending" >$STATUS - fi -} - -restart() { - stop - start -} - -# See how we were called. -case "$1" in - start) - start - ;; - stop) - stop - ;; - restart) - restart - ;; - status) - status $SERVICE - ;; - *) - echo "Usage: $SERVICE {start|stop|restart|status}" - exit 1 -esac - -exit $RETVAL diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in deleted file mode 100644 index a24a26a..0000000 --- a/lustre/scripts/lustre.spec.in +++ /dev/null @@ -1,263 +0,0 @@ -# lustre.spec -%define version b_devel -%define kversion @RELEASE@ -%define linuxdir @LINUX@ -Release: 0306170928kernel - -Summary: Lustre Lite File System -Name: lustre-lite -Version: %{version} -Copyright: GPL -Group: Utilities/System -Requires: lustre-modules, PyXML -BuildRoot: /var/tmp/lustre-%{version}-root -Source: ftp://ftp.lustre.com/pub/lustre/lustre-%{version}.tar.gz - -%description -The Lustre Lite Cluster File System: kernel drivers for file system, -servers and utilities. - -%package -n lustre-modules -Summary: Kernel Lustre drivers for Linux %{kversion} -Requires: modutils >= 2.4.10 -Group: Development/Kernel - -%description -n lustre-modules -Lustre file System, server and network drivers for Linux %{kversion}. 
- -%package -n lustre-source -Summary: Object-Based Disk storage driver source -Group: Development/Kernel - -%description -n lustre-source -Lustre Lite Source for further development - -%package -n lustre-doc -Summary: Documentation and sample configuration files -Group: Documentation -# FIXME: BuildArch overrides all the packages in rpm 4.0.4-7x -#BuildArch: noarch - -%description -n lustre-doc -Documentation and sample configuration files for Lustre - -%package -n lustre-ldap -Summary: Configures openldap server for LDAP Lustre config database -Group: Configuration -Requires: openldap-servers, openldap-clients, python-ldap, 4Suite - -%description -n lustre-ldap -Configures openldap server for LDAP Lustre config database - - -#%package -n liblustre -#Summary: Lustre Lib -#Group: Development/Kernel - -#%description -n liblustre -#Lustre lib binary package. - -%prep -%setup -qn lustre-%{version} -#%setup -c -n lustre-%{version}-lib - -%build -rm -rf $RPM_BUILD_ROOT - -# Set an explicit path to our Linux tree, if we can. -cd $RPM_BUILD_DIR/lustre-%{version} -./configure --with-linux='%{linuxdir}' -make - -#%ifarch i386 -#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version} -#./configure --with-lib -#make -#%endif - -%install -cd $RPM_BUILD_DIR/lustre-%{version} -make install prefix=$RPM_BUILD_ROOT - -#%ifarch i386 -#cd $RPM_BUILD_DIR/lustre-%{version}-lib/lustre-%{version} -#make install prefix=$RPM_BUILD_ROOT -#%endif - -%ifarch alpha -# this hurts me - conf_flag= - linuxdir=%{linuxdir} - test -d $linuxdir && conf_flag=--with-linux=$linuxdir - make clean - ./configure --enable-rtscts-myrinet $conf_flag - make - cp linux/rtscts/rtscts.o $RPM_BUILD_ROOT/lib/modules/%{kversion}/kernel/net/lustre/rtscts_myrinet.o - cp user/myrinet_utils/mcpload $RPM_BUILD_ROOT/usr/sbin/mcpload -%endif - -# Create the pristine source directory. 
-cd $RPM_BUILD_DIR/lustre-%{version} -mkdir -p $RPM_BUILD_ROOT/usr/src -rm -f lustre-source -ln -s $RPM_BUILD_ROOT/usr/src lustre-source -make distdir distdir=lustre-source/lustre-%{version} - -# ldap database directory -mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre - -%files -%attr(-, root, root) /usr/sbin/lmc -%attr(-, root, root) /usr/sbin/lctl -%attr(-, root, root) /usr/sbin/lconf -%attr(-, root, root) /usr/sbin/lactive -%attr(-, root, root) /usr/sbin/llanalyze -%attr(-, root, root) /usr/sbin/lfind -%attr(-, root, root) /usr/sbin/lstripe -%attr(-, root, root) /usr/sbin/mcreate -%attr(-, root, root) /usr/sbin/mkdirmany -%attr(-, root, root) /usr/sbin/llstat.pl -%attr(-, root, root) /usr/sbin/llobdstat.pl -%attr(-, root, root) /usr/sbin/load_ldap.sh -%attr(-, root, root) /usr/lib/lustre/python/* -%attr(-, root, root) /usr/lib/lustre/examples/llmount.sh -%attr(-, root, root) /usr/lib/lustre/examples/llmountcleanup.sh -%attr(-, root, root) /usr/lib/lustre/examples/llecho.sh -%attr(-, root, root) /usr/lib/lustre/examples/local.sh -%attr(-, root, root) /usr/lib/lustre/examples/uml.sh -%attr(-, root, root) /usr/lib/lustre/examples/lov.sh -%attr(-, root, root) /usr/lib/lustre/examples/echo.sh -%attr(-, root, root) /usr/lib/lustre/examples/llechocleanup.sh - -%attr(-, root, root) /etc/init.d/lustre -%attr(-, root, root) /usr/sbin/acceptor -%attr(-, root, root) /usr/sbin/ptlctl -%attr(-, root, root) /usr/sbin/debugctl -%attr(-, root, root) /lib/libportals.a -%attr(-, root, root) /lib/libptlctl.a -%attr(-, root, root) /lib/libtcpnal.a -%attr(-, root, root) /usr/include/lustre/*.h -%attr(-, root, root) /usr/sbin/lload -%attr(-, root, root) /usr/sbin/obdbarrier -%attr(-, root, root) /usr/sbin/obdio -%attr(-, root, root) /usr/sbin/routerstat -%attr(-, root, root) /usr/sbin/wirecheck - -%ifarch alpha -%attr(-, root, root) /usr/sbin/mcpload -%endif - -%files -n lustre-doc -%attr(-, root, root) %doc COPYING FDL -%attr(-, root, root) %doc doc/lustre.pdf doc/lustre-HOWTO.txt 
-#%attr(-, root, root) %doc tests/client-echo.cfg tests/client-mount.cfg -#%attr(-, root, root) %doc tests/client-mount2.cfg -#%attr(-, root, root) %doc tests/elan-client.cfg tests/elan-server.cfg -#%attr(-, root, root) %doc tests/ldlm.cfg tests/lustre.cfg -#%attr(-, root, root) %doc tests/mds.cfg tests/net-client.cfg -#%attr(-, root, root) %doc tests/net-local.cfg tests/net-server.cfg -#%attr(-, root, root) %doc tests/obdecho.cfg tests/obdfilter.cfg - -%files -n lustre-modules -%attr(-, root, root) %doc COPYING -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/ldlm.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/llite.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mdc.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/mds.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/fsfilt_ext3.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/fsfilt_reiserfs.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdclass.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdecho.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/obdfilter.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/lov.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/osc.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/ost.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/ptlrpc.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/ptlbd.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/fs/lustre/cobd.o -#portals modules -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/kptlrouter.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/*nal.o -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/portals.o -%ifarch alpha -%attr(-, root, root) /lib/modules/%{kversion}/kernel/net/lustre/p3mod.o -%attr(-, root, root) 
/lib/modules/%{kversion}/kernel/net/lustre/rtscts.o -%endif - -%files -n lustre-source -%attr(-, root, root) /usr/src/lustre-%{version} - -#%ifarch i386 -#%files -n liblustre -#%attr(-, root, root) /lib/lustre -#%attr(-, root, root) /lib/lustre/liblov.a -#%attr(-, root, root) /lib/lustre/liblustreclass.a -#%attr(-, root, root) /lib/lustre/libptlrpc.a -#%attr(-, root, root) /lib/lustre/libobdecho.a -#%attr(-, root, root) /lib/lustre/libldlm.a -#%attr(-, root, root) /lib/lustre/libosc.a -#%attr(-, root, root) /usr/sbin/lctl -#%attr(-, root, root) /usr/sbin/lfind -#%attr(-, root, root) /usr/sbin/lstripe -#%attr(-, root, root) /usr/sbin/obdio -#%attr(-, root, root) /usr/sbin/obdbarrier -#%attr(-, root, root) /usr/sbin/obdstat -#%attr(-, root, root) /usr/sbin/lload -#%attr(-, root, root) /usr/sbin/lconf -#%attr(-, root, root) /usr/sbin/lmc -#%attr(-, root, root) /usr/sbin/llanalyze -#%endif - - -%files -n lustre-ldap -%attr(-, root, root) /etc/openldap/slapd-lustre.conf -%attr(-, root, root) /etc/openldap/schema/lustre.schema -%attr(-, root, root) /usr/lib/lustre/lustre2ldif.xsl -%attr(-, root, root) /usr/lib/lustre/top.ldif -#%dir /var/lib/ldap/lustre -%attr(700, ldap, ldap) /var/lib/ldap/lustre - -%post -if [ ! -e /dev/obd ]; then - mknod /dev/obd c 10 241 -fi -if [ ! -e /dev/portals ]; then - mknod /dev/portals c 10 240 -fi -depmod -ae || exit 0 - -grep -q obdclass /etc/modules.conf || \ - echo 'alias char-major-10-241 obdclass' >> /etc/modules.conf - -grep -q '/dev/obd' /etc/modules.conf || \ - echo 'alias /dev/obd obdclass' >> /etc/modules.conf - -grep -q '/dev/lustre' /etc/modules.conf || \ - echo 'alias /dev/lustre obdclass' >> /etc/modules.conf - -grep -q portals /etc/modules.conf || \ - echo 'alias char-major-10-240 portals' >> /etc/modules.conf - -grep -q '/dev/portals' /etc/modules.conf || \ - echo 'alias /dev/portals portals' >> /etc/modules.conf - -%postun -depmod -ae || exit 0 - -%post -n lustre-ldap -if ! 
grep -q slapd-lustre /etc/openldap/slapd.conf; then - echo "include /etc/openldap/slapd-lustre.conf" >> /etc/openldap/slapd.conf -fi - -%postun -n lustre-ldap -slapd=/etc/openldap/slapd.conf -if grep -q slapd-lustre $slapd; then - tmp=/tmp/lustre-ldap.$$ - sed "/slapd-lustre/d" $slapd >> $tmp - cp $tmp $slapd - rm $tmp -fi -%clean -#rm -rf $RPM_BUILD_ROOT - -# end of file diff --git a/lustre/scripts/maketags.sh b/lustre/scripts/maketags.sh deleted file mode 100755 index 9bd9f87..0000000 --- a/lustre/scripts/maketags.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/sh -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution -set -vx -rm -f TAGS ; find . -name '*.h' -or -name '*.c' | xargs etags -rm -f ctags; find . -name '*.h' -or -name '*.c' | xargs ctags diff --git a/lustre/scripts/nodelustre b/lustre/scripts/nodelustre deleted file mode 100755 index b5e6540..0000000 --- a/lustre/scripts/nodelustre +++ /dev/null @@ -1,46 +0,0 @@ -#! /bin/sh -# nodelustre - Start and stop Lustre on MCR nodes -# Copyright (C) 2002 Cluster File Systems, Inc. -# Gord Eagle <gord@clusterfs.com>, 2002-09-10 - -# Set this to the shared config file. -MASTER_CONFIG=http://emcri/lustre.xml -CONFIG=/etc/lustre/lustre.xml -COMPUTE_NODE=client - -LCONF=/usr/local/cfs/lustre/utils/lconf -WGET=wget - -case "$1" in -start | stop) - # Fetch the config file. We can't use --output-document because it - # makes Wget ignore timestamping. - if test -n "$MASTER_CONFIG"; then - (cd `echo "$CONFIG" | sed 's%/[^/]*$%%'` && \ - $WGET --timestamping "$MASTER_CONFIG") || exit $? - fi - - # Map all client nodes to the COMPUTE_NODE virtual node. - if test -n "$COMPUTE_NODE" && nodeattr compute; then - node=" --node $COMPUTE_NODE" - else - node= - fi - - # If we're stopping, do the lconf cleanup. 
- if test "$1" = stop; then - cleanup=' --cleanup' - else - cleanup= - fi - - $LCONF$cleanup$node "$CONFIG" - ;; - -*) - echo "$0 {start|stop}" 1>&2 - exit 1 - ;; -esac - -exit 0 diff --git a/lustre/scripts/system-profile.sh b/lustre/scripts/system-profile.sh deleted file mode 100755 index a669339..0000000 --- a/lustre/scripts/system-profile.sh +++ /dev/null @@ -1,233 +0,0 @@ -#!/bin/sh - -# System Profiling Script - -TESTS="oprofile iostat vmstat proc_dump" - -# common parameters -export OUTPUTDIR=/home/op -export KERNELDIR=/usr/src/linux -export LUSTREDIR=/usr/src/lustre -export PORTALSDIR=/usr/src/portals -NAL=toenal - -# Params for OPROFILE -#CTR0_EVENT=CPU_CLK_UNHALTED -CTR0_COUNT=10000 - -# for intel Petium 4 onwards... Also requires Unit Mask -CTR0_EVENT=GLOBAL_POWER_EVENTS -CTR0_UNIT_MASK=0x01 - -# Params for VMSTAT -VM_SAMPLING=1 - -# Params for IOSTAT -IO_SAMPLING=1 - -# Params for PROC DUMP -export PROC_SAMPLING=2 - -#------------------------------------------------------------------- -oprofile_start() { - - echo `date +%T`": oprofile started...." >> $OUTPUTDIR/$HOSTNAME/summary - op_start --ctr0-event=$CTR0_EVENT --ctr0-count=$CTR0_COUNT --ctr0-unit-mask=$CTR0_UNIT_MASK --vmlinux=${KERNELDIR}/vmlinux - mkdir -p ${OUTPUTDIR}/${HOSTNAME}/oprofile/prof_source/{obdclass,obdecho,osc,ptlrpc,extN,obdfilter,ost,mdc,mds4mds,mds4mds_extN,llite,portals,$NAL} - mkdir -p ${OUTPUTDIR}/${HOSTNAME}/oprofile/profiling - -} - -iostat_start() { - echo `date +%T`": iostat started...." >> $OUTPUTDIR/$HOSTNAME/summary - - mkdir ${OUTPUTDIR}/${HOSTNAME}/iostat - - iostat $IO_SAMPLING > $OUTPUTDIR/$HOSTNAME/iostat/iostat.op & - PID=$! - echo $PID > $OUTPUTDIR/$HOSTNAME/tmp/iostat.pid -} - -vmstat_start() { - - echo $OUTPUTDIR; - echo `date +%T`": vmstat started...." >> $OUTPUTDIR/$HOSTNAME/summary - - mkdir ${OUTPUTDIR}/${HOSTNAME}/vmstat - - vmstat $VM_SAMPLING > $OUTPUTDIR/$HOSTNAME/vmstat/vmstat.op & - PID=$! 
- echo $PID > $OUTPUTDIR/$HOSTNAME/tmp/vmstat.pid -} - -proc_dump_start() { - echo `date +%T`": proc dump started...." >> $OUTPUTDIR/$HOSTNAME/summary - sh -c ' - CTRFILE=/$OUTPUTDIR/$HOSTNAME/tmp/running.$$.pid; - echo $$ > ${CTRFILE}; - while [ -f $CTRFILE ]; do - cat /proc/meminfo >> $OUTPUTDIR/$HOSTNAME/meminfo; - cat /proc/interrupts >> $OUTPUTDIR/$HOSTNAME/interrupts; - cat /proc/net/dev >> $OUTPUTDIR/$HOSTNAME/net-dev; - sleep ${PROC_SAMPLING}; - done; - ' & - CTRFILE=/tmp/running.$!.pid - echo "proc_dump_ctrl $CTRFILE" >> /tmp/prof-ctrl - -} - -oprofile_stop() { - op_dump - op_stop - - echo `date +%T`": oprofile stopped...." >> $OUTPUTDIR/$HOSTNAME/summary - - for i in obdclass obdecho osc ptlrpc extN obdfilter mds ost mdc llite - do - oprofpp -l ${LUSTREDIR}/${i}/${i}.o > ${OUTPUTDIR}/${HOSTNAME}/oprofile/profiling/${i}.prof 2>/dev/null - done - oprofpp -l ${LUSTREDIR}/mds/mds_extN.o > ${OUTPUTDIR}/${HOSTNAME}/oprofile/profiling/mds_extN.prof 2>/dev/null - oprofpp -l ${PORTALSDIR}/linux/oslib/portals.o > ${OUTPUTDIR}/${HOSTNAME}/oprofile/profiling/portals.prof 2>/dev/null - oprofpp -l ${PORTALSDIR}/linux/${NAL}/k${NAL}.o > ${OUTPUTDIR}/${HOSTNAME}/oprofile/profiling/k${NAL}.prof 2>/dev/null - - - for i in obdclass obdecho osc ptlrpc extN obdfilter ost mdc llite - do - op_to_source --source-dir=${LUSTREDIR}/${i}/ --output-dir=${OUTPUTDIR}/${HOSTNAME}/oprofile/prof_source/${i}/ ${LUSTREDIR}/${i}/${i}.o 2>/dev/null - done - - op_to_source --source-dir=${LUSTREDIR}/mds/ --output-dir=${OUTPUTDIR}/${HOSTNAME}/oprofile/prof_source/mds4mds/ ${LUSTREDIR}/mds/mds.o 2>/dev/null - op_to_source --source-dir=${LUSTREDIR}/mds/ --output-dir=${OUTPUTDIR}/${HOSTNAME}/oprofile/prof_source/mds4mds_extN/ ${LUSTREDIR}/mds/mds_extN.o 2>/dev/null - - op_to_source --source-dir=${PORTALSDIR}/linux/oslib/ --output-dir=${OUTPUTDIR}/${HOSTNAME}/oprofile/prof_source/portals ${PORTALSDIR}/linux/oslib/portals.o 2>/dev/null - op_to_source --source-dir=${PORTALSDIR}/linux/${NAL}/ 
--output-dir=${OUTPUTDIR}/${HOSTNAME}/oprofile/prof_source/${NAL} ${PORTALSDIR}/linux/${NAL}/k${NAL}.o 2>/dev/null - - op_time -l > ${OUTPUTDIR}/${HOSTNAME}/oprofile/globalprofile 2>/dev/null - -} - -iostat_stop() { - echo `date +%T`": iostat stopped...." >> $OUTPUTDIR/$HOSTNAME/summary - - PID=$(cat $OUTPUTDIR/$HOSTNAME/tmp/iostat.pid) - kill $PID -} - -vmstat_stop() { - echo `date +%T`": vmstat stopped...." >> $OUTPUTDIR/$HOSTNAME/summary - - PID=$(cat $OUTPUTDIR/$HOSTNAME/tmp/vmstat.pid) - kill $PID -} - -oprofile_dump() { - op_dump; -} - -proc_dump_stop() { - echo `date +%T`": proc dump stopped...." >> $OUTPUTDIR/$HOSTNAME/summary - CTRFILE=`cat /tmp/prof-ctrl | awk '$1 == "prof_dump_ctrl" {print $2}'` - rm -f $CTRFILE -} -#------------------------------------------------------------------- - -case "$1" in - - start) - - shift; - while [ ${#*} -gt 1 ]; do - case "$1" in - -k) - shift; - KERNELDIR=$1; - ;; - - -l) - shift; - LUSTREDIR=$1; - ;; - -p) - shift; - PORTALSDIR=$1; - ;; - - -o) - shift; - OUTPUTDIR=$1; - ;; - *) - echo unrecognized option $1 - break; - ;; - esac - shift; - done - echo "kerneldir $KERNELDIR" > /tmp/prof-ctrl - echo -e "\nlustredir $LUSTREDIR" >> /tmp/prof-ctrl - echo -e "\nportalsdir $PORTALSDIR" >> /tmp/prof-ctrl - echo -e "\noutputdir $OUTPUTDIR" >> /tmp/prof-ctrl - - if [ -d ${OUTPUTDIR}/${HOSTNAME} ]; then - echo "Output already exists" - echo "Please take backup and remove it" - exit 1 - fi - - mkdir -p ${OUTPUTDIR}/${HOSTNAME} - echo -e "Profiling started on $HOSTNAME" > ${OUTPUTDIR}/${HOSTNAME}/summary - echo -e "\n\nModules Listing on $HOSTNAME" >> ${OUTPUTDIR}/${HOSTNAME}/summary - /sbin/lsmod >> ${OUTPUTDIR}/${HOSTNAME}/summary - echo -e "\n\nKernel : " >> ${OUTPUTDIR}/${HOSTNAME}/summary - uname -a >> ${OUTPUTDIR}/${HOSTNAME}/summary - echo -e "\n\nPCI Devices : " >> ${OUTPUTDIR}/${HOSTNAME}/summary - lspci -t -v >> ${OUTPUTDIR}/${HOSTNAME}/summary - echo -e "\n\nTests carried out " >> ${OUTPUTDIR}/${HOSTNAME}/summary - - 
mkdir $OUTPUTDIR/$HOSTNAME/tmp - for test in $TESTS; do - ${test}_start; - done - ;; - stop) - - KERNELDIR=`cat /tmp/prof-ctrl | awk '$1 == "kerneldir" {print $2}'` - LUSTREDIR=`cat /tmp/prof-ctrl | awk '$1 == "lustredir" {print $2}'` - PORTALSDIR=`cat /tmp/prof-ctrl | awk '$1 == "portalsdir" {print $2}'` - OUTPUTDIR=`cat /tmp/prof-ctrl | awk '$1 == "outputdir" {print $2}'` - for test in $TESTS; do - ${test}_stop; - done - - rm -rf ${OUTPUTDIR}/$HOSTNAME/tmp - tar -cf ${OUTPUTDIR}/${HOSTNAME}.tar ${OUTPUTDIR}/${HOSTNAME} - echo "Dumped results in ${OUTPUTDIR}/${HOSTNAME}.tar" - ;; - dump) - if "oprofile" in $TESTS; then - oprofile_dump; - fi - ;; - clean) - - OUTPUTDIR=`cat /tmp/prof-ctrl | awk '$1 == "outputdir" {print $2}'` - echo Deleting directory $OUTPUTDIR/$HOSTNAME ... - rm -rf $OUTPUTDIR/$HOSTNAME - echo Deleting file $OUTPUTDIR/${HOSTNAME}.tar ... - rm -rf $OUTPUTDIR/${HOSTNAME}.tar - echo Deleting oprofile samples ... - rm -f /var/lib/oprofile/samples/* - rm -f /tmp/prof-ctrl - ;; - *) - echo $"Usage : $0 {start|stop|dump|clean} [OPTIONS]"; - echo $"OPTIONS :" - echo $" -l lustre_dir" - echo $" -p portals_dir" - echo $" -k kernel_dir" - echo $" -o output_dir" - exit 1 -esac - -exit 0 diff --git a/lustre/scripts/version_tag.pl b/lustre/scripts/version_tag.pl deleted file mode 100644 index f33443f..0000000 --- a/lustre/scripts/version_tag.pl +++ /dev/null @@ -1,174 +0,0 @@ -#!/usr/bin/perl -# -*- Mode: perl; indent-tabs-mode: nil; cperl-indent-level: 4 -*- - -use strict; -use diagnostics; -use IO::File; -use Time::Local; - -my $pristine = 1; -my $kernver; - -sub get_tag() -{ - my $tag; - - my $tagfile = new IO::File; - if (!$tagfile->open("CVS/Tag")) { - return "HEAD"; - } else { - my $tmp = <$tagfile>; - $tagfile->close(); - - $tmp =~ m/T(.*)/; - return $1; - } -} - -sub get_latest_mtime() -{ - my %months=("Jan" => 0, "Feb" => 1, "Mar" => 2, "Apr" => 3, "May" => 4, - "Jun" => 5, "Jul" => 6, "Aug" => 7, "Sep" => 8, "Oct" => 9, - "Nov" => 10, "Dec" => 11); - 
- my $last_mtime = 0; - my @entries = `find . -name Entries`; - my $entry_file; - foreach $entry_file (@entries) { - chomp($entry_file); - my $entry = new IO::File; - if (!$entry->open($entry_file)) { - die "unable to open $entry_file: $!\n"; - } - my $line; - while (defined($line = <$entry>)) { - chomp($line); - #print "line: $line\n"; - my ($junk, $file, $version, $date) = split(/\//, $line); - - #print "junk: $junk\nfile: $file\nver: $version\ndate: $date\n"; - #print "last_mtime: " . localtime($last_mtime) . "\n"; - - if ($junk eq "D" || - $file eq "lustre.spec.in" || - $file !~ m/\.(c|h|am|in)$/) { - next; - } - - my $cur_dir = $entry_file; - $cur_dir =~ s/\/CVS\/Entries$//; - my @statbuf = stat("$cur_dir/$file"); - my $mtime = $statbuf[9]; - if (!defined($mtime)) { - next; - } - my $local_date = gmtime($mtime); - if ($local_date ne $date && - $file ne "lustre.spec.in") { - #print "$file : " . localtime($mtime) . "\n"; - $pristine = 0; - } - - if ($mtime > $last_mtime) { - $last_mtime = $mtime; - } - - if ($date) { - my @t = split(/ +/, $date); - if (int(@t) != 5) { - #print "skipping: $date\n"; - next; - } - my ($hours, $min, $sec) = split(/:/, $t[3]); - my ($mon, $mday, $year) = ($t[1], $t[2], $t[4]); - my $secs = 0; - $mon = $months{$mon}; - $secs = timelocal($sec, $min, $hours, $mday, $mon, $year); - if ($secs > $last_mtime) { - $last_mtime = $secs; - } - } - } - $entry->close(); - } - return $last_mtime; -} - -sub get_linuxdir() -{ - my $config = new IO::File; - my ($line, $dir); - if (!$config->open("Makefile")) { - die "Run ./configure first\n"; - } - while (defined($line = <$config>)) { - chomp($line); - if ($line =~ /LINUX :?= (.*)/) { - $dir = $1; - last; - } - } - $config->close(); - my $ver = new IO::File; - if (!$ver->open("$dir/include/linux/version.h")) { - die "Run make dep on $dir\n"; - } - while(defined($line = <$ver>)) { - $line =~ /\#define UTS_RELEASE "(.*)"/; - if ($1) { - $kernver = $1; - last; - } - } - $ver->close(); - chomp($kernver); 
- $dir =~ s/\//\./g; - return $dir; -} - -sub generate_ver($$$) -{ - my $tag = shift; - my $mtime = shift; - my $linuxdir = shift; - - #print "localtime: " . localtime($mtime) . "\n"; - - my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = - localtime($mtime); - $year += 1900; - $mon++; - my $show_last = sprintf("%04d%02d%02d%02d%02d%02d", $year, $mon, $mday, - $hour, $min, $sec); - - print "#define BUILD_VERSION \""; - if ($pristine) { - print "$tag-$show_last-PRISTINE-$linuxdir-$kernver\"\n"; - } else { - print "$tag-$show_last-CHANGED-$linuxdir-$kernver\"\n"; - } -} -chomp(my $cwd = `pwd`); - -# ARGV[0] = srcdir -# ARGV[1] = builddir - -# for get_latest_mtime and get_tag you need to be in srcdir - -if ($ARGV[0]) { - chdir($ARGV[0]); -} -my $tag = get_tag(); -my $mtime = get_latest_mtime(); - -# for get_linuxdir you need to be in builddir - -if ($ARGV[1]) { - chdir($cwd); - chdir($ARGV[1]); -} -my $linuxdir = get_linuxdir(); - -generate_ver($tag, $mtime, $linuxdir); - -exit(0); diff --git a/lustre/tests/.cvsignore b/lustre/tests/.cvsignore deleted file mode 100644 index 2e5c1fe..0000000 --- a/lustre/tests/.cvsignore +++ /dev/null @@ -1,45 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -TAGS -openunlink -testreq -truncate -directio -openme -writeme -mcreate -munlink -mlink -tchmod -toexcl -fsx -test_brw -newfile -openclose -createdestroy -createmany -statmany -mkdirmany -lovstripe -*.xml -stat -setuid -multifstat -checkstat -wantedi -createtest -open_delay -statone -opendevunlink -opendirunlink -runas -openfile -unlinkmany -fchdir_test -getdents -o_directory diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am deleted file mode 100644 index 064de98..0000000 --- a/lustre/tests/Makefile.am +++ /dev/null @@ -1,60 +0,0 @@ -# Lustre test Makefile -DEFS= -CPPFLAGS = -I. 
-I$(top_srcdir)/portals/include/ -I$(top_srcdir)/include -D_LARGEFILE64_SOURCE -CFLAGS := -g -Wall -# LDADD = -lldap -# LDADD := -lreadline -ltermcap # -lefence -EXTRA_DIST = $(pkgexample_SCRIPTS) $(noinst_SCRIPTS) $(noinst_DATA) \ - sanity.sh rundbench mcreate -pkgexample_SCRIPTS = llmount.sh llmountcleanup.sh llecho.sh llechocleanup.sh local.sh echo.sh uml.sh lov.sh -noinst_DATA = -noinst_SCRIPTS = leak_finder.pl llecho.sh llmount.sh llmountcleanup.sh tbox.sh \ - llrmount.sh runfailure-mds runvmstat runfailure-net runfailure-ost \ - runiozone runregression-net.sh runtests sanity.sh rundbench -noinst_PROGRAMS = openunlink testreq truncate directio openme writeme open_delay -noinst_PROGRAMS += munlink tchmod toexcl fsx test_brw openclose createdestroy -noinst_PROGRAMS += stat createmany statmany multifstat createtest mlink -noinst_PROGRAMS += opendirunlink opendevunlink unlinkmany fchdir_test checkstat -noinst_PROGRAMS += wantedi statone runas openfile getdents o_directory -# noinst_PROGRAMS += ldaptest -sbin_PROGRAMS = mcreate mkdirmany - -# ldaptest_SOURCES = ldaptest.c -tchmod_SOURCES = tchmod.c -toexcl_SOURCES = toexcl.c -testreq_SOURCES = testreq.c -mcreate_SOURCES = mcreate.c -munlink_SOURCES = munlink.c -mlink_SOURCES = mlink.c -truncate_SOURCES = truncate.c -directio_SOURCES = directio.c -openunlink_SOURCES = openunlink.c -openme_SOURCES = openme.c -writeme_SOURCES = writeme.c -fsx_SOURCES = fsx.c -test_brw_SOURCES = test_brw.c -openclose_SOURCES = openclose.c -createdestroy_SOURCES = createdestroy.c -stat_SOURCES = stat.c -createmany_SOURCES = createmany.c -statmany_SOURCES = statmany.c -unlinkmany_SOURCES = unlinkmany.c -statone_SOURCES = statone.c -mkdirmany_SOURCES = mkdirmany.c -multifstat_SOURCES = multifstat.c -checkstat_SOURCES = checkstat.c -runas_SOURCES = runas.c -openfile_SOURCES = openfile.c -wantedi_SOURCES = wantedi.c -createtest_SOURCES = createtest.c -open_delay_SOURCES = open_delay.c -opendirunlink_SOURCES=opendirunlink.c 
-opendevunlink_SOURCES=opendirunlink.c -fchdir_test_SOURCES=fchdir_test.c -getdents_SOURCES=getdents.c -o_directory_SOURCES = o_directory.c -#mkdirdeep_SOURCES= mkdirdeep.c -#mkdirdeep_LDADD=-L../portals/util -lptlctl -#mkdirdeep_CPPFLAGS=-I$(top_srcdir)/portals/include - -include $(top_srcdir)/Rules diff --git a/lustre/tests/README b/lustre/tests/README deleted file mode 100644 index 00634a7..0000000 --- a/lustre/tests/README +++ /dev/null @@ -1,85 +0,0 @@ -1. How to build .xml configs: -The various .xml configs in the tests/ directory are built by running the -corresponding .sh script. The .sh script runs a series of lmc (Lustre make -config) commands in order to build up an XML file. It is much easier to -simply edit a .sh script and rebuild your XML config file than trying to -edit the XML directly. - -For a loopback setup with a mounted filesystem, you could do something like: - - sh local.sh - ../utils/lconf --reformat local.xml - -This will configure an MDS, an OBD/OST, and a filesystem client all running -on the same system and communicating over the TCP loopback interface. If -the --reformat option is given, then the OST and MDS devices will be -formatted. This is required the first time you set up the system, or if -you think you have corrupted the filesystems after you hit a bug. - -A more complex configuration, using a separate host for each of the MDS, -OBD/OST, and filesystem client functions is in uml.sh. It configures 3 -systems, and the OST system (uml2) serves up multiple OST devices, and -the client accesses these via a logical object volume (LOV) driver (which -essentially stripes both of the OST devices into a single storage device. 
- -This configuration could be run on any 3 systems with the following commands: - - sh uml.sh - system1# ../utils/lconf --reformat --node uml1 uml.xml - system2# ../utils/lconf --reformat --node uml2 uml.xml - system3# ../utils/lconf --node uml3 uml.xml - -The "--node <name>" parameter tells lconf to use the configuration for -the node "name" in the XML configuration file. If the hostnames were -already "uml1", "uml2", and "uml3", then the "--node" parameter would -not need to be given. The internals of lconf and portals handle the -configuration details for setting up netowrking. - -2. runregression-net.sh and runregression-brw.sh - -This test performs raw block and attribute requests against a real or -"null" OST device. It is useful for generating isolated load on the -OST device, while avoiding the need to run tests through the filesystem. -This can be useful for testing the network part of Lustre in isolation, -or doing RPC and bulk I/O performance tests against an OST. - -If things are alright it goes through a series of tests single threaded, -multithreaded, using getattr and brw (both read and write, with single -page and vector I/O, doing basic data checking of each page). - -You can create a simple echo client by running the "llecho.sh" to -run the tests locally (over TCP loopback), or edit llecho.sh to -specify the SERVER and CLIENT names. You would then set up as normal: - - # if you are using a remote server, first run: - server# ../utils/lconf echo.xml - -Configure the client (or if you are running a single system only): - - client# ../utils/lconf echo.xml - client# sh runregression-net.sh - -3. runtests - -The runtests script does a series of simple file-based tests using a -filesystem. You need to have an XML file as appropriate for your setup -(one or more hosts, including an MDS, one or more OSTs, and a mountpoint). 
-If the MDS and/or OST is on a remote machine, configure them first: - - ../utils/lconf --reformat <conf>.xml - -On the client machine, the runtests script needs the XML configuration -file as a command-line parameter, as it mounts and unmounts the filesystem -several times during the test in order to verify that the data is still -there as expected (ensures that it makes it to disk instead of just into -the filesystem cache). If you are running on only a single machine, you -can just use runtests directly. If this is only a client machine, the ---reformat parameter is not needed (it will not do anything). - - sh runtests [--reformat] <conf>.xml - -This creates a few simple files and directories first, and then untars -a copy of the /etc filesystem into the Lustre filesystem. It then does -data verification both before and after the filesystem is remounted, and -finally deletes all of the files and verifies that the amount of space -left in the filesystem is (nearly) the same as it was before the test. diff --git a/lustre/tests/acceptance-metadata-double.sh b/lustre/tests/acceptance-metadata-double.sh deleted file mode 100644 index f647a55..0000000 --- a/lustre/tests/acceptance-metadata-double.sh +++ /dev/null @@ -1,140 +0,0 @@ -#!/bin/sh -set -e - -# -# Runs create.pl and rename.pl on two mountpoints with increasing load, varying -# debug levels. 
Assumes that the node is already setup with llmount2.sh -# - -SRCDIR="`dirname $0`" -CREATE=$SRCDIR/create.pl - -debug_client_on() -{ - echo -1 > /proc/sys/portals/debug -} - -debug_client_off() -{ - echo 0 > /proc/sys/portals/debug -} - -MNT=${MNT:-/mnt/lustre} - -debug_client_on -echo "create.pl, 2 mounts, 1 thread, 10 ops, debug on" -perl $CREATE -- $MNT 2 10 -echo "create.pl, 2 mounts, 1 thread, 100 ops, debug on" -perl $CREATE --silent -- $MNT 2 100 -echo "create.pl --mcreate=0, 2 mounts, 1 thread, 10 ops, debug on" -perl $CREATE --mcreate=0 -- $MNT 2 10 -echo "create.pl --mcreate=0, 2 mounts, 1 thread, 100 ops, debug on" -perl $CREATE --mcreate=0 --silent -- $MNT 2 100 -echo "rename.pl, 2 mounts, 1 thread, 10 ops, debug on" -perl rename.pl --count=2 $MNT 10 -echo "rename.pl, 2 mounts, 1 thread, 100 ops, debug on" -perl rename.pl --count=2 --silent $MNT 100 - -debug_client_off -echo "create.pl, 2 mounts, 1 thread, 1000 ops, debug off" -perl $CREATE --silent -- $MNT 2 1000 -echo "create.pl --mcreate=0, 2 mounts, 1 thread, 1000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT 2 1000 -echo "rename.pl, 2 mounts, 1 thread, 1000 ops, debug off" -perl rename.pl --count=2 --silent $MNT 1000 - -debug_client_on -echo "create.pl, 2 mounts, 2 threads, 100 ops, debug on" -perl $CREATE --silent -- $MNT 2 100 & -perl $CREATE --silent -- $MNT 2 100 & -wait -echo "create.pl --mcreate=0, 2 mounts, 2 threads, 100 ops, debug on" -perl $CREATE --silent --mcreate=0 -- $MNT 2 100 & -perl $CREATE --silent --mcreate=0 -- $MNT 2 100 & -wait -echo "rename.pl, 2 mounts, 2 thread, 1000 ops, debug on" -perl rename.pl --count=2 --silent $MNT 1000 & -perl rename.pl --count=2 --silent $MNT 1000 & -wait - -debug_client_off -echo "create.pl, 2 mounts, 2 threads, 2000 ops, debug off" -perl $CREATE --silent -- $MNT 2 2000 & -perl $CREATE --silent -- $MNT 2 2000 & -wait -echo "create.pl --mcreate=0, 2 mounts, 2 threads, 2000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT 2 
2000 & -perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 & -wait -echo "rename.pl, 2 mounts, 2 threads, 2000 ops, debug off" -perl rename.pl --count=2 --silent $MNT 2000 & -perl rename.pl --count=2 --silent $MNT 2000 & -wait - -debug_client_on -echo "create.pl, 2 mounts, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent -- $MNT 2 100 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 100 & -done -wait -echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug on" -for i in `seq 1 4`; do - perl rename.pl --count=2 --silent $MNT 2000 & -done -wait - -debug_client_off -echo "create.pl, 2 mounts, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent -- $MNT 2 2000 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 & -done -wait -echo "rename.pl, 2 mounts, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl rename.pl --count=2 --silent $MNT 2000 & -done -wait - -debug_client_on -echo "create.pl, 2 mounts, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT 2 500 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 500 & -done -wait -echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug on" -for i in `seq 1 8`; do - perl rename.pl --count=2 --silent $MNT 2000 & -done -wait - -debug_client_off -echo "create.pl, 2 mounts, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT 2 2000 & -done -wait -echo "create.pl --mcreate=0, 2 mounts, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT 2 2000 & -done -wait -echo "rename.pl, 2 mounts, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - 
perl rename.pl --count=2 --silent $MNT 2000 & -done -wait diff --git a/lustre/tests/acceptance-metadata-single.sh b/lustre/tests/acceptance-metadata-single.sh deleted file mode 100644 index 53774e5..0000000 --- a/lustre/tests/acceptance-metadata-single.sh +++ /dev/null @@ -1,146 +0,0 @@ -#!/bin/sh -set -e - -# -# Runs create.pl and rename.pl on a single mountpoint with increasing -# load, varying debug levels -# - -SRCDIR="`dirname $0`" -CREATE=$SRCDIR/create.pl - -debug_client_on() -{ - echo -1 > /proc/sys/portals/debug -} - -debug_client_off() -{ - echo 0 > /proc/sys/portals/debug -} - -MNT=${MNT:-/mnt/lustre} - -debug_client_on -echo "create.pl, 1 mount, 1 thread, 10 ops, debug on" -perl $CREATE -- $MNT -1 10 -echo "create.pl, 1 mount, 1 thread, 100 ops, debug on" -perl $CREATE --silent -- $MNT -1 100 -echo "create.pl --mcreate=0, 1 mount, 1 thread, 10 ops, debug on" -perl $CREATE --mcreate=0 -- $MNT -1 10 -echo "create.pl --mcreate=0, 1 mount, 1 thread, 100 ops, debug on" -perl $CREATE --mcreate=0 --silent -- $MNT -1 100 -echo "rename.pl, 1 mount, 1 thread, 10 ops, debug on" -perl rename.pl $MNT 10 -echo "rename.pl, 1 mount, 1 thread, 100 ops, debug on" -perl rename.pl --silent $MNT 100 - -debug_client_off -echo "create.pl, 1 mount, 1 thread, 1000 ops, debug off" -perl $CREATE --silent -- $MNT -1 1000 -echo "create.pl --mcreate=0, 1 mount, 1 thread, 1000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT -1 1000 -echo "rename.pl, 1 mount, 1 thread, 1000 ops, debug off" -perl rename.pl --silent $MNT 1000 - -debug_client_on -echo "create.pl, 1 mount, 2 threads, 100 ops, debug on" -perl $CREATE --silent -- $MNT -1 100 & -perl $CREATE --silent -- $MNT -1 100 & -wait -echo "create.pl --mcreate=0, 1 mount, 2 threads, 100 ops, debug on" -perl $CREATE --silent --mcreate=0 -- $MNT -1 100 & -perl $CREATE --silent --mcreate=0 -- $MNT -1 100 & -wait -echo "rename.pl, 1 mount, 2 thread, 1000 ops, debug on" -perl rename.pl --silent $MNT 1000 & -perl rename.pl 
--silent $MNT 1000 & -wait - -debug_client_off -echo "create.pl, 1 mount, 2 threads, 2000 ops, debug off" -perl $CREATE --silent -- $MNT -1 2000 & -perl $CREATE --silent -- $MNT -1 2000 & -wait -echo "create.pl --mcreate=0, 1 mount, 2 threads, 2000 ops, debug off" -perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & -perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & -wait -echo "rename.pl, 1 mount, 2 threads, 2000 ops, debug off" -perl rename.pl --silent $MNT 2000 & -perl rename.pl --silent $MNT 2000 & -wait - -debug_client_on -echo "create.pl, 1 mount, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent -- $MNT -1 100 & -done -wait -echo "create.pl --mcreate=0, 1 mount, 4 threads, 100 ops, debug on" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 100 & -done -wait -echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug on" -for i in `seq 1 4`; do - perl rename.pl --silent $MNT 2000 & -done -wait - -debug_client_off -echo "create.pl, 1 mount, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent -- $MNT -1 2000 & -done -wait -echo "create.pl --mcreate=0, 1 mount, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & -done -wait -echo "rename.pl, 1 mount, 4 threads, 2000 ops, debug off" -for i in `seq 1 4`; do - perl rename.pl --silent $MNT 2000 & -done -wait - -debug_client_on -echo "create.pl, 1 mount, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT -1 500 & -done -wait -echo "create.pl --mcreate=0, 1 mount, 8 threads, 500 ops, debug on" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 500 & -done -wait -echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug on" -for i in `seq 1 8`; do - perl rename.pl --silent $MNT 2000 & -done -wait - -debug_client_off -echo "create.pl, 1 mount, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent -- $MNT -1 2000 & 
-done -wait -echo "create.pl --mcreate=0, 1 mount, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl $CREATE --silent --mcreate=0 -- $MNT -1 2000 & -done -wait -echo "rename.pl, 1 mount, 8 threads, 2000 ops, debug off" -for i in `seq 1 8`; do - perl rename.pl --silent $MNT 2000 & -done -wait -sh rundbench 1 -sh rundbench 2 -sh rundbench 4 -sh rundbench 8 -sh rundbench 16 -sh rundbench 32 diff --git a/lustre/tests/acceptance-small.sh b/lustre/tests/acceptance-small.sh deleted file mode 100755 index 0d2d836..0000000 --- a/lustre/tests/acceptance-small.sh +++ /dev/null @@ -1,121 +0,0 @@ -#!/bin/sh -# script which _must_ complete successfully (at minimum) before checkins to -# the CVS HEAD are allowed. -set -vxe - -[ "$CONFIGS" -a -z "$SANITYN" ] && SANITYN=no -[ "$CONFIGS" ] || CONFIGS="local lov" -[ "$MAX_THREADS" ] || MAX_THREADS=50 -if [ -z "$THREADS" ]; then - KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo` - THREADS=`expr $KB / 16384` - [ $THREADS -gt $MAX_THREADS ] && THREADS=$MAX_THREADS -fi -[ "$SIZE" ] || SIZE=20480 -[ "$RSIZE" ] || RSIZE=64 -[ "$UID" ] || UID=1000 -[ "$MNT" ] || MNT=/mnt/lustre -[ "$TMP" ] || TMP=/tmp -[ "$COUNT" ] || COUNT=1000 -[ "$DEBUG_OFF" ] || DEBUG_OFF="eval echo 0 > /proc/sys/portals/debug" -[ "$DEBUG_ON" ] || DEBUG_ON="eval echo -1 > /proc/sys/portals/debug" - -for NAME in $CONFIGS; do - export NAME - [ -e $NAME.sh ] && sh $NAME.sh - [ ! 
-e $NAME.xml ] && [ -z "$LDAPURL" ] && echo "no config '$NAME.xml'" 1>&2 && exit 1 - - if [ "$RUNTESTS" != "no" ]; then - sh runtests - fi - - #[ "$SANITY" != "no" ] && sh sanity.sh - if [ "$SANITY" != "no" ]; then - START=: CLEAN=: sh sanity.sh - fi - - if [ "$DBENCH" != "no" ]; then - mount | grep $MNT || sh llmount.sh - SPACE=`df $MNT | tail -1 | awk '{ print $4 }'` - DB_THREADS=`expr $SPACE / 50000` - [ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS - - $DEBUG_OFF - sh rundbench 1 - $DEBUG_ON - sh llmountcleanup.sh - sh llrmount.sh - if [ $DB_THREADS -gt 1 ]; then - $DEBUG_OFF - sh rundbench $DB_THREADS - $DEBUG_ON - sh llmountcleanup.sh - sh llrmount.sh - fi - rm -f /mnt/lustre/client.txt - fi - chown $UID $MNT && chmod 700 $MNT - if [ "$BONNIE" != "no" ]; then - mount | grep $MNT || sh llmount.sh - $DEBUG_OFF - bonnie++ -s 0 -n 10 -u $UID -d $MNT - $DEBUG_ON - sh llmountcleanup.sh - sh llrmount.sh - fi - IOZONE_OPTS="-i 0 -i 1 -i 2 -+d -r $RSIZE -s $SIZE" - IOZONE_FILE="-f $MNT/iozone" - if [ "$IOZONE" != "no" ]; then - mount | grep $MNT || sh llmount.sh - $DEBUG_OFF - iozone $IOZONE_OPTS $IOZONE_FILE - $DEBUG_ON - sh llmountcleanup.sh - sh llrmount.sh - fi - if [ "$IOZONE_DIR" != "no" ]; then - mount | grep $MNT || sh llmount.sh - SPACE=`df $MNT | tail -1 | awk '{ print $4 }'` - IOZ_THREADS=`expr $SPACE / $SIZE` - [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS - - $DEBUG_OFF - iozone -I $IOZONE_OPTS $IOZONE_FILE.odir - IOZVER=`iozone -v | awk '/Revision:/ { print $3 }' | tr -d '.'` - $DEBUG_ON - sh llmountcleanup.sh - sh llrmount.sh - if [ "$IOZ_THREADS" -gt 1 -a "$IOZVER" -ge 3145 ]; then - $DEBUG_OFF - THREAD=1 - IOZONE_FILE="-F " - while [ $THREAD -le $IOZ_THREADS ]; do - IOZONE_FILE="$IOZONE_FILE $MNT/iozone.$THREAD" - THREAD=`expr $THREAD + 1` - done - iozone -I $IOZONE_OPTS -t $IOZ_THREADS $IOZONE_FILE - $DEBUG_ON - sh llmountcleanup.sh - sh llrmount.sh - elif [ $IOZVER -lt 3145 ]; then - VER=`iozone -v | awk '/Revision:/ { print $3 }'` - 
echo "iozone $VER too old for multi-threaded tests" - fi - fi - if [ "$FSX" != "no" ]; then - mount | grep $MNT || sh llmount.sh - $DEBUG_OFF - ./fsx -W -c 50 -p 1000 -P $TMP -l 1024000 -N $(($COUNT * 100)) $MNT/fsxfile - $DEBUG_ON - sh llmountcleanup.sh - #sh llrmount.sh - fi - mount | grep $MNT && sh llmountcleanup.sh -done - -if [ "$SANITYN" != "no" ]; then - export NAME=mount2 - mount | grep $MNT || sh llmount.sh - sh sanityN.sh - mount | grep $MNT && sh llmountcleanup.sh -fi diff --git a/lustre/tests/ba-echo.sh b/lustre/tests/ba-echo.sh deleted file mode 100644 index b28c5f4..0000000 --- a/lustre/tests/ba-echo.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -config=${1:-ba-echo.xml} - -LMC_REAL="${LMC:-../utils/lmc} -m $config" -LMC="save_cmd" - -TCPBUF=1048576 -OST=${OST:-ba-ost-1} -CLIENT=${CLIENT:-`hostname`} - -UUIDLIST=${UUIDLIST:-/usr/local/admin/ba-ost/UUID.txt} - -h2tcp () { - echo "${1}" -} -BATCH=/tmp/lmc-batch.$$ -save_cmd() { - echo "$@" >> $BATCH -} - -[ -f $config ] && rm $config - -# Client node -${LMC} --add net --node $CLIENT --tcpbuf $TCPBUF --nid '*' --nettype tcp - -OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST` -[ "$OST_UUID" ] && OST_UUID="--ostuuid=$OST_UUID" || echo "$OST: no UUID" - -# server node -${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp -${LMC} --add ost --node $OST --ost ost1 --osdtype=obdecho $OST_UUID - -# osc on client -${LMC} --add echo_client --node $CLIENT --ost ost1 - -$LMC_REAL --batch $BATCH -rm -f $BATCH diff --git a/lustre/tests/ba-mount.sh b/lustre/tests/ba-mount.sh deleted file mode 100644 index ac05660..0000000 --- a/lustre/tests/ba-mount.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# There are configurations for three machines in this config file: the OST, -# the MDS/client, other clients -# -# To start your cluster using the ba-mount.xml file that this produces, first -# run: -# > lconf ba-mount.xml -# on the MDS/client, and then run: -# > lconf --node client ba-mount.xml -# on 
any other clients. - -config=${1:-ba-mount.xml} - -LMC_REAL="${LMC:-../utils/lmc} -m $config" -LMC="save_cmd" - -TCPBUF=1048576 -OST=${OST:-ba-ost-1} -MDS=`hostname` - -UUIDLIST=${UUIDLIST:-/usr/local/admin/ba-ost/UUID.txt} - -h2tcp () { - echo "${1}" -} -BATCH=/tmp/lmc-batch.$$ -save_cmd() { - echo "$@" >> $BATCH -} - -[ -f $config ] && rm $config - -# MDS/client node -${LMC} --add net --node $MDS --tcpbuf $TCPBUF --nid $MDS --nettype tcp -${LMC} --add mds --node $MDS --mds mds1 --dev /tmp/mds1 --size 50000 - -OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST` -[ "$OST_UUID" ] && OST_UUID="--ostuuid $OST_UUID" || echo "$OST: no UUID" - -# server node -${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp -${LMC} --add ost --node $OST --ost ost1 $OST_UUID --dev bluearc - -# mount point on the MDS/client -${LMC} --add mtpt --node $MDS --path /mnt/lustre --mds mds1 --lov ost1 - -# other clients -${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp -${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov ost1 - -$LMC_REAL --batch $BATCH -rm -f $BATCH diff --git a/lustre/tests/busy.sh b/lustre/tests/busy.sh deleted file mode 100644 index 2f90986..0000000 --- a/lustre/tests/busy.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - - mkdir /mnt/lustre/d22 - mkdir /mnt/lustre/d22/etc - ./mcreate /mnt/lustre/d22/etc/foo - ls -ld /mnt/lustre/etc - ls -ld /mnt/lustre/d22/etc diff --git a/lustre/tests/checkstack.pl b/lustre/tests/checkstack.pl deleted file mode 100644 index 9c0d097..0000000 --- a/lustre/tests/checkstack.pl +++ /dev/null @@ -1,92 +0,0 @@ -#!/usr/bin/perl - -# Check the stack usage of functions -# -# Copyright Joern Engel <joern@wh.fh-wedel.de> -# Inspired by Linus Torvalds -# Original idea maybe from Keith Owens -# -# Usage: -# objdump -d vmlinux | checkstack.pl <arch> -# -# find <moduledir> -name "*.o" | while read M; do -# objdump -d $M | perl ~/checkstack.pl <arch> | \ -# sed "s/^/`basename $M`: /" ; done | \ -# awk 
'/esp/ { print $5, $2, $4 }' | sort -nr -# -# TODO : Port to all architectures (one regex per arch) -# Speed this puppy up - -# check for arch -# -# $re is used for three matches: -# $& (whole re) matches the complete objdump line with the stack growth -# $1 (first bracket) matches the code that will be displayed in the output -# $2 (second bracket) matches the size of the stack growth -# -# use anything else and feel the pain ;) -{ - my $arch = shift; - $x = "[0-9a-f]"; # hex character - $xs = "[0-9a-f ]"; # hex character or space - if ($arch =~ /^i[3456]86$/) { - #c0105234: 81 ec ac 05 00 00 sub $0x5ac,%esp - $re = qr/^.*(sub/s\$(0x$x{3,5}),\%esp)$/o; - } elsif ($arch =~ /^ia64$/) { - # adds r12=-384,r12 - $re = qr/.*(adds/sr12=-($x{3,5}),r12)/o; - } elsif ($arch =~ /^ppc$/) { - #c00029f4: 94 21 ff 30 stwu r1,-208(r1) - $re = qr/.*(stwu/sr1,-($x{3,5})\(r1\))/o; - } elsif ($arch =~ /^s390x?$/) { - # 11160: a7 fb ff 60 aghi %r15,-160 - $re = qr/.*(ag?hi.*\%r15,-(([0-9]{2}|[3-9])[0-9]{2}))/o; - } else { - print("wrong or unknown architecture\n"); - exit - } -} - -sub bysize($) { - ($asize = $a) =~ s/$re/\2/; - ($bsize = $b) =~ s/$re/\2/; - $bsize <=> $asize -} - -# -# main() -# -$funcre = qr/^$x* \<(.*)\>:$/; -while ($line = <STDIN>) { - if ($line =~ m/$funcre/) { - ($func = $line) =~ s/$funcre/\1/; - chomp($func); - } - - if ($line =~ m/$re/) { - (my $addr = $line) =~ s/^($xs{8}).*/0x\1/o; - chomp($addr); - - my $intro = "$addr $func:"; - my $padlen = 56 - length($intro); - while ($padlen > 0) { - $intro .= ' '; - $padlen -= 8; - } - (my $code = $line) =~ s/$re/\1/; - - $stack[@stack] = "$intro $code"; - } -} - -@sortedstack = sort bysize @stack; - -foreach $i (@sortedstack) { - print("$i"); -} --- -Andreas Dilger -http://sourceforge.net/projects/ext2resize/ -http://www-mddsp.enel.ucalgary.ca/People/adilger/ - - diff --git a/lustre/tests/checkstat.c b/lustre/tests/checkstat.c deleted file mode 100644 index c98d6aa..0000000 --- a/lustre/tests/checkstat.c +++ 
/dev/null @@ -1,317 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <pwd.h> -#include <grp.h> - -void -usage (char *argv0, int help) -{ - char *progname = strrchr(argv0, '/'); - - if (progname == NULL) - progname = argv0; - - fprintf (help ? stdout : stderr, - "Usage: %s [flags] file[s]\n", - progname); - - if (!help) - { - fprintf (stderr, " or try '-h' for help\n"); - exit (1); - } - - printf ("Check given files have...\n"); - printf (" -p permission file must have required permissions\n"); - printf (" -t dir|file|link file must be of the specified type\n"); - printf (" -l link_name file must be a link to the given name\n"); - printf (" -s size file must have the given size\n"); - printf (" -u user file must be owned by given user\n"); - printf (" -g group file must be owned by given group\n"); - printf (" -f follow symlinks\n"); - printf (" -a file must be absent\n"); - printf (" -v increase verbosity\n"); - printf (" -h print help\n"); - printf (" Exit status is 0 on success, 1 on failure\n"); -} - -int -main (int argc, char **argv) -{ - int c; - struct stat64 buf; - int perms = -1; - uid_t uid = (uid_t)-1; - gid_t gid = (gid_t)-1; - char *type = NULL; - long absent = 0; - char *checklink = NULL; - int verbose = 0; - long long size = -1; - int follow = 0; - char *term; - - while ((c = getopt (argc, argv, "p:t:l:s:u:g:avfh")) != -1) - switch (c) - { - case 'p': - perms = (int)strtol (optarg, &term, 0); - if (term == optarg) - { - fprintf (stderr, "Can't parse permission %s\n", optarg); - return (1); - } - break; - - case 'l': - checklink = optarg; - break; - - case 's': - size = strtoll (optarg, &term, 0); - if (term == optarg) - { - fprintf (stderr, "Can't parse size %s\n", optarg); - return (1); - } - break; - - case 'u': - if (*optarg == '#') - { - uid = (uid_t)strtol (optarg + 1, &term, 0); - if (term == optarg + 1) - { - fprintf (stderr, 
"Can't parse numeric uid %s\n", optarg); - return (1); - } - } else { - struct passwd *pw = getpwnam (optarg); - - if (pw == NULL) - { - fprintf (stderr, "Can't find user %s\n", optarg); - return (1); - } - uid = pw->pw_uid; - } - break; - - case 'g': - if (*optarg == '#') - { - gid = (gid_t)strtol (optarg + 1, &term, 0); - if (term == optarg + 1) - { - fprintf (stderr, "Can't parse numeric gid %s\n", optarg); - return (1); - } - } else { - struct group *gr = getgrnam (optarg); - - if (gr == NULL) - { - fprintf (stderr, "Can't find group %s\n", optarg); - return (1); - } - uid = gr->gr_gid; - } - break; - - case 't': - type = optarg; - break; - - case 'a': - absent = 1; - break; - - case 'v': - verbose++; - break; - - case 'f': - follow++; - break; - - case 'h': - usage (argv[0], 1); - return (0); - - default: - usage (argv[0], 0); - } - - if (optind == argc) - usage (argv[0], 0); - - do - { - char *fname = argv[optind]; - int rc = follow ? stat64 (fname, &buf) : lstat64 (fname, &buf); - - if (rc != 0) - { - if (!(absent && errno == ENOENT)) - { - if (verbose) - printf ("Can't %sstat %s: %s\n", - follow ? 
"" : "l", - fname, strerror (errno)); - return (1); - } - - if (verbose) - printf ("%s: absent OK\n", fname); - continue; - } - - if (absent) - { - if (verbose) - printf ("%s exists\n", fname); - return (1); - } - - if (type != NULL) - { - if (!strcmp (type, "d") || - !strcmp (type, "dir")) - { - if (!S_ISDIR (buf.st_mode)) - { - if (verbose) - printf ("%s is not a directory\n", - fname); - return (1); - } - } - else if (!strcmp (type, "f") || - !strcmp (type, "file")) - { - if (!S_ISREG (buf.st_mode)) - { - if (verbose) - printf ("%s is not a regular file\n", - fname); - return (1); - } - } - else if (!strcmp (type, "l") || - !strcmp (type, "link")) - { - if (!S_ISLNK (buf.st_mode)) - { - if (verbose) - printf ("%s is not a link\n", - fname); - return (1); - } - } - else - { - fprintf (stderr, "Can't parse file type %s\n", - type); - return (1); - } - - if (verbose) - printf ("%s has type %s OK\n", fname, type); - } - - if (perms != -1) - { - if ((buf.st_mode & ~S_IFMT) != perms) - { - if (verbose) - printf ("%s has perms 0%o, not 0%o\n", - fname, (buf.st_mode & ~S_IFMT), - perms); - return (1); - } - - if (verbose) - printf ("%s has perms 0%o OK\n", - fname, perms); - } - - if (size != -1) - { - if (buf.st_size != size) - { - if (verbose) - printf ("%s has size %Ld, not %Ld\n", - fname, (long long)buf.st_size, - size); - return (1); - } - - if (verbose) - printf ("%s has size %Ld OK\n", fname, size); - } - - if (checklink != NULL) - { - static char lname[4<<10]; - - rc = readlink (fname, lname, sizeof (lname) - 1); - - if (rc < 0) - { - if (verbose) - printf ("%s: can't read link: %s\n", - fname, strerror (errno)); - return (1); - } - - lname[rc] = 0; - if (strcmp (checklink, lname)) - { - if (verbose) - printf ("%s is a link to %s and not %s\n", - fname, lname, checklink); - return (1); - } - - if (verbose) - printf ("%s links to %s OK\n", fname, checklink); - } - - if (uid != (uid_t)-1) - { - if (buf.st_uid != uid) - { - if (verbose) - printf ("%s is owned by 
user #%ld and not #%ld\n", - fname, (long)buf.st_uid, (long)uid); - return (1); - } - - if (verbose) - printf ("%s is owned by user #%ld OK\n", - fname, (long)uid); - } - - if (gid != (gid_t)-1) - { - if (buf.st_gid != gid) - { - if (verbose) - printf ("%s is owned by group #%ld and not #%ld\n", - fname, (long)buf.st_gid, (long)gid); - return (1); - } - - if (verbose) - printf ("%s is owned by group #%ld OK\n", - fname, (long)gid); - } - } while (++optind < argc); - - return (0); -} diff --git a/lustre/tests/client-echo.cfg b/lustre/tests/client-echo.cfg deleted file mode 100644 index 83856ec..0000000 --- a/lustre/tests/client-echo.cfg +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -# Config file for setting up a client to talk to an echo OST -SETUP_OSC=y diff --git a/lustre/tests/client-mount.cfg b/lustre/tests/client-mount.cfg deleted file mode 100644 index 6f2addb..0000000 --- a/lustre/tests/client-mount.cfg +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# Config file for mounting a client Lustre filesystem -SETUP_MDC=y -SETUP_OSC=y -OSCMT=/mnt/lustre -SETUP_MOUNT=y diff --git a/lustre/tests/client-mount2.cfg b/lustre/tests/client-mount2.cfg deleted file mode 100644 index cb210c8..0000000 --- a/lustre/tests/client-mount2.cfg +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -# Config file for mounting a client Lustre filesystem -MDC_NAMES="MDCDEV1 MDCDEV2" -OSC_NAMES="OSCDEV1 OSCDEV2" -SETUP_MDC=y -SETUP_OSC=y -MT1="/mnt/lustre1 OSCDEV1 MDCDEV1" -MT2="/mnt/lustre2 OSCDEV2 MDCDEV2" -MOUNT_LIST="MT1 MT2" -SETUP_MOUNT=y diff --git a/lustre/tests/cobd.sh b/lustre/tests/cobd.sh deleted file mode 100755 index cb4f94d..0000000 --- a/lustre/tests/cobd.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - - -config=${1:-$(basename $0 .sh)}.xml - -LMC=${LMC:-../utils/lmc -m $config} -TMP=${TMP:-/tmp} - -MDSDEV=$TMP/mds1 -MDSSIZE=50000 - -OSTDEV=$TMP/ost1 -OSTSIZE=200000 - -rm -f $config -# create nodes -${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid 
localhost --nettype tcp || exit 11 - -# configure mds server -${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20 - -# configure ost -${LMC} --add ost --node localhost --obd obd1 --obdtype obdecho || exit 30 -# configure ost -${LMC} --add ost --node localhost --obd obd2 --obdtype obdecho || exit 30 - -${LMC} --add cobd --node localhost --real_obd obd1 --cache_obd obd2 - -# create client config -# ${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --obd obd1 || exit 40 diff --git a/lustre/tests/common.sh b/lustre/tests/common.sh deleted file mode 100644 index 6a4429e..0000000 --- a/lustre/tests/common.sh +++ /dev/null @@ -1,713 +0,0 @@ -#!/bin/sh -export PATH=$PATH:/sbin:/usr/sbin - -[ -d /r ] && R=/r - -# check if running in source directory -# will probably need to create variable for each module. -if [ -f $SRCDIR/Makefile.am ]; then - USEDEV=yes - PORTALS=$SRCDIR/../../portals - LUSTRE=$SRCDIR/.. - - PTLCTL=$LUSTRE/utils/lctl - DBGCTL=$LUSTRE/utils/lctl - ACCEPTOR=$PORTALS/linux/utils/acceptor - - OBDCTL=$LUSTRE/utils/lctl -else - USEDEV=no - # should have configure set the paths here - BINDIR=/usr/sbin - PORTALS=/lib/modules - LUSTRE=/lib/modules - - PTLCTL=$BINDIR/lctl - DBGCTL=$BINDIR/lctl - ACCEPTOR=$BINDIR/acceptor - OBDCTL=$BINDIR/lctl -fi - -LOOPNUM=0; export LOOPNUM -if [ -b /dev/loop0 ]; then - LOOP=/dev/loop -elif [ -b /dev/loop/0 ]; then - LOOP=/dev/loop/ -else - echo "Cannot find /dev/loop0 or /dev/loop/0" 1>&2 && exit -1 -fi - -do_insmod() { - MODULE=$1 - BASE=`echo $MODULE | sed -e "s^.*/^^" -e "s/\.o$//"` - - lsmod | grep -q "\<$BASE\>" && return 0 - [ "$MODULE" ] || fail "usage: $0 <module>" - - if [ "$USEDEV" = "yes" ]; then - [ -f $MODULE ] || echo "$0: module '$MODULE' not found" 1>&2 - insmod $MODULE - else - modprobe $BASE - fi -} - -do_rmmod() { - MODULE=$1 - [ "$MODULE" ] || fail "usage: $0 <module>" - lsmod | grep -q $MODULE || return 0 - rmmod $MODULE || lsmod | sed "s/^/$MODULE 
failed: /" -} - -# Return the next unused loop device on stdout and in the $LOOPDEV -# environment variable. -next_loop_dev() { - NEXT= - while [ -b ${LOOP}${LOOPNUM} ]; do - LOOPDEV=${LOOP}${LOOPNUM} - losetup ${LOOPDEV} > /dev/null 2>&1 || NEXT=${LOOPDEV} - LOOPNUM=`expr ${LOOPNUM} + 1` - [ "$NEXT" ] && echo ${NEXT} && break - done -} - -# Create a new filesystem. If we are using a loopback device, we check -# for existing "template" filesystems instead of creating a new one, -# because it is _much_ faster to gunzip the empty filesystem instead of -# creating a new one from scratch. Conversely, if we are creating a -# filesystem on a device we use mkfs, because that only writes sparsely -# to the device. The empty filesystems are also highly compressed (1000:1) -# so they don't take too much space. -# -new_fs_usage() { - echo "new_fs <fstype> {device | file} [size]" 1>&2 - exit -1 -} -new_fs () { - EFILE="$1_$3.gz" - MKFS="mkfs.$1" - MKFSOPT="-b 4096" - - [ "$1" = "ext3" ] && MKFS="mkfs.ext2 -j" - if [ "$1" = "extN" ]; then - MKFS="mkfs.ext2 -j" - EFILE="ext3_$3.gz" - fi - - if [ -b "$2" ]; then - [ $# -lt 2 -o $# -gt 3 ] && new_fs_usage - - PM="/proc/mounts" - [ -r "$PM" ] || PM="/etc/mtab" - - grep "$2 " $PM 1>&2 && echo "$0: $2 is in $PM!" 1>&2 && exit -1 - - $MKFS $MKFSOPT $2 $3 || exit -1 - LOOPDEV=$2 # Not really a loop device - else - [ $# -ne 3 ] && new_fs_usage - - if [ -r "$EFILE" ]; then - echo "using prepared filesystem $EFILE for $2" - zcat "$EFILE" > $2 || exit -1 - sync - else - echo "creating new sparse filesystem on $2" - dd if=/dev/zero of=$2 bs=1k seek=$3 count=1 1>&2 || exit -1 - $MKFS $MKFSOPT -F $2 1>&2 || exit -1 - fi - LOOPDEV=`next_loop_dev` - losetup ${LOOPDEV} $2 1>&2 || exit -1 - fi - - # Enable hash-indexed directories for extN filesystems - [ "$1" = "extN" ] && echo "feature FEATURE_C5" | debugfs -w $2 -} - -# Set up to use an existing filesystem. 
We take the same parameters as -# new_fs, even though we only use the <fstype> and <file> parameters, to -# make it easy to convert between new_fs and old_fs in testing scripts. -old_fs () { - [ -e $2 ] || exit -1 - - if [ -b "$2" ]; then - LOOPDEV=$2 # Not really a loop device - else - LOOPDEV=`next_loop_dev` - losetup ${LOOPDEV} $2 1>&2 || exit -1 - fi -} - -list_mods() { - $DBGCTL modules > $R/tmp/ogdb - echo "The GDB module script is in $R/tmp/ogdb" - [ "$DEBUG_WAIT" = "yes" ] && echo -n "Press ENTER to continue" && read < /dev/tty - return 0 -} - -# start acceptor for a given network and port. -# not all networks need an acceptor -start_acceptor() { - case $NETWORK in - elan) [ "$PORT" ] && fail "$0: NETWORK is elan but PORT is set" - ;; - tcp) [ "$PORT" ] || fail "$0: NETWORK is tcp but PORT is not set" - $ACCEPTOR -r 1048576 -s 1048576 $PORT - ;; - *) fail "$0: unknown NETWORK '$NETWORK'" ;; - esac - -} - -# We need at least one setup file to be given. It can be passed on -# the command-line, or it can be found in the home directory, or it -# can even be sourced into the current shell environment. -setup_opts() { - DEF=/etc/lustre/lustre.cfg - if [ "$#" = 0 -a -r $DEF ]; then - . $DEF && SETUP=y - fi - - for CFG in "$@" ; do - case $CFG in - *.cfg) [ -r "$CFG" ] && . 
$CFG && SETUP=y ;; - *) echo "unknown option '$CFG'" 1>&2 - esac - done - - if [ "$SETUP" != "y" ]; then - echo "error: no config file on command-line and no $DEF" 1>&2 - exit -1 - fi - - [ "$MDC_NAMES" ] || export MDC_NAMES=MDCDEV - [ "$OSC_NAMES" ] || export OSC_NAMES=OSCDEV - [ -z "$MOUNT_LIST" -a "$OSCMT" ] && export MOUNT_LIST="MT" && export MT="$OSCMT OSCDEV MDCDEV" -} - -setup_variables() { - [ -z "$OSTNODE" ] && OSTNODE=$SERVER - [ -z "$MDSNODE" ] && MDSNODE=$SERVER - [ -z "$DLM" ] && DLM=$SERVER -} - -setup_portals() { - setup_variables - - if egrep -q "ksocknal|kqswnal" /proc/modules; then - echo "$0: portals already appears to be set up, skipping" - return 0 - fi - - if [ -z "$NETWORK" -o -z "$LOCALHOST" ]; then - echo "$0: NETWORK or LOCALHOST is not set" 1>&2 - exit -1 - fi - - if [ -z "$OSTNODE" -a -z "$MDSNODE" -a -z "$DLM" ]; then - echo "$0: SERVER (or OSTNODE and MDSNODE and DLM) not set" 1>&2 - exit -1 - fi - - [ -c /dev/portals ] || mknod /dev/portals c 10 240 - - do_insmod $PORTALS/linux/oslib/portals.o || exit -1 - #do_insmod $PORTALS/linux/router/kptlrouter.o || exit -1 - - case $NETWORK in - elan) do_insmod $PORTALS/linux/rqswnal/kqswnal.o || exit -1 - MYNID= - RECV_MEM= - SEND_MEM= - ;; - tcp) do_insmod $PORTALS/linux/socknal/ksocknal.o || exit -1 - MYNID="mynid $LOCALHOST" - RECV_MEM="recv_mem 1048576" - SEND_MEM="send_mem 1048576" - ;; - *) fail "$0: unknown NETWORK '$NETWORK'" ;; - esac - - start_acceptor - - $PTLCTL <<- EOF - network $NETWORK - $SEND_MEM - $RECV_MEM - $MYNID - connect $DLM $PORT - add_uuid $DLM $DLM - add_uuid self $LOCALHOST - quit - EOF - - if [ "$SETUP_MDS" -o "$SETUP_MDC" ]; then - $PTLCTL <<- EOF - network $NETWORK - connect $MDSNODE $PORT - add_uuid $MDSNODE $MDSNODE - quit - EOF - fi - - - if [ "$SETUP_OST" -o "$SETUP_OSC" ]; then - $PTLCTL <<- EOF - network $NETWORK - connect $OSTNODE $PORT - add_uuid $OSTNODE $OSTNODE - quit - EOF - fi -} - -setup_lustre() { - [ -c /dev/obd ] || mknod /dev/obd c 10 241 - - 
do_insmod $LUSTRE/obdclass/obdclass.o || exit -1 - do_insmod $LUSTRE/ptlrpc/ptlrpc.o || exit -1 - do_insmod $LUSTRE/ldlm/ldlm.o || exit -1 - do_insmod $LUSTRE/extN/extN.o || \ - echo "info: can't load extN.o module, not fatal if using ext3" - do_insmod $LUSTRE/mds/mds.o || exit -1 - #do_insmod $LUSTRE/obdclass/fsfilt_ext2.o || exit -1 - #do_insmod $LUSTRE/obdclass/fsfilt_ext3.o || exit -1 - do_insmod $LUSTRE/obdclass/fsfilt_extN.o || \ - echo "info: can't load fsfilt_extN.o module, needs extN.o" - do_insmod $LUSTRE/obdecho/obdecho.o || exit -1 - #do_insmod $LUSTRE/obdext2/obdext2.o || exit -1 - do_insmod $LUSTRE/obdfilter/obdfilter.o || exit -1 - do_insmod $LUSTRE/ost/ost.o || exit -1 - do_insmod $LUSTRE/osc/osc.o || exit -1 - do_insmod $LUSTRE/mdc/mdc.o || exit -1 - do_insmod $LUSTRE/lov/lov.o || exit -1 - do_insmod $LUSTRE/llite/llite.o || exit -1 - - echo "$R/tmp/lustre-log" > /proc/sys/portals/debug_path - - if $OBDCTL name2dev RPCDEV > /dev/null 2>&1; then - echo "$0: RPCDEV is already configured, skipping" - return 0 - fi - list_mods - - $OBDCTL <<- EOF || return $? - newdev - attach ptlrpc RPCDEV - setup - quit - EOF - - [ -d /mnt/lustre ] || mkdir /mnt/lustre -} - -setup_ldlm() { - [ "$SETUP_LDLM" = "y" ] || return 0 - - [ -c /dev/portals ] || mknod /dev/portals c 10 240 - - $OBDCTL <<- EOF || return $? 
- newdev - attach ldlm LDLMDEV LDLMUUID - setup - quit - EOF -} - -find_devno() { - if [ -z "$1" ]; then - echo "usage: $0 <devname>" 1>&2 - return -1 - fi - - $OBDCTL name2dev $1 -} - -setup_mds() { - [ "$SETUP_MDS" = "y" ] || return 0 - - if [ -z "$MDSFS" -o -z "$MDSDEV" ]; then - echo "error: setup_mds: MDSFS or MDSDEV unset" 1>&2 - return -1 - fi - - [ "$1" ] && DO_FS=$1 - if [ "$DO_FS" != "new_fs" -a "$DO_FS" != "old_fs" ]; then - echo "usage: setup_mds {new_fs|old_fs}" 1>&2 - return -1 - fi - - if $OBDCTL name2dev MDSDEV > /dev/null 2>&1; then - echo "$0: MDSDEV is already configured" - return 0 - fi - - $DO_FS ${MDSFS} ${MDSDEV} ${MDSSIZE} - MDS=${LOOPDEV} - - $OBDCTL <<- EOF || return $? - newdev - attach mds MDSDEV MDSUUID - setup ${MDS} ${MDSFS} - quit - EOF -} - -setup_mds_lov() { - [ "$SETUP_MDS" = "y" ] || return 0 - - if [ -z "$LOVUUID" ]; then - echo "No LOV configured" - return - fi - - $OBDCTL <<- EOF || return $? - name2dev MDSDEV - connect - lov_setconfig ${LOVUUID} 1 65536 0 OSCDEV-`hostname` - disconnect - quit - EOF -} - - -setup_ost() { - [ "$SETUP_OST" = "y" ] || return 0 - - if [ -z "$OSTTYPE" ]; then - echo "error: setup_ost: OSTTYPE unset" 1>&2 - return -1 - fi - - case $OSTTYPE in - obdecho) OBD= - OBDARG= - NEED_FS=n - ;; - obdext2) OBDARG= - NEED_FS=y - ;; - obdfilter) OBDARG=$OSTFS - NEED_FS=y - ;; - *) echo "error: setup_ost: unknown OSTTYPE '$OSTTYPE'" 1>&2 - return -1 - ;; - esac - - if $OBDCTL name2dev OBDDEV > /dev/null 2>&1; then - echo "$0: OBDDEV is already configured" - return 0 - fi - - if [ "$NEED_FS" = "y" ]; then - [ "$1" ] && DO_FS=$1 - if [ -z "$OSTFS" -o -z "$OSTDEV" ]; then - echo "error: setup_ost: OSTFS or OSTDEV unset" 1>&2 - return -1 - fi - - if [ "$DO_FS" != "new_fs" -a "$DO_FS" != "old_fs" ]; then - echo "usage: setup_ost {new_fs|old_fs}" 1>&2 - return -1 - fi - - $DO_FS ${OSTFS} ${OSTDEV} ${OSTSIZE} - OBD=${LOOPDEV} - fi - - $OBDCTL <<- EOF || return $? 
- newdev - attach ${OSTTYPE} OBDDEV OBDUUID - setup ${OBD} ${OBDARG} - quit - EOF - - $OBDCTL <<- EOF || return $? - newdev - attach ost OSTDEV OSTUUID - setup OBDUUID - quit - EOF -} - -setup_server() { - #setup_mds $1 && setup_mds_lov $1 && setup_ost $1 - setup_mds $1 && setup_ost $1 -} - -setup_osc() { - [ "$SETUP_OSC" != "y" ] && return 0 - [ "$OSC_NAMES" ] || OSC_NAMES=OSCDEV - - for THEOSC in $OSC_NAMES ; do - if $OBDCTL name2dev $THEOSC > /dev/null 2>&1; then - echo "$0: OSCDEV is already configured" - continue - fi - - [ -z "$OBD_UUID" ] && OBD_UUID="OBDUUID" - $OBDCTL <<- EOF || return $rc - newdev - attach osc $THEOSC ${THEOSC}-`hostname` - setup $OBD_UUID $OSTNODE - quit - EOF - done -} - -setup_mdc() { - [ "$SETUP_MDC" != "y" ] && return 0 - [ "$MDC_NAMES" ] || MDC_NAMES=MDCDEV - - for THEMDC in $MDC_NAMES ; do - if $OBDCTL name2dev $THEMDC > /dev/null 2>&1; then - echo "$0: MDCDEV is already configured" - continue - fi - - $OBDCTL <<- EOF || return $? - newdev - attach mdc $THEMDC ${THEMDC}-`hostname` - setup MDSUUID $MDSNODE - quit - EOF - done -} - -setup_lov () { - [ "$SETUP_MDC" != "y" ] && return 0 - - if [ -z "$LOVUUID" ]; then - echo "No LOV configured" - return - fi - - $OBDCTL <<- EOF || return $? - newdev - attach lov LOVNAME ${LOVUUID} - setup MDCDEV-`hostname` - quit - EOF -} - - -setup_mount() { - [ "$SETUP_MOUNT" != "y" ] && return 0 - - [ "$MOUNT_LIST" ] || fail "error: $0: MOUNT_LIST unset" - - for THEMOUNT in $MOUNT_LIST; do - eval "echo \$$THEMOUNT" | while read MTPT THEOSC THEMDC; do - if mount | grep -q $MTPT; then - echo "$0: $MTPT is already mounted" - return 0 - fi - - [ ! 
-d $MTPT ] && mkdir $MTPT - echo mount -t lustre_lite -o osc=${THEOSC}-`hostname`,mdc=${THEMDC}-`hostname` none $MTPT - mount -t lustre_lite -o osc=${THEOSC}-`hostname`,mdc=${THEMDC}-`hostname` none $MTPT - done - done -} - -setup_client() { - # setup_osc && setup_mdc && setup_lov && setup_mount - setup_osc && setup_mdc && setup_mount -} - -DEBUG_ON="echo 0xffffffff > /proc/sys/portals/debug" -DEBUG_OFF="echo 0 > /proc/sys/portals/debug" - -debug_server_off() { - echo "Turn OFF debug" && eval "$DEBUG_OFF" -} - -debug_server_on() { - echo "Turn ON debug" && eval "$DEBUG_ON" -} - -debug_client_off() { - echo "Turning OFF debug on client" && eval "$DEBUG_OFF" -} - -debug_client_on() { - echo "Turning ON debug on client" && eval "$DEBUG_ON" -} - -cleanup_portals() { - [ -z "$NETWORK" ] && NETWORK=tcp - - setup_variables - - $PTLCTL <<- EOF - network $NETWORK - disconnect - del_uuid self - del_uuid $MDSNODE - del_uuid $OSTNODE - del_uuid $DLM - quit - EOF - - do_rmmod ldlm - do_rmmod ptlrpc - do_rmmod obdclass - - do_rmmod kqswnal - do_rmmod ksocknal - do_rmmod kptlrouter - - [ "$TIME" ] && $DBGCTL debug_kernel $R/tmp/debug.5.$TIME - - do_rmmod portals -} - -cleanup_lustre() { - killall acceptor - - do_rmmod llite - do_rmmod lov - do_rmmod mdc - do_rmmod osc - - do_rmmod fsfilt_extN - do_rmmod fsfilt_ext3 - do_rmmod fsfilt_ext2 - do_rmmod mds - do_rmmod ost - do_rmmod obdecho - do_rmmod obdfilter - do_rmmod obdext2 - do_rmmod extN - - losetup -d ${LOOP}0 - losetup -d ${LOOP}1 - losetup -d ${LOOP}2 -} - -cleanup_ldlm() { - [ "$SETUP" -a -z "$SETUP_LDLM" ] && return 0 - - LDLMDEVNO=`find_devno LDLMDEV` - if [ "$LDLMDEVNO" ]; then - $OBDCTL <<- EOF - device $LDLMDEVNO - cleanup - detach - quit - EOF - fi -} - -cleanup_mds() { - [ "$SETUP" -a -z "$SETUP_MDS" ] && return 0 - - MDSDEVNO=`find_devno MDSDEV` - if [ "$MDSDEVNO" ]; then - $OBDCTL <<- EOF - device $MDSDEVNO - cleanup - detach - quit - EOF - fi -} - -cleanup_ost() { - [ "$SETUP" -a -z "$SETUP_OST" ] && return 0 - - 
OSTDEVNO=`find_devno OSTDEV` - if [ "$OSTDEVNO" ]; then - $OBDCTL <<- EOF - device $OSTDEVNO - cleanup - detach - quit - EOF - fi - - OBDDEVNO=`find_devno OBDDEV` - if [ "$OBDDEVNO" ]; then - $OBDCTL <<- EOF - device $OBDDEVNO - cleanup - detach - quit - EOF - fi -} - -cleanup_server() { - cleanup_ost && cleanup_mds -} - -cleanup_mount() { - [ "$SETUP_MOUNT" != "y" ] && return 0 - - [ "$MOUNT_LIST" ] || fail "error: $0: MOUNT_LIST unset" - - for THEMOUNT in $MOUNT_LIST; do - eval "echo \$$THEMOUNT" | while read MTPT THEOSC THEMDC; do - if [ "`mount | grep $MTPT`" ]; then - umount $MTPT || fail "unable to unmount $MTPT" - fi - done - done -} - -cleanup_osc() { - [ "$SETUP" -a -z "$SETUP_OSC" ] && return 0 - [ "$OSC_NAMES" ] || OSC_NAMES=OSCDEV - - for THEOSC in $OSC_NAMES ; do - OSCDEVNO=`find_devno $THEOSC` - if [ "$OSCDEVNO" ]; then - $OBDCTL <<- EOF - device $OSCDEVNO - cleanup - detach - quit - EOF - fi - done -} - -cleanup_mdc() { - [ "$SETUP" -a -z "$SETUP_MDC" ] && return 0 - [ "$MDC_NAMES" ] || MDC_NAMES=MDCDEV - - for THEMDC in $MDC_NAMES ; do - MDCDEVNO=`find_devno $THEMDC` - if [ "$MDCDEVNO" ]; then - $OBDCTL <<- EOF - device $MDCDEVNO - cleanup - detach - quit - EOF - fi - done -} - -cleanup_rpc() { - RPCDEVNO=`find_devno RPCDEV` - if [ "$RPCDEVNO" ]; then - $OBDCTL <<- EOF - device $RPCDEVNO - cleanup - detach - quit - EOF - fi -} - -cleanup_client() { - cleanup_mount && cleanup_osc && cleanup_mdc && cleanup_rpc -} - -fail() { - echo "ERROR: $1" 1>&2 - [ $2 ] && RC=$2 || RC=1 - exit $RC -} diff --git a/lustre/tests/compile.sh b/lustre/tests/compile.sh deleted file mode 100644 index 13c142e..0000000 --- a/lustre/tests/compile.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh -set -evx - -MNT=${MNT:-/mnt/lustre} -DIR=${DIR:-$MNT} -SRC=${SRC:-`dirname $0`/../..} -while date; do - for i in portals lustre; do - TGT=$DIR/$i - [ -d $TGT ] || cp -av $SRC/$i/ $TGT - make -C $TGT clean - make -C $TGT -j2 - make -C $TGT clean - done -done diff --git 
a/lustre/tests/crash-mod.sh b/lustre/tests/crash-mod.sh deleted file mode 100644 index d5ce473..0000000 --- a/lustre/tests/crash-mod.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -TMP=${TMP:-/tmp} -BASEDIR=${1:-`dirname $0`/..} -LCMD=$TMP/crash-mod-`hostname` -echo "Storing crash module info in $LCMD" -cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do - MOD="$BASEDIR/$M" - MODNAME=`basename $MOD .o` - - echo mod -s $MODNAME $MOD | tee -a $LCMD -done diff --git a/lustre/tests/create.pl b/lustre/tests/create.pl deleted file mode 100644 index 6156869..0000000 --- a/lustre/tests/create.pl +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/perl -use Getopt::Long; - -my $silent = 0; -my $mcreate = 1; # should we use mcreate or open? -my $files = 5; - -GetOptions("silent!" => \$silent, - "mcreate=i" => \$mcreate, - "files=i" => \$files); - -my $mtpt = shift || usage(); -my $mount_count = shift || usage(); -my $i = shift || usage(); -my $count = $i; - -sub usage () { - print "Usage: $0 [--silent] [--mcreate=n] [--files=n] <mnt prefix> <mnt count> <iterations>\n"; - print "example: $0 /mnt/lustre 2 50\n"; - print " will test in /mnt/lustre1 and /mnt/lustre2\n"; - print " $0 /mnt/lustre -1 50\n"; - print " will test in /mnt/lustre only\n"; - exit; -} - -sub do_open($) { - my $path = shift; - - if ($mcreate) { - my $tmp = `./mcreate $path`; - if ($tmp) { - print "Creating $path [" . $$."]...\n" if !$silent; - $tmp =~ /.*error: (.*)\n/; - print "Create done [$$] $path: $!\n" if !$silent; - } else { - print "Create done [$$] $path: Success\n"if !$silent; - } - } else { - print "Opening $path [" . 
$$."]...\n"if !$silent; - open(FH, ">$path") || die "open($PATH): $!"; - print "Open done [$$] $path: Success\n"if !$silent; - close(FH) || die; - } -} - -while ($i--) { - my $which = ""; - if ($mount_count > 0) { - $which = int(rand() * $mount_count) + 1; - } - $d = int(rand() * $files); - do_open("$mtpt$which/$d"); - - if ($mount_count > 0) { - $which = int(rand() * $mount_count) + 1; - } - $d = int(rand() * $files); - $path = "$mtpt$which/$d"; - print "Unlink $path start [" . $$."]...\n"if !$silent; - if (unlink($path)) { - print "Unlink done [$$] $path: Success\n"if !$silent; - } else { - print "Unlink done [$$] $path: $!\n"if !$silent; - } - if (($count - $i) % 100 == 0) { - print STDERR ($count - $i) . " operations [" . $$ . "]\n"; - } -} - -my $which = ""; -if ($mount_count > 0) { - $which = int(rand() * $mount_count) + 1; -} -for ($d = 0; $d < $files; $d++) { - unlink("$mtpt$which/$d"); -} - -print "Done.\n"; diff --git a/lustre/tests/createdestroy.c b/lustre/tests/createdestroy.c deleted file mode 100644 index f1e7f4b..0000000 --- a/lustre/tests/createdestroy.c +++ /dev/null @@ -1,224 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <sys/wait.h> -#include <time.h> -#include <sys/time.h> - -int thread; - -#define BAD_VERBOSE (-999999999) - -#define difftime(a, b) \ - ((double)(a)->tv_sec - (b)->tv_sec + \ - ((double)((a)->tv_usec - (b)->tv_usec) / 1000000)) - -static char *cmdname(char *func) -{ - static char buf[512]; - - if (thread) { - sprintf(buf, "%s-%d", func, thread); - return buf; - } - - return func; -} - -static int be_verbose(int verbose, struct timeval *next_time, - unsigned long num, unsigned long *next_num, int num_total) -{ - struct timeval now; - - if (!verbose) - return 0; - - if (next_time != NULL) - gettimeofday(&now, NULL); - - /* A positive verbosity means to print every X iterations */ - if (verbose 
> 0 && - (next_num == NULL || num >= *next_num || num >= num_total)) { - *next_num += verbose; - if (next_time) { - next_time->tv_sec = now.tv_sec - verbose; - next_time->tv_usec = now.tv_usec; - } - return 1; - } - - /* A negative verbosity means to print at most each X seconds */ - if (verbose < 0 && next_time != NULL && difftime(&now, next_time) >= 0){ - next_time->tv_sec = now.tv_sec - verbose; - next_time->tv_usec = now.tv_usec; - if (next_num) - *next_num = num; - return 1; - } - - return 0; -} - -static int get_verbose(char *func, const char *arg) -{ - int verbose; - char *end; - - if (!arg || arg[0] == 'v') - verbose = 1; - else if (arg[0] == 's' || arg[0] == 'q') - verbose = 0; - else { - verbose = (int)strtoul(arg, &end, 0); - if (*end) { - fprintf(stderr, "%s: error: bad verbose option '%s'\n", - func, arg); - return BAD_VERBOSE; - } - } - - if (verbose < 0) - printf("Print status every %d seconds\n", -verbose); - else if (verbose == 1) - printf("Print status every operation\n"); - else if (verbose > 1) - printf("Print status every %d operations\n", verbose); - - return verbose; -} - -int main(int argc, char *argv[]) -{ - char filename[1024]; - int verbose = 0; - unsigned long count, i; - int threads = 0; - char *end; - int rc; - - if (argc < 3 || argc > 5) { - fprintf(stderr, - "usage: %s <filename> <count> [verbose [threads]]\n", - argv[0]); - exit(1); - } - - count = strtoul(argv[2], &end, 0); - if (*end) { - fprintf(stderr, "%s: error: bad iteration count '%s'\n", - argv[0], argv[1]); - exit(2); - } - if (argc == 4) { - verbose = get_verbose(argv[0], argv[3]); - if (verbose == BAD_VERBOSE) - exit(2); - } - if (argc == 5) { - threads = strtoul(argv[4], &end, 0); - if (*end) { - fprintf(stderr, "%s: error: bad thread count '%s'\n", - argv[0], argv[1]); - exit(2); - } - } - - for (i = 1; i <= threads; i++) { - rc = fork(); - if (rc < 0) { - fprintf(stderr, "%s: error: #%ld - %s\n", - cmdname(argv[0]), i, strerror(rc = errno)); - break; - } else if (rc 
== 0) { - thread = i; - break; - } else - printf("%s: thread #%ld (PID %d) started\n", - cmdname(argv[0]), i, rc); - rc = 0; - } - - if (threads && thread == 0) { /* parent process */ - int live_threads = threads; - - while (live_threads > 0) { - int status; - pid_t ret; - - ret = waitpid(0, &status, 0); - if (ret == 0) { - continue; - } - - if (ret < 0) { - fprintf(stderr, "%s: error: wait - %s\n", - argv[0], strerror(errno)); - if (!rc) - rc = errno; - } else { - /* - * This is a hack. We _should_ be able to use - * WIFEXITED(status) to see if there was an - * error, but it appears to be broken and it - * always returns 1 (OK). See wait(2). - */ - int err = WEXITSTATUS(status); - if (err || WIFSIGNALED(status)) - fprintf(stderr, - "%s: error: PID %d had rc=%d\n", - argv[0], ret, err); - if (!rc) - rc = err; - - live_threads--; - } - } - } else { - struct timeval start, end, next_time; - unsigned long next_count; - double diff; - - gettimeofday(&start, NULL); - next_time.tv_sec = start.tv_sec - verbose; - next_time.tv_usec = start.tv_usec; - - for (i = 0, next_count = verbose; i < count; i++) { - if (threads) - sprintf(filename, "%s-%d-%ld", - argv[1], thread, i); - else - sprintf(filename, "%s-%ld", argv[1], i); - - rc = mknod(filename, S_IFREG, 0); - if (rc < 0) { - fprintf(stderr, "%s: error: mknod(%s): %s\n", - cmdname(argv[0]), filename, - strerror(errno)); - rc = errno; - break; - } - if (unlink(filename) < 0) { - fprintf(stderr, "%s: error: unlink(%s): %s\n", - cmdname(argv[0]), filename, - strerror(errno)); - rc = errno; - break; - } - if (be_verbose(verbose, &next_time,i,&next_count,count)) - printf("%s: number %ld\n", cmdname(argv[0]), i); - } - - gettimeofday(&end, NULL); - diff = difftime(&end, &start); - - printf("%s: %ldx2 files in %.4gs (%.4g ops/s): rc = %d: %s", - cmdname(argv[0]), i, diff, (double)i * 2 / diff, - rc, ctime(&end.tv_sec)); - } - return rc; -} diff --git a/lustre/tests/createmany.c b/lustre/tests/createmany.c deleted file mode 
100644 index 8399824..0000000 --- a/lustre/tests/createmany.c +++ /dev/null @@ -1,98 +0,0 @@ -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <time.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> - -void usage(char *prog) -{ - printf("usage: %s {-o|-m} filenamefmt count\n", prog); - printf(" %s {-o|-m} filenamefmt -seconds\n", prog); - printf(" %s {-o|-m} filenamefmt start count\n", prog); -} - -int main(int argc, char ** argv) -{ - int i, rc = 0, do_open; - char format[4096], *fmt; - char filename[4096]; - long start, last, end; - long begin = 0, count; - - if (argc < 4 || argc > 5) { - usage(argv[0]); - return 1; - } - - if (strcmp(argv[1], "-o") == 0) { - do_open = 1; - } else if (strcmp(argv[1], "-m") == 0) { - do_open = 0; - } else { - usage(argv[0]); - return 1; - } - - if (strlen(argv[2]) > 4080) { - printf("name too long\n"); - return 1; - } - - start = last = time(0); - - if (argc == 4) { - end = strtol(argv[3], NULL, 0); - if (end > 0) { - count = end; - end = -1UL >> 1; - } else { - end = start - end; - count = -1UL >> 1; - } - } else { - end = -1UL >> 1; - begin = strtol(argv[3], NULL, 0); - count = strtol(argv[4], NULL, 0); - } - - if (strchr(argv[2], '%')) - fmt = argv[2]; - else { - sprintf(format, "%s%%d", argv[2]); - fmt = format; - } - for (i = 0; i < count && time(0) < end; i++, begin++) { - sprintf(filename, fmt, begin); - if (do_open) { - int fd = open(filename, O_CREAT|O_RDWR, 0644); - if (fd < 0) { - printf("open(%s) error: %s\n", filename, - strerror(errno)); - rc = errno; - break; - } - close(fd); - } else { - rc = mknod(filename, S_IFREG| 0444, 0); - if (rc) { - printf("mknod(%s) error: %s\n", - filename, strerror(errno)); - rc = errno; - break; - } - } - if ((i % 10000) == 0) { - printf(" - created %d (time %ld ; total %ld ; last %ld)\n", - i, time(0), time(0) - start, time(0) - last); - last = time(0); - } - } - printf("total: %d creates in %ld seconds: 
%f creates/second\n", i, - time(0) - start, ((float)i / (time(0) - start))); - - return rc; -} diff --git a/lustre/tests/createtest.c b/lustre/tests/createtest.c deleted file mode 100644 index 6223034..0000000 --- a/lustre/tests/createtest.c +++ /dev/null @@ -1,142 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> - -#ifndef S_SHIFT -#define S_SHIFT 12 -#endif - -int usage(char *prog) -{ - fprintf(stderr, "usage: %s <basename>\n", prog); - exit(1); -} - -int main(int argc, char *argv[]) -{ - char name[4096]; - int i; - - if (argc != 2) - usage(argv[0]); - - umask(0); - for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) { - struct stat st; - int mode = i | 0644; - int rc; - - sprintf(name, "%s-mknod%06o", argv[1], mode); - rc = mknod(name, mode, 0x1234); - switch (i) { - case 0: - mode |= S_IFREG; - case S_IFREG: - case S_IFCHR: case S_IFBLK: - if (rc < 0 && getuid() != 0) - continue; - case S_IFSOCK: case S_IFIFO: - if (rc < 0) { - fprintf(stderr, "%s: ERROR mknod %s: %s\n", - argv[0], name, strerror(errno)); - exit(10); - } - rc = stat(name, &st); - if (rc < 0) { - fprintf(stderr, "%s: ERROR stat %s: %s", - argv[0], name, strerror(errno)); - exit(11); - } - if (st.st_mode != mode) { - fprintf(stderr, "%s: ERROR mode %s: %o != %o", - argv[0], name, st.st_mode, mode); - exit(12); - } - rc = unlink(name); - if (rc < 0) { - fprintf(stderr, "%s: ERROR unlink %s: %s", - argv[0], name, strerror(errno)); - exit(13); - } - break; - default: - if (rc == 0) { - fprintf(stderr, "%s: ERROR: %s created\n", - argv[0], name); - exit(14); - } - } - } - - for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) { - struct stat st; - int mode; - int fd; - int rc; - - mode = i | 0644; - sprintf(name, "%s-creat%06o", argv[1], mode); - fd = open(name, O_CREAT|O_RDONLY, mode); - if (fd < 0) { - fprintf(stderr, "%s: ERROR creat %s: %s\n", - argv[0], name, strerror(errno)); - 
exit(21); - } - close(fd); - rc = stat(name, &st); - if (rc < 0) { - fprintf(stderr, "%s: ERROR stat %s: %s", - argv[0], name, strerror(errno)); - exit(11); - } - if (!S_ISREG(st.st_mode & S_IFMT)) { - fprintf(stderr, "%s: ERROR mode %s: %o != %o", - argv[0], name, st.st_mode & S_IFMT, S_IFREG); - exit(12); - } - rc = unlink(name); - if (rc < 0) { - fprintf(stderr, "%s: ERROR unlink %s: %s\n", - argv[0], name, strerror(errno)); - exit(20); - } - } - - for (i = 0; i <= S_IFMT; i += (1 << S_SHIFT)) { - struct stat st; - int rc; - - sprintf(name, "%s-mkdir%06o", argv[1], i | 0644); - rc = mkdir(name, i | 0664); - if (rc < 0) { - fprintf(stderr, "%s: ERROR mkdir %s: %s\n", - argv[0], name, strerror(errno)); - exit(30); - } - rc = stat(name, &st); - if (rc < 0) { - fprintf(stderr, "%s: ERROR stat %s: %s", - argv[0], name, strerror(errno)); - exit(11); - } - if (!S_ISDIR(st.st_mode)) { - fprintf(stderr, "%s: ERROR mode %s: %o != %o", - argv[0], name, st.st_mode & S_IFMT, S_IFDIR); - exit(12); - } - rc = rmdir(name); - if (rc < 0) { - fprintf(stderr, "%s: ERROR rmdir %s: %s\n", - argv[0], name, strerror(errno)); - exit(31); - } - } - - printf("%s: SUCCESS\n", argv[0]); - return 0; -} diff --git a/lustre/tests/directio.c b/lustre/tests/directio.c deleted file mode 100644 index e660ea4..0000000 --- a/lustre/tests/directio.c +++ /dev/null @@ -1,79 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <fcntl.h> -#include <stdlib.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> - -// not correctly in the headers yet!! 
-#ifndef O_DIRECT -#define O_DIRECT 040000 /* direct disk access hint */ -#endif - -int main(int argc, char **argv) -{ - int fd; - char *buf; - int blocks, seek_blocks; - long len; - off64_t seek; - struct stat64 st; - int rc; - - if (argc != 4) { - printf("Usage: %s file seek nr_blocks\n", argv[0]); - return 1; - } - - seek_blocks = strtoul(argv[2], 0, 0); - blocks = strtoul(argv[3], 0, 0); - fd = open(argv[1], O_LARGEFILE | O_DIRECT | O_RDWR | O_CREAT, 0644); - if (fd == -1) { - printf("Cannot open %s: %s\n", argv[1], strerror(errno)); - return 1; - } - - if (fstat64(fd, &st) < 0) { - printf("Cannot stat %s: %s\n", argv[1], strerror(errno)); - return 1; - } - - printf("directio on %s for %dx%lu blocks \n", argv[1], blocks, - st.st_blksize); - - seek = (off64_t)seek_blocks * (off64_t)st.st_blksize; - if (lseek64(fd, seek, SEEK_SET) < 0) { - printf("lseek64 failed: %s\n", strerror(errno)); - return 1; - } - - len = blocks * st.st_blksize; - buf = mmap(0, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, 0, 0); - if (!buf) { - printf("No memory %s\n", strerror(errno)); - return 1; - } - - memset(buf, 0xba, len); - rc = write(fd, buf, len); - if (rc != len) { - printf("Write error %s (rc = %d)\n", strerror(errno), rc); - return 1; - } - - if (lseek64(fd, seek, SEEK_SET) < 0) { - printf("Cannot seek %s\n", strerror(errno)); - return 1; - } - - rc = read(fd, buf, len); - if (rc != len) { - printf("Read error: %s (rc = %d)\n", strerror(errno), rc); - return 1; - } - - return 0; -} diff --git a/lustre/tests/echo.sh b/lustre/tests/echo.sh deleted file mode 100755 index 335db41..0000000 --- a/lustre/tests/echo.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -LOV=${LOV:-0} -while [ "$1" ]; do - case $1 in - --lov) LOV="1" ;; - *) [ -z $config ] && config=$1 || OPTS="$OPTS $1" ;; - esac - shift -done - -config=${config:-$(basename $0 .sh).xml} -LMC=${LMC:-../utils/lmc -m $config} -TMP=${TMP:-/tmp} - -SERVER=${SERVER:-localhost} -CLIENT=${CLIENT:-localhost} -NET=${NET:-tcp} 
-SERVERNID=${SERVERNID:-$SERVER} -CLIENTNID=${CLIENTNID:-$CLIENT} - - -# FIXME: make LMC not require MDS for obdecho LOV -MDSDEV=${MDSDEV:-$TMP/mds1} -MDSSIZE=10000 - -STRIPE_BYTES=65536 -STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs - -rm -f $config -# create nodes -$LMC --add node --node $SERVER || exit 1 -$LMC --add net --node $SERVER --nid $SERVERNID --nettype $NET || exit 2 - -if (($LOV)); then - $LMC --add mds --node $SERVER --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10 - $LMC --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 11 - $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 12 - $LMC --add ost --node $SERVER --lov lov1 --osdtype=obdecho || exit 13 - OBD_NAME=lov1 -else - $LMC --add ost --ost obd1 --node $SERVER --osdtype=obdecho || exit 12 - OBD_NAME=obd1 -fi - -if [ "$SERVER" != "$CLIENT" ]; then - $LMC --add node --node $CLIENT || exit 1 - $LMC --add net --node $CLIENT --nid $CLIENTNID --nettype $NET || exit 2 -fi - -$LMC --add echo_client --node $CLIENT --ost ${OBD_NAME} || exit 3 - diff --git a/lustre/tests/elan-client.cfg b/lustre/tests/elan-client.cfg deleted file mode 100644 index 120b605..0000000 --- a/lustre/tests/elan-client.cfg +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -# Config file for setting up a remote server with a real OST -NETWORK=elan -LOCALHOST=5 -SERVER=4 diff --git a/lustre/tests/elan-server.cfg b/lustre/tests/elan-server.cfg deleted file mode 100644 index 7520840..0000000 --- a/lustre/tests/elan-server.cfg +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -# Config file for setting up a remote server with a real OST -NETWORK=elan -LOCALHOST=4 -SERVER=4 diff --git a/lustre/tests/ext2_10000.gz b/lustre/tests/ext2_10000.gz deleted file mode 100644 index e700ad021894b9a311eb82fd2870eb3c9cdc906d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 10228 zcmb2|=HR%vaG?zob4dxqn;U!eSOO*39vG*qmaT1ZTO6b;6zI8a!R3I6H@qEQIgLS& 
zm|_}q&DS|UXA|L?8?@PRQE<Vx>u%d7Dy(_#d2gn%aeCUM^)t?tJ2CuQz1xW+TvI)B z&-`!Zt>1p$@U+NXZkcZxuN`~+^-_UlPCpg7BX?b~x|gx#-ASwSI%RpU?Uq;8<$f#r z_bq8}QRP3Ay*qvuE&Ig4@Zs~U|Ic?$nRxC0e_ur{P6h@Bp1Ie{yFPOEuLrU}wD-Jv zx<BD93y@v6HBW!{l=V}0{X8kRYvU}l$<6oXbN}2|nR=S{KzU7l7+Y<9(u|k0w@=+P zchlscKr^70Q9ybKG|Zb}B)4hD1?l5ECh&D-*4P`0eP7+~UR-_kadYfeCI*Iv{i}W~ z{9ivW<NJT0=?5mv3;_x<T=>rnB!AVDZsDko(GVC7fzc2c4S~@R7!85Z5Eu=C(GVC7 zfzc2c4S~@R7!85Z5Eu=C(GVC7fzc2c4S~@R7!85Z5Eu=C(GVC7fzc2c4S~@Rz!Cxt x+rK@S01|h&@c;Tg14b-LMukR0U^E0qLtr!nMnhn@gus4=%TYIi864&?007gFqXqx~ diff --git a/lustre/tests/ext2_25000.gz b/lustre/tests/ext2_25000.gz deleted file mode 100644 index 122ed792d8accfd9ecbcf02cec89ffe4acc137c5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 25136 zcmeI(-%Ha`7{~FWfsRTdkba{uqZ>oUqLirgBB>TMu>!M5Z$vV35xY<_S`BPER*NwN zEtv(eEpXwOl9f&(Rw27c=+|&gyKvyj<=8L7zD@s$ki9<_=kQz{&Us$f&nY&_WV>c< zM{+kbwP}?N%7Y4pLPP>bPrt1x%nMo0)znY4ZrfgcV!*A}-*p?^eHDhf<FAtHims3= zTAuQ}+;nEtZhUGRIkfd*x21X^=G}cmGZxz1^QcO$lblnAuc{00<@fiMd^ZI<F7G}3 zM%$fs^!<2e_*QpwQeNtNp49k;=K4KzLrce|)6)yT1O5-HcxX1-GT1&mxFm$4Z8{lK zeLSh1kpiYmr-TrV;ny8a_199({SBF_^NW3W%h*a$o)A%=^_DtW8C2>cBfom4o&^`v z)*XMY2OXaaQ(f_9$IP<DH+e7Sul1iTt1Yt@WKI(VvRy#eAXo2vuwOH_INyGtpyi6x z`lMGgl1R*#U;O#Fz>_P4Zrqu0NVj{#aVdl4H9JakGJ9BBT~{iz-H-keKmY**5I_I{ z1Q0*~0R#|0009ILKmY**5I_I{1Q0;re+sxF6L;3`H%nJb>`sCJ0tg_000IagfB*sr zAb<b@2q1s}0tg_000IagfB*srAb<b@>s{bQetzbQvk+B(T_)RlZ-)0HfB*srAb<b@ z2q1s}0tg_000IagfB*srAb<b@2q1s}0tg_0fb&z)9`ouffNWVBEgo1K3s^0)KHl>h L_r^wH>&*EFvEw*O diff --git a/lustre/tests/ext3_10000.gz b/lustre/tests/ext3_10000.gz deleted file mode 100644 index b372fbdeaf91970c5b33eb16ade22e901894745b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12172 zcmeIzeNYo;8UXNAPQZakJIv`doMjwZJ;#e5r7Krxw2VC{dQl*fzy=bnD6-4PC@$1! 
zNExs3Ivk?s2W>2Z@|A?_rie>wAYkh$wTQJPyIbPL)N+dK3c|sP1d<%R|8C~~@y{FI z=bv|;d7pXTd4A73^F~*OhE`GM-dPZ|=fi`s;`Qt1L-6=6=DfV8;_CgH>WKVjq4_8N zc6xFCs+3hDFP~VGn0`7be^FD^!p6=-(TH3$QuI>OE?p>V=f(zoz3rD6?4D<G<K(Tb z*u1erb#Iq=hHqUJj0dR5YZ2l9NKGJnZmtUonid8IClc%8zTomC6EhROCkdMK6%}nE zXLVX*#RdO?LnDg75qD~xXZSC5fx(}?donaI6iGb$#uDRj-17J0*sT+e;Qlx7Z{lWz z!*2u&KI-vKP0rk@)5$%#pU&!|bKe&T1V4ohp9&=C{R)vSZsSV=!AH8~*WjR;1(w?R z&8s?YZOP#Fp5ECS|7RtS6ifHT{xe$QzoVOWyB~<-#ur}Zit37n1Dz$~?WkV#n>Qys zMYb(C+)A-zlD37mkjSXF2$ylRM*mdj_s0e{X#%glHU31e+xaovCO4lEM$KOX;`s%x zs)gp0!FthKKJ|2n(9j(g(-HXE7!&R)sKjlbl^zv$xQ|G7FPuL(U{0Egm<-sGeFgrA z`TWRN;p=}jzwZ1`_GuvKmztWK*y+!cr_gcFk_G;*1n01C<^Ot0IxJzn`g!7+C$ndR zQHVCUzT9AvqU~N)p-SnTM*Cdm4x$jb>mtfi?=lfwGP!v)Gl9BIZXJa?sWE=Yn=+bq zm{yTxqq14LoF8!YEH(yl_1-LB*J5Hbdnewq*c!ur6K_6F>}G$ACyrYq*`<76eA_nj zDNccHsfAmp5iF@TD~}q)w$&<!sTnLWCvz!v9ZSi9Yp4g<AANSh(#o>-W&-C}ul+)~ zERL39CGK<^U5Fiar<Kt;SeaWkO&4=znA)w5a31ATCE3-^27Xp!dCj_#y{R#uC-T@q z4RPMOl)bJosR#v2X>gS_j*Z~=Ix>oxXl^^d&7pik&GL!b%xJ2Yzu6Kf*v!k0B9BDb z*PJ!Tk0L^EeFgcu$dp8+Ax=?x1N8_>s>)ImZOCI$%ME-2nKRU{uzbR<hW1O-Ptm#1 zAuZW#T!oqq_GVK)heA10b*%HRkgPwu-D!c8{VKKfZRED0^&xQxxo5C;r%f^o$erRH zVN4u)6282o^*Z_qe5pHQEA<2UC$X}RvY|8Z1-a!6@&Im<;}pWf4c#)7hRIT`jW%JZ z0|rz4?<uAT!`V4&B6Ltl>|(b<1wzF&CKURhYDWz7I|vb)zDK4+4L2-nnM|m(N;X7) z2$f4yBkA|}Glnl$m{QP7hOTZ{<~$8$NOK09mwD1aG+SRqZWv5^i7bRR;CroB=C@E@ ze?|%O4piK)@EB*%3iyj9=8w5^<ojZ{lKPo^Pn_j3Hz6Z%^I7}>dzU<dr>>^AkO%Pe zRvIBSc$&uqp*Y;xt>m0`QiZFdoO&{?F#DXdnat8!H(15&EV-vJL&<C)3kwySjHBE! 
zzQJ{&)KZHKyP8UIH!|(r?`wQ&l%Q5u=Tnmw?eXT1szaO=SgOxbt_oq6WBZoLk1?-c zxyv$=ne|w~GDR=5h_CeSsm)wT-Q-*2o3`Nt>{NWiZA&}*IKJgJKFQA6>(^K+*`Mu= zYw$kyU-paTISM)h!*XN{jbU1MYL6+3Yqno0GAX$ZyJO6lz!CP&G1Fnr#&_DSW7ZJ% zD~<6rBANZ(tCh&BnMgiYlChnM<qIT=@0c+D1ILcF%xWHS$gNBie@xSU&urtW`4&yf zJ$#z=dFwY?s@WlLW3YY<4DtVdh{0ukre0(ZX4dgdq7HrQee}<wwj}dO^t6c7JNH4F zs<hp74Qdf}-Z1S)yF@)JjEm5V-YwGXQ_e;xOR7@itB`LE=4K)vad}~>Vt@&UcK7dy zWquE#{qlB(L5~~SADXYA=MA=pt;Nnc@?CLe81*&Z4>vBsuOk!i#j}=TDxEwimi5sG z_))l9Zu~Xc1YePxGD4IC&+}t~S|9)ffB+Bx0zd!=00AHX1b_e#00KY&2mk>f00e*l z5C8%|00;m9AOHkj00BQP`NQkK^!f5sfQ$=&0XhaP0s$ZZ1b_e#00J+vz}&)}%Xj4n I-u+zgZ_NV*-2eap diff --git a/lustre/tests/fchdir_test.c b/lustre/tests/fchdir_test.c deleted file mode 100644 index 83c096e..0000000 --- a/lustre/tests/fchdir_test.c +++ /dev/null @@ -1,41 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> -#include <fcntl.h> - - -int main(int argc, char **argv) -{ - int fd; - int rc; - - fd = open(".", O_RDONLY); - if (fd < 0) { - perror("opening '.' :"); - exit(2); - } - - rc = chdir("/mnt/lustre/subdir/subdir"); - if (rc) { - perror("cannot chdir subdir:"); - exit(3); - } - - rc = fchdir(fd); - if (rc) { - perror("cannot fchdir back\n"); - exit(4); - } - - rc = close(fd); - if (rc) { - perror("cannot close '.'\n"); - exit(5); - } - - return(0); -} diff --git a/lustre/tests/fs.sh b/lustre/tests/fs.sh deleted file mode 100644 index b158c6e..0000000 --- a/lustre/tests/fs.sh +++ /dev/null @@ -1,27 +0,0 @@ -#! /bin/bash -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. 
-# See the file COPYING in this distribution -#!/bin/sh - -R=/r - -insmod /lib/modules/2.4.17/kernel/drivers/block/loop.o -dd if=/dev/zero of=/tmp/fs bs=1024 count=10000 -mke2fs -b 4096 -F /tmp/fs -losetup /dev/loop/0 /tmp/fs - -insmod $R/usr/src/lustre/obdclass/obdclass.o -insmod $R/usr/src/lustre/obdext2/obdext2.o -mknod /dev/obd c 10 241 - -$R/usr/src/lustre/utils/obdctl <<EOF -device 0 -attach obdext2 OBDEXT2DEV -setup /dev/loop/0 -quit -EOF - -insmod $R/usr/src/lustre/obdfs/obdfs.o -mount -t obdfs -o device=0 none /mnt diff --git a/lustre/tests/fsx.c b/lustre/tests/fsx.c deleted file mode 100644 index a2b1d5e..0000000 --- a/lustre/tests/fsx.c +++ /dev/null @@ -1,1228 +0,0 @@ -/* - * Copyright (c) 1998-2001 Apple Computer, Inc. All rights reserved. - * - * @APPLE_LICENSE_HEADER_START@ - * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. - * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. - * - * @APPLE_LICENSE_HEADER_END@ - * - * File: fsx.c - * Author: Avadis Tevanian, Jr. - * - * File system exerciser. - * - * Rewrite and enhancements 1998-2001 Conrad Minshall -- conrad@mac.com - * - * Various features from Joe Sokol, Pat Dirks, and Clark Warner. 
- * - * Small changes to work under Linux -- davej@suse.de - * - * Sundry porting patches from Guy Harris 12/2001 - * $FreeBSD: src/tools/regression/fsx/fsx.c,v 1.1 2001/12/20 04:15:57 jkh Exp $ - */ - -#include <sys/types.h> -#include <sys/stat.h> -#if defined(_UWIN) || defined(__linux__) -# include <sys/param.h> -# include <limits.h> -# include <time.h> -# include <strings.h> -# include <sys/time.h> -#endif -#include <fcntl.h> -#include <sys/mman.h> -#ifndef MAP_FILE -# define MAP_FILE 0 -#endif -#include <limits.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <stdarg.h> -#include <errno.h> - -#define NUMPRINTCOLUMNS 32 /* # columns of data to print on each line */ - -/* - * A log entry is an operation and a bunch of arguments. - */ - -struct log_entry { - int operation; - struct timeval tv; - int args[3]; -}; - -#define LOGSIZE 1000 - -struct log_entry oplog[LOGSIZE]; /* the log */ -int logptr = 0; /* current position in log */ -int logcount = 0; /* total ops */ - -/* - * Define operations - */ - -#define OP_READ 1 -#define OP_WRITE 2 -#define OP_TRUNCATE 3 -#define OP_CLOSEOPEN 4 -#define OP_MAPREAD 5 -#define OP_MAPWRITE 6 -#define OP_SKIPPED 7 - -int page_size; -int page_mask; - -char *original_buf; /* a pointer to the original data */ -char *good_buf; /* a pointer to the correct data */ -char *temp_buf; /* a pointer to the current data */ -char *fname; /* name of our test file */ -char logfile[1024]; /* name of our log file */ -char goodfile[1024]; /* name of our test file */ -int fd; /* fd for our test file */ - -off_t file_size = 0; -off_t biggest = 0; -char state[256]; -unsigned long testcalls = 0; /* calls to function "test" */ - -unsigned long simulatedopcount = 0; /* -b flag */ -int closeprob = 0; /* -c flag */ -int debug = 0; /* -d flag */ -unsigned long debugstart = 0; /* -D flag */ -unsigned long maxfilelen = 256 * 1024; /* -l flag */ -int sizechecks = 1; /* -n flag disables them 
*/ -int maxoplen = 64 * 1024; /* -o flag */ -int quiet = 0; /* -q flag */ -unsigned long progressinterval = 0; /* -p flag */ -int readbdy = 1; /* -r flag */ -int style = 0; /* -s flag */ -int truncbdy = 1; /* -t flag */ -int writebdy = 1; /* -w flag */ -long monitorstart = -1; /* -m flag */ -long monitorend = -1; /* -m flag */ -int lite = 0; /* -L flag */ -long numops = -1; /* -N flag */ -int randomoplen = 1; /* -O flag disables it */ -int seed = 1; /* -S flag */ -int mapped_writes = 1; /* -W flag disables */ -int mapped_reads = 1; /* -R flag disables it */ -int fsxgoodfd = 0; -FILE * fsxlogf = NULL; -int badoff = -1; - - -void -vwarnc(code, fmt, ap) - int code; - const char *fmt; - va_list ap; -{ - fprintf(stderr, "fsx: "); - if (fmt != NULL) { - vfprintf(stderr, fmt, ap); - fprintf(stderr, ": "); - } - fprintf(stderr, "%s\n", strerror(code)); -} - - -void -warn(const char * fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - vwarnc(errno, fmt, ap); - va_end(ap); -} - - -void -prt(char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vfprintf(stdout, fmt, args); - if (fsxlogf) - vfprintf(fsxlogf, fmt, args); - va_end(args); -} - -void -prterr(char *prefix) -{ - prt("%s%s%s\n", prefix, prefix ? 
": " : "", strerror(errno)); -} - - -void -log4(int operation, int arg0, int arg1, int arg2, struct timeval *tv) -{ - struct log_entry *le; - - le = &oplog[logptr]; - le->tv = *tv; - le->operation = operation; - le->args[0] = arg0; - le->args[1] = arg1; - le->args[2] = arg2; - logptr++; - logcount++; - if (logptr >= LOGSIZE) - logptr = 0; -} - - -void -logdump(void) -{ - int i, count, down; - struct log_entry *lp; - - prt("LOG DUMP (%d total operations):\n", logcount); - if (logcount < LOGSIZE) { - i = 0; - count = logcount; - } else { - i = logptr; - count = LOGSIZE; - } - for ( ; count > 0; count--) { - int opnum; - - opnum = i+1 + (logcount/LOGSIZE)*LOGSIZE; - lp = &oplog[i]; - prt("%d: %lu.%06lu ", opnum, - lp->tv.tv_sec, lp->tv.tv_usec); - - switch (lp->operation) { - case OP_MAPREAD: - prt("MAPREAD 0x%x thru 0x%x (0x%x bytes)", - lp->args[0], lp->args[0] + lp->args[1] - 1, - lp->args[1]); - if (badoff >= lp->args[0] && badoff < - lp->args[0] + lp->args[1]) - prt("\t***RRRR***"); - break; - case OP_MAPWRITE: - prt("MAPWRITE 0x%x thru 0x%x (0x%x bytes)", - lp->args[0], lp->args[0] + lp->args[1] - 1, - lp->args[1]); - if (badoff >= lp->args[0] && badoff < - lp->args[0] + lp->args[1]) - prt("\t******WWWW"); - break; - case OP_READ: - prt("READ 0x%x thru 0x%x (0x%x bytes)", - lp->args[0], lp->args[0] + lp->args[1] - 1, - lp->args[1]); - if (badoff >= lp->args[0] && - badoff < lp->args[0] + lp->args[1]) - prt("\t***RRRR***"); - break; - case OP_WRITE: - prt("WRITE 0x%x thru 0x%x (0x%x bytes)", - lp->args[0], lp->args[0] + lp->args[1] - 1, - lp->args[1]); - if (lp->args[0] > lp->args[2]) - prt(" HOLE"); - else if (lp->args[0] + lp->args[1] > lp->args[2]) - prt(" EXTEND"); - if ((badoff >= lp->args[0] || badoff >=lp->args[2]) && - badoff < lp->args[0] + lp->args[1]) - prt("\t***WWWW"); - break; - case OP_TRUNCATE: - down = lp->args[0] < lp->args[1]; - prt("TRUNCATE %s\tfrom 0x%x to 0x%x", - down ? 
"DOWN" : "UP", lp->args[1], lp->args[0]); - if (badoff >= lp->args[!down] && - badoff < lp->args[!!down]) - prt("\t******WWWW"); - break; - case OP_CLOSEOPEN: - prt("CLOSE/OPEN"); - break; - case OP_SKIPPED: - prt("SKIPPED (no operation)"); - break; - default: - prt("BOGUS LOG ENTRY (operation code = %d)!", - lp->operation); - } - prt("\n"); - i++; - if (i == LOGSIZE) - i = 0; - } -} - - -void -save_buffer(char *buffer, off_t bufferlength, int fd) -{ - off_t ret; - ssize_t byteswritten; - - if (fd <= 0 || bufferlength == 0) - return; - - if (bufferlength > SSIZE_MAX) { - prt("fsx flaw: overflow in save_buffer\n"); - exit(67); - } - if (lite) { - off_t size_by_seek = lseek(fd, (off_t)0, SEEK_END); - if (size_by_seek == (off_t)-1) - prterr("save_buffer: lseek eof"); - else if (bufferlength > size_by_seek) { - warn("save_buffer: .fsxgood file too short... will -save 0x%llx bytes instead of 0x%llx\n", (unsigned long long)size_by_seek, - (unsigned long long)bufferlength); - bufferlength = size_by_seek; - } - } - - ret = lseek(fd, (off_t)0, SEEK_SET); - if (ret == (off_t)-1) - prterr("save_buffer: lseek 0"); - - byteswritten = write(fd, buffer, (size_t)bufferlength); - if (byteswritten != bufferlength) { - if (byteswritten == -1) - prterr("save_buffer write"); - else - warn("save_buffer: short write, 0x%x bytes instead -of 0x%llx\n", - (unsigned)byteswritten, - (unsigned long long)bufferlength); - } -} - - -void -report_failure(int status) -{ - logdump(); - - if (fsxgoodfd) { - if (good_buf) { - save_buffer(good_buf, file_size, fsxgoodfd); - prt("Correct content saved for comparison\n"); - prt("(maybe hexdump \"%s\" vs \"%s\")\n", - fname, goodfile); - } - close(fsxgoodfd); - } - exit(status); -} - - -#define short_at(cp) ((unsigned short)((*((unsigned char *)(cp)) << 8) | \ - *(((unsigned char *)(cp)) + 1))) - -void -check_buffers(unsigned offset, unsigned size) -{ - unsigned char c, t; - unsigned i = 0; - unsigned n = 0; - unsigned op = 0; - unsigned bad = 0; - - if 
(memcmp(good_buf + offset, temp_buf, size) != 0) { - prt("READ BAD DATA: offset = 0x%x, size = 0x%x\n", - offset, size); - prt("OFFSET\tGOOD\tBAD\tRANGE\n"); - while (size > 0) { - c = good_buf[offset]; - t = temp_buf[i]; - if (c != t) { - if (n == 0) { - bad = short_at(&temp_buf[i]); - prt("0x%5x\t0x%04x\t0x%04x", offset, - short_at(&good_buf[offset]), bad); - op = temp_buf[offset & 1 ? i+1 : i]; - } - n++; - badoff = offset; - } - offset++; - i++; - size--; - } - if (n) { - prt("\t0x%5x\n", n); - if (bad) - prt("operation# (mod 256) for the bad data -may be %u\n", ((unsigned)op & 0xff)); - else - prt("operation# (mod 256) for the bad data -unknown, check HOLE and EXTEND ops\n"); - } else - prt("????????????????\n"); - report_failure(110); - } -} - - -void -check_size(void) -{ - struct stat statbuf; - off_t size_by_seek; - - if (fstat(fd, &statbuf)) { - prterr("check_size: fstat"); - statbuf.st_size = -1; - } - size_by_seek = lseek(fd, (off_t)0, SEEK_END); - if (file_size != statbuf.st_size || file_size != size_by_seek) { - prt("Size error: expected 0x%llx stat 0x%llx seek 0x%llx\n", - (unsigned long long)file_size, - (unsigned long long)statbuf.st_size, - (unsigned long long)size_by_seek); - report_failure(120); - } -} - - -void -check_trunc_hack(void) -{ - struct stat statbuf; - - ftruncate(fd, (off_t)0); - ftruncate(fd, (off_t)100000); - fstat(fd, &statbuf); - if (statbuf.st_size != (off_t)100000) { - prt("no extend on truncate! 
not posix!\n"); - exit(130); - } - ftruncate(fd, 0); -} - - -void -doread(unsigned offset, unsigned size) -{ - struct timeval t; - off_t ret; - unsigned iret; - - offset -= offset % readbdy; - gettimeofday(&t, NULL); - if (size == 0) { - if (!quiet && testcalls > simulatedopcount) - prt("skipping zero size read\n"); - log4(OP_SKIPPED, OP_READ, offset, size, &t); - return; - } - if (size + offset > file_size) { - if (!quiet && testcalls > simulatedopcount) - prt("skipping seek/read past end of file\n"); - log4(OP_SKIPPED, OP_READ, offset, size, &t); - return; - } - - log4(OP_READ, offset, size, 0, &t); - - if (testcalls <= simulatedopcount) - return; - - if (!quiet && ((progressinterval && - testcalls % progressinterval == 0) || - (debug && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend)))))) - prt("%06lu %lu.%06lu read %#08x thru %#08x\t(0x%x bytes)\n", - testcalls, t.tv_sec, t.tv_usec, offset, offset + size - 1, - size); - ret = lseek(fd, (off_t)offset, SEEK_SET); - if (ret == (off_t)-1) { - prterr("doread: lseek"); - report_failure(140); - } - iret = read(fd, temp_buf, size); - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu read done\n", t.tv_sec, t.tv_usec); - } - if (iret != size) { - if (iret == -1) - prterr("doread: read"); - else - prt("short read: 0x%x bytes instead of 0x%x\n", - iret, size); - report_failure(141); - } - check_buffers(offset, size); -} - - -void -domapread(unsigned offset, unsigned size) -{ - struct timeval t; - unsigned pg_offset; - unsigned map_size; - char *p; - - offset -= offset % readbdy; - gettimeofday(&t, NULL); - if (size == 0) { - if (!quiet && testcalls > simulatedopcount) - prt("skipping zero size read\n"); - log4(OP_SKIPPED, OP_MAPREAD, offset, size, &t); - return; - } - if (size + offset > file_size) { - if (!quiet && testcalls > 
simulatedopcount) - prt("skipping seek/read past end of file\n"); - log4(OP_SKIPPED, OP_MAPREAD, offset, size, &t); - return; - } - - log4(OP_MAPREAD, offset, size, 0, &t); - - if (testcalls <= simulatedopcount) - return; - - if (!quiet && ((progressinterval && - testcalls % progressinterval == 0) || - (debug && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend)))))) - prt("%06lu %lu.%06lu mapread %#08x thru %#08x\t(0x%x bytes)\n", - testcalls, t.tv_sec, t.tv_usec, offset, offset + size - 1, - size); - - pg_offset = offset & page_mask; - map_size = pg_offset + size; - - if ((p = (char *)mmap(0, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd, - (off_t)(offset - pg_offset))) == (char *)-1) { - prterr("domapread: mmap"); - report_failure(190); - } - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec); - } - memcpy(temp_buf, p + pg_offset, size); - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu memcpy done\n", t.tv_sec, t.tv_usec); - } - if (munmap(p, map_size) != 0) { - prterr("domapread: munmap"); - report_failure(191); - } - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu munmap done\n", t.tv_sec, t.tv_usec); - } - - check_buffers(offset, size); -} - - -void -gendata(char *original_buf, char *good_buf, unsigned offset, unsigned size) -{ - while (size--) { - good_buf[offset] = testcalls % 256; - if (offset % 2) - good_buf[offset] += original_buf[offset]; - offset++; - } -} - - -void -dowrite(unsigned offset, unsigned size) -{ - struct timeval t; - off_t ret; - unsigned iret; - - offset 
-= offset % writebdy; - gettimeofday(&t, NULL); - if (size == 0) { - if (!quiet && testcalls > simulatedopcount) - prt("skipping zero size write\n"); - log4(OP_SKIPPED, OP_WRITE, offset, size, &t); - return; - } - - log4(OP_WRITE, offset, size, file_size, &t); - - gendata(original_buf, good_buf, offset, size); - if (file_size < offset + size) { - if (file_size < offset) - memset(good_buf + file_size, '\0', offset - file_size); - file_size = offset + size; - if (lite) { - warn("Lite file size bug in fsx!"); - report_failure(149); - } - } - - if (testcalls <= simulatedopcount) - return; - - if (!quiet && ((progressinterval && - testcalls % progressinterval == 0) || - (debug && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend)))))) - prt("%06lu %lu.%06lu write %#08x thru %#08x\t(0x%x bytes)\n", - testcalls, t.tv_sec, t.tv_usec, offset, offset + size - 1, - size); - ret = lseek(fd, (off_t)offset, SEEK_SET); - if (ret == (off_t)-1) { - prterr("dowrite: lseek"); - report_failure(150); - } - iret = write(fd, good_buf + offset, size); - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu write done\n", t.tv_sec, t.tv_usec); - } - if (iret != size) { - if (iret == -1) - prterr("dowrite: write"); - else - prt("short write: 0x%x bytes instead of 0x%x\n", - iret, size); - report_failure(151); - } -} - - -void -domapwrite(unsigned offset, unsigned size) -{ - struct timeval t; - unsigned pg_offset; - unsigned map_size; - off_t cur_filesize; - char *p; - - offset -= offset % writebdy; - gettimeofday(&t, NULL); - if (size == 0) { - if (!quiet && testcalls > simulatedopcount) - prt("skipping zero size write\n"); - log4(OP_SKIPPED, OP_MAPWRITE, offset, size, &t); - return; - } - cur_filesize = file_size; - - log4(OP_MAPWRITE, offset, size, 0, &t); - - gendata(original_buf, good_buf, offset, size); - 
if (file_size < offset + size) { - if (file_size < offset) - memset(good_buf + file_size, '\0', offset - file_size); - file_size = offset + size; - if (lite) { - warn("Lite file size bug in fsx!"); - report_failure(200); - } - } - - if (testcalls <= simulatedopcount) - return; - - if (!quiet && ((progressinterval && - testcalls % progressinterval == 0) || - (debug && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend)))))) - prt("%06lu %lu.%06lu mapwrite %#08x thru %#08x\t(0x%x bytes)\n", - testcalls, t.tv_sec, t.tv_usec, offset, offset + size - 1, - size); - - if (file_size > cur_filesize) { - if (ftruncate(fd, file_size) == -1) { - prterr("domapwrite: ftruncate"); - exit(201); - } - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu truncate done\n", t.tv_sec, t.tv_usec); - } - } - pg_offset = offset & page_mask; - map_size = pg_offset + size; - - if ((p = (char *)mmap(0, map_size, PROT_READ | PROT_WRITE, - MAP_FILE | MAP_SHARED, fd, - (off_t)(offset - pg_offset))) == (char *)-1) { - prterr("domapwrite: mmap"); - report_failure(202); - } - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu mmap done\n", t.tv_sec, t.tv_usec); - } - memcpy(p + pg_offset, good_buf + offset, size); - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu memcpy done\n", t.tv_sec, t.tv_usec); - } - if (msync(p, map_size, 0) != 0) { - prterr("domapwrite: msync"); - report_failure(203); - } - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, 
NULL); - prt(" %lu.%06lu msync done\n", t.tv_sec, t.tv_usec); - } - if (munmap(p, map_size) != 0) { - prterr("domapwrite: munmap"); - report_failure(204); - } - if (!quiet && (debug > 1 && - (monitorstart == -1 || - (offset + size > monitorstart && - (monitorend == -1 || offset <= monitorend))))) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu munmap done\n", t.tv_sec, t.tv_usec); - } -} - - -void -dotruncate(unsigned size) -{ - struct timeval t; - int oldsize = file_size; - - size -= size % truncbdy; - gettimeofday(&t, NULL); - if (size > biggest) { - biggest = size; - if (!quiet && testcalls > simulatedopcount) - prt("truncating to largest ever: 0x%x\n", size); - } - - log4(OP_TRUNCATE, size, (unsigned)file_size, 0, &t); - - if (size > file_size) - memset(good_buf + file_size, '\0', size - file_size); - file_size = size; - - if (testcalls <= simulatedopcount) - return; - - if ((progressinterval && testcalls % progressinterval == 0) || - (debug && (monitorstart == -1 || monitorend == -1 || - size <= monitorend))) - prt("%06lu %lu.%06lu trunc from %#08x to %#08x\n", - testcalls, t.tv_sec, t.tv_usec, oldsize, size); - if (ftruncate(fd, (off_t)size) == -1) { - prt("ftruncate1: %x\n", size); - prterr("dotruncate: ftruncate"); - report_failure(160); - } - if (!quiet && debug > 1) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu trunc done\n", t.tv_sec, t.tv_usec); - } -} - - -void -writefileimage() -{ - ssize_t iret; - - if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) { - prterr("writefileimage: lseek"); - report_failure(171); - } - iret = write(fd, good_buf, file_size); - if ((off_t)iret != file_size) { - if (iret == -1) - prterr("writefileimage: write"); - else - prt("short write: 0x%x bytes instead of 0x%llx\n", - iret, (unsigned long long)file_size); - report_failure(172); - } - if (lite ? 
0 : ftruncate(fd, file_size) == -1) { - prt("ftruncate2: %llx\n", (unsigned long long)file_size); - prterr("writefileimage: ftruncate"); - report_failure(173); - } -} - - -void -docloseopen(void) -{ - struct timeval t; - - if (testcalls <= simulatedopcount) - return; - - gettimeofday(&t, NULL); - log4(OP_CLOSEOPEN, file_size, (unsigned)file_size, 0, &t); - - if (debug) - prt("%06lu %lu.%06lu close/open\n", testcalls, t.tv_sec, - t.tv_usec); - if (close(fd)) { - prterr("docloseopen: close"); - report_failure(180); - } - if (!quiet && debug > 1) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu close done\n", t.tv_sec, t.tv_usec); - } - fd = open(fname, O_RDWR, 0); - if (fd < 0) { - prterr("docloseopen: open"); - report_failure(181); - } - if (!quiet && debug > 1) { - gettimeofday(&t, NULL); - prt(" %lu.%06lu open done\n", t.tv_sec, t.tv_usec); - } -} - - -void -test(void) -{ - unsigned long offset; - unsigned long size = maxoplen; - unsigned long rv = random(); - unsigned long op = rv % (3 + !lite + mapped_writes); - - /* turn off the map read if necessary */ - - if (op == 2 && !mapped_reads) - op = 0; - - if (simulatedopcount > 0 && testcalls == simulatedopcount) - writefileimage(); - - testcalls++; - - if (debugstart > 0 && testcalls >= debugstart) - debug = 1; - - if (!quiet && testcalls < simulatedopcount && testcalls % 100000 == 0) - prt("%lu...\n", testcalls); - - /* - * READ: op = 0 - * WRITE: op = 1 - * MAPREAD: op = 2 - * TRUNCATE: op = 3 - * MAPWRITE: op = 3 or 4 - */ - if (lite ? 0 : op == 3 && (style & 1) == 0) /* vanilla truncate? */ - dotruncate(random() % maxfilelen); - else { - if (randomoplen) - size = random() % (maxoplen+1); - if (lite ? 0 : op == 3) - dotruncate(size); - else { - offset = random(); - if (op == 1 || op == (lite ? 
3 : 4)) { - offset %= maxfilelen; - if (offset + size > maxfilelen) - size = maxfilelen - offset; - if (op != 1) - domapwrite(offset, size); - else - dowrite(offset, size); - } else { - if (file_size) - offset %= file_size; - else - offset = 0; - if (offset + size > file_size) - size = file_size - offset; - if (op != 0) - domapread(offset, size); - else - doread(offset, size); - } - } - } - if (sizechecks && testcalls > simulatedopcount) - check_size(); - if (closeprob && (rv >> 3) < (1 << 28) / closeprob) - docloseopen(); -} - - -void -cleanup(sig) - int sig; -{ - if (sig) - prt("signal %d\n", sig); - prt("testcalls = %lu\n", testcalls); - exit(sig); -} - - -void -usage(void) -{ - fprintf(stdout, "usage: %s", - "fsx [-dnqLOW] [-b opnum] [-c Prob] [-l flen] [-m -start:end] [-o oplen] [-p progressinterval] [-r readbdy] [-s style] [-t -truncbdy] [-w writebdy] [-D startingop] [-N numops] [-P dirpath] [-S seed] -fname\n\ - -b opnum: beginning operation number (default 1)\n\ - -c P: 1 in P chance of file close+open at each op (default infinity)\n\ - -d: debug output for all operations [-d -d = more debugging]\n\ - -l flen: the upper bound on file size (default 262144)\n\ - -m startop:endop: monitor (print debug output) specified byte range -(default 0:infinity)\n\ - -n: no verifications of file size\n\ - -o oplen: the upper bound on operation size (default 65536)\n\ - -p progressinterval: debug output at specified operation interval\n\ - -q: quieter operation\n\ - -r readbdy: 4096 would make reads page aligned (default 1)\n\ - -s style: 1 gives smaller truncates (default 0)\n\ - -t truncbdy: 4096 would make truncates page aligned (default 1)\n\ - -w writebdy: 4096 would make writes page aligned (default 1)\n\ - -D startingop: debug output starting at specified operation\n\ - -L: fsxLite - no file creations & no file size changes\n\ - -N numops: total # operations to do (default infinity)\n\ - -O: use oplen (see -o flag) for every op (default random)\n\ - -P: save 
.fsxlog and .fsxgood files in dirpath (default ./)\n\ - -S seed: for random # generator (default 1) 0 gets timestamp\n\ - -W: mapped write operations DISabled\n\ - -R: read() system calls only (mapped reads disabled)\n\ - fname: this filename is REQUIRED (no default)\n"); - exit(90); -} - - -int -getnum(char *s, char **e) -{ - int ret = -1; - - *e = (char *) 0; - ret = strtol(s, e, 0); - if (*e) - switch (**e) { - case 'b': - case 'B': - ret *= 512; - *e = *e + 1; - break; - case 'k': - case 'K': - ret *= 1024; - *e = *e + 1; - break; - case 'm': - case 'M': - ret *= 1024*1024; - *e = *e + 1; - break; - case 'w': - case 'W': - ret *= 4; - *e = *e + 1; - break; - } - return (ret); -} - - -static const char *basename(const char *path) -{ - char *c = strrchr(path, '/'); - - return c ? c++ : path; -} - -int -main(int argc, char **argv) -{ - int i, style, ch; - char *endp; - int dirpath = 0; - - goodfile[0] = 0; - logfile[0] = 0; - - page_size = getpagesize(); - page_mask = page_size - 1; - - setvbuf(stdout, (char *)0, _IOLBF, 0); /* line buffered stdout */ - - while ((ch = getopt(argc, argv, "b:c:dl:m:no:p:qr:s:t:w:D:LN:OP:RS:W")) - != EOF) - switch (ch) { - case 'b': - simulatedopcount = getnum(optarg, &endp); - if (!quiet) - fprintf(stdout, "Will begin at operation -%ld\n", - simulatedopcount); - if (simulatedopcount == 0) - usage(); - simulatedopcount -= 1; - break; - case 'c': - closeprob = getnum(optarg, &endp); - if (!quiet) - fprintf(stdout, - "Chance of close/open is 1 in %d\n", - closeprob); - if (closeprob <= 0) - usage(); - break; - case 'd': - debug++; - break; - case 'l': - maxfilelen = getnum(optarg, &endp); - if (maxfilelen <= 0) - usage(); - break; - case 'm': - monitorstart = getnum(optarg, &endp); - if (monitorstart < 0) - usage(); - if (!endp || *endp++ != ':') - usage(); - monitorend = getnum(endp, &endp); - if (monitorend < 0) - usage(); - if (monitorend == 0) - monitorend = -1; /* aka infinity */ - debug = 1; - case 'n': - sizechecks = 0; - break; 
- case 'o': - maxoplen = getnum(optarg, &endp); - if (maxoplen <= 0) - usage(); - break; - case 'p': - progressinterval = getnum(optarg, &endp); - if (progressinterval < 0) - usage(); - break; - case 'q': - quiet = 1; - break; - case 'r': - readbdy = getnum(optarg, &endp); - if (readbdy <= 0) - usage(); - break; - case 's': - style = getnum(optarg, &endp); - if (style < 0 || style > 1) - usage(); - break; - case 't': - truncbdy = getnum(optarg, &endp); - if (truncbdy <= 0) - usage(); - break; - case 'w': - writebdy = getnum(optarg, &endp); - if (writebdy <= 0) - usage(); - break; - case 'D': - debugstart = getnum(optarg, &endp); - if (debugstart < 1) - usage(); - break; - case 'L': - lite = 1; - break; - case 'N': - numops = getnum(optarg, &endp); - if (numops < 0) - usage(); - break; - case 'O': - randomoplen = 0; - break; - case 'P': - strncpy(goodfile, optarg, sizeof(goodfile)); - strcat(goodfile, "/"); - strncpy(logfile, optarg, sizeof(logfile)); - strcat(logfile, "/"); - dirpath = 1; - break; - case 'R': - mapped_reads = 0; - break; - case 'S': - seed = getnum(optarg, &endp); - if (seed == 0) - seed = time(0) % 10000; - if (!quiet) - fprintf(stdout, "Seed set to %d\n", seed); - if (seed < 0) - usage(); - break; - case 'W': - mapped_writes = 0; - if (!quiet) - fprintf(stdout, "mapped writes DISABLED\n"); - break; - - default: - usage(); - /* NOTREACHED */ - } - argc -= optind; - argv += optind; - if (argc != 1) - usage(); - fname = argv[0]; - - signal(SIGHUP, cleanup); - signal(SIGINT, cleanup); - signal(SIGPIPE, cleanup); - signal(SIGALRM, cleanup); - signal(SIGTERM, cleanup); - signal(SIGXCPU, cleanup); - signal(SIGXFSZ, cleanup); - signal(SIGVTALRM, cleanup); - signal(SIGUSR1, cleanup); - signal(SIGUSR2, cleanup); - - initstate(seed, state, 256); - setstate(state); - fd = open(fname, O_RDWR|(lite ? 0 : O_CREAT|O_TRUNC), 0666); - if (fd < 0) { - prterr(fname); - exit(91); - } - strncat(goodfile, dirpath ? 
basename(fname) : fname, 256); - strcat (goodfile, ".fsxgood"); - fsxgoodfd = open(goodfile, O_RDWR|O_CREAT|O_TRUNC, 0666); - if (fsxgoodfd < 0) { - prterr(goodfile); - exit(92); - } - strncat(logfile, dirpath ? basename(fname) : fname, 256); - strcat (logfile, ".fsxlog"); - fsxlogf = fopen(logfile, "w"); - if (fsxlogf == NULL) { - prterr(logfile); - exit(93); - } - if (lite) { - off_t ret; - file_size = maxfilelen = lseek(fd, (off_t)0, SEEK_END); - if (file_size == (off_t)-1) { - prterr(fname); - warn("main: lseek eof"); - exit(94); - } - ret = lseek(fd, (off_t)0, SEEK_SET); - if (ret == (off_t)-1) { - prterr(fname); - warn("main: lseek 0"); - exit(95); - } - } - original_buf = (char *) malloc(maxfilelen); - for (i = 0; i < maxfilelen; i++) - original_buf[i] = random() % 256; - good_buf = (char *) malloc(maxfilelen); - memset(good_buf, '\0', maxfilelen); - temp_buf = (char *) malloc(maxoplen); - memset(temp_buf, '\0', maxoplen); - if (lite) { /* zero entire existing file */ - ssize_t written; - - written = write(fd, good_buf, (size_t)maxfilelen); - if (written != maxfilelen) { - if (written == -1) { - prterr(fname); - warn("main: error on write"); - } else - warn("main: short write, 0x%x bytes instead -of 0x%x\n", - (unsigned)written, maxfilelen); - exit(98); - } - } else - check_trunc_hack(); - - while (numops == -1 || numops--) - test(); - - if (close(fd)) { - prterr("close"); - report_failure(99); - } - prt("All operations completed A-OK!\n"); - - exit(0); - return 0; -} diff --git a/lustre/tests/intent-test.sh b/lustre/tests/intent-test.sh deleted file mode 100755 index 9113f17..0000000 --- a/lustre/tests/intent-test.sh +++ /dev/null @@ -1,122 +0,0 @@ -#!/bin/bash -x - -MTPT=/mnt/lustre - -remount() { - umount $MTPT || exit -1 - debugctl clear - mount -t lustre_lite -o osc=OSCDEV-UUID,mdc=MDCDEV-UUID none $MTPT -} - -# Test mkdir -mkdir $MTPT/dir -mkdir $MTPT/dir2 - -# Test mkdir on existing directory -mkdir $MTPT/dir - -remount - -# Test mkdir on existing 
directory with no locks already held -mkdir $MTPT/dir - -remount - -# Use mknod to create a file -./mcreate $MTPT/file -# ...on an existing file. -./mcreate $MTPT/file - -remount - -# Use mknod to create a file with no locks already held -./mcreate $MTPT/file - -remount - -ls -l $MTPT/file - -remount - -cat $MTPT/file -./mcreate $MTPT/file2 -cat $MTPT/file2 -./mcreate $MTPT/file3 - -remount - -./tchmod 777 $MTPT/file3 - -remount - -./mcreate $MTPT/file4 -./tchmod 777 $MTPT/file4 - -remount - -ls -l $MTPT/file4 -./tchmod 777 $MTPT/file4 - -remount - -cat $MTPT/file4 -./tchmod 777 $MTPT/file4 - -remount - -touch $MTPT/file5 -touch $MTPT/file6 -touch $MTPT/file5 - -remount - -touch $MTPT/file5 - -remount - -echo foo >> $MTPT/file -cat $MTPT/file - -remount - -cat $MTPT/file - -echo foo >> $MTPT/iotest -echo bar >> $MTPT/iotest -cat $MTPT/iotest - -remount - -cat $MTPT/iotest -echo baz >> $MTPT/iotest - -remount - -ls $MTPT - -remount - -mkdir $MTPT/new -ls $MTPT - -remount - -ls $MTPT -mkdir $MTPT/newer -ls $MTPT - -remount - -cat $MTPT/iotest -echo "Testing truncation..." -echo foo > $MTPT/iotest -echo bar >> $MTPT/iotest -cat $MTPT/iotest -echo "trucating to 4 bytes now..." -./truncate $MTPT/iotest 4 -cat $MTPT/iotest - -remount - -ls $MTPT -rmdir $MTPT/foo diff --git a/lustre/tests/intent-test2.sh b/lustre/tests/intent-test2.sh deleted file mode 100644 index 428039c..0000000 --- a/lustre/tests/intent-test2.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash - -SRCDIR="`dirname $0`" -. $SRCDIR/common.sh - -setup_opts "$@" - -set -vx - -MTPT1=/mnt/lustre1 -MTPT2=/mnt/lustre2 - -remount() { - umount $MTPT1 || exit -1 - umount $MTPT2 || exit -1 - debugctl clear - setup_mount || fail "cannot remount /mnt/lustre" -} - -fail() { - echo "unexpected failure" - exit -1 -} - -[ "`mount | grep $MTPT1`" ] || . 
llsetup.sh "$@" || exit -1 - -mkdir $MTPT1/dir1 || fail -echo "Next mkdir should fail" -mkdir $MTPT2/dir1 && fail -mkdir $MTPT2/dir2 || fail -echo "Next mkdirs should fail" -mkdir $MTPT1/dir2 && fail - -remount - -echo "Next 2 mkdir should fail" -mkdir $MTPT2/dir1 && fail -mkdir $MTPT1/dir2 && fail - -./mcreate $MTPT2/file1 -echo "Next mcreate should fail" -./mcreate $MTPT2/file1 && fail -./mcreate $MTPT2/file2 || fail -echo "Next mcreate should fail" -./mcreate $MTPT1/file2 && fail - -remount - -echo "Next 2 mcreates should fail" -./mcreate $MTPT2/file1 && fail -./mcreate $MTPT1/file2 && fail - -rmdir $MTPT1/dir2 || fail -echo "Next rmdir should fail" -rmdir $MTPT2/dir2 && fail -rmdir $MTPT2/dir1 || fail - -remount - -echo "Next rpmdir should fail" - -echo "File I/O: you should see increasing sequences of contiguous numbers" -echo 1 >> $MTPT1/file1 -cat $MTPT2/file1 -echo 2 >> $MTPT2/file1 -cat $MTPT1/file1 -echo 3 >> $MTPT2/file1 -cat $MTPT1/file1 -echo 4 >> $MTPT1/file1 -cat $MTPT1/file1 diff --git a/lustre/tests/ldaptest.c b/lustre/tests/ldaptest.c deleted file mode 100644 index c1a7499..0000000 --- a/lustre/tests/ldaptest.c +++ /dev/null @@ -1,27 +0,0 @@ -#include <ldap.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> - -int main(int argc, char **argv) -{ - LDAP *ld; - int err; - - ld = ldap_init("localhost", 389); - if (!ld) { - fprintf(stderr, "ldap_init: %s\n", strerror(errno)); - exit(1); - } - - err = ldap_bind_s(ld, "cn=Manager,dc=lustre,dc=cfs", "secret", - LDAP_AUTH_SIMPLE); - if (err) { - fprintf(stderr, "ldap_bind: %s\n", ldap_err2string(err)); - exit(1); - } - - - - -} diff --git a/lustre/tests/ldlm.cfg b/lustre/tests/ldlm.cfg deleted file mode 100644 index 054f983..0000000 --- a/lustre/tests/ldlm.cfg +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -# Config file for setting up the lock manager -SETUP_LDLM=y diff --git a/lustre/tests/leak_finder.pl b/lustre/tests/leak_finder.pl deleted file mode 100644 index b8d234b..0000000 --- 
a/lustre/tests/leak_finder.pl +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/perl -w - -use IO::Handle; - -STDOUT->autoflush(1); -STDERR->autoflush(1); - -my ($line, $memory); -my $debug_line = 0; - -while ($line = <>) { - $debug_line++; - my ($file, $func, $lno, $name, $size, $addr, $type); - if ($line =~ m/^.*\((.*):(\d+):(.*)\(\) (\d+ \| )?\d+\+\d+\): [vk](.*) '(.*)': (\d+) at (.*) \(tot .*$/) { - $file = $1; - $lno = $2; - $func = $3; - $type = $5; - $name = $6; - $size = $7; - $addr = $8; - - # we can't dump the log after portals has exited, so skip "leaks" - # from memory freed in the portals module unloading. - if ($func eq 'portals_handle_init') { - next; - } - printf("%8s %6d bytes at %s called %s (%s:%s:%d)\n", $type, $size, - $addr, $name, $file, $func, $lno); - } else { - next; - } - - if ($type eq 'malloced') { - $memory->{$addr}->{name} = $name; - $memory->{$addr}->{size} = $size; - $memory->{$addr}->{file} = $file; - $memory->{$addr}->{func} = $func; - $memory->{$addr}->{lno} = $lno; - $memory->{$addr}->{debug_line} = $debug_line; - } else { - if (!defined($memory->{$addr})) { - print STDERR "*** Free without malloc ($size bytes at $addr, $file:$func:$lno)\n"; - next; - } - my ($oldname, $oldsize, $oldfile, $oldfunc, $oldlno) = $memory->{$addr}; - - if ($memory->{$addr}->{size} != $size) { - print STDERR "*** Free different size ($memory->{$addr}->{size} alloced, $size freed).\n"; - print STDERR " malloc at $memory->{$addr}->{file}:$memory->{$addr}->{func}:$memory->{$addr}->{lno}, free at $file:$func:$lno\n"; - next; - } - - delete $memory->{$addr}; - } -} - -# Sort leak output by allocation time -my @sorted = sort { - return $memory->{$a}->{debug_line} <=> $memory->{$b}->{debug_line}; -} keys(%{$memory}); - -my $key; -foreach $key (@sorted) { - my ($oldname, $oldsize, $oldfile, $oldfunc, $oldlno) = $memory->{$key}; - print STDERR "*** Leak: $memory->{$key}->{size} bytes allocated at $key 
($memory->{$key}->{file}:$memory->{$key}->{func}:$memory->{$key}->{lno}, debug file line $memory->{$key}->{debug_line})\n"; -} - -print "Done.\n"; diff --git a/lustre/tests/lkcdmap b/lustre/tests/lkcdmap deleted file mode 100755 index 20c8c20..0000000 --- a/lustre/tests/lkcdmap +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh -TMP=${TMP:-/tmp} -LCMD=$TMP/lkcd-cmds-`hostname` -echo "Storing LKCD module info in $LCMD" -cat /tmp/ogdb-`hostname` | while read JUNK M JUNK; do - MOD="../$M" - MAP=`echo $MOD | sed -e 's/\.o$/.map/'` - MODNAME=`basename $MOD | sed -e 's/\.o$//'` - - nm $MOD > $MAP - echo namelist -a $PWD/$MOD | tee -a $LCMD - echo symtab -a $PWD/$MAP $MODNAME | tee -a $LCMD -done diff --git a/lustre/tests/llcleanup.sh b/lustre/tests/llcleanup.sh deleted file mode 100755 index b718e93..0000000 --- a/lustre/tests/llcleanup.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -[ -f $SRCDIR/common.sh ] || SRCDIR="/lib/lustre" - -. $SRCDIR/common.sh - -setup_opts "$@" - -TIME=`date +'%s'` - -$DBGCTL debug_kernel /tmp/debug.1.$TIME -cleanup_client -$DBGCTL debug_kernel /tmp/debug.2.$TIME -cleanup_server - -cleanup_ldlm -cleanup_lustre -cleanup_portals diff --git a/lustre/tests/lldlm.sh b/lustre/tests/lldlm.sh deleted file mode 100755 index 58da470..0000000 --- a/lustre/tests/lldlm.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -. $SRCDIR/common.sh - -export DEBUG_WAIT=yes -. $SRCDIR/llsetup.sh $SRCDIR/net-local.cfg $SRCDIR/ldlm.cfg $SRCDIR/obdecho.cfg $SRCDIR/client-echo.cfg || exit 2 - -cat <<EOF -********************************************** -To run tests, use $OBDCTL. -$OBDCTL -device `$OBDCTL name2dev OSCDEV` -probe - -To test basic locking functionality: -test_ldlm - -The regression stress test will start some -number of threads, each locking and unlocking -extents from a set of resources. To run it: -ldlm_regress_start [numthreads [refheld [numres [numext]]]] -numthreads is the number of threads to start. 
- (default 1) -refheld is the total number of resources to hold, - between all the threads. Once this number - is reached, every time a lock is granted - or matched, the oldest reference is - decremented. - (default 10) -numres is the number of resources to use - (default 10) -numext is the number of extents to divide - each resource into - (default 10) - -To stop the test: -ldlm_regress_stop -********************************************** -EOF diff --git a/lustre/tests/llecho.sh b/lustre/tests/llecho.sh deleted file mode 100644 index 5afade1..0000000 --- a/lustre/tests/llecho.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -LCONF=${LCONF:-../utils/lconf} -NAME=${NAME:-echo} - -config=$NAME.xml -mkconfig=$NAME.sh - -if [ "$LUSTRE" ]; then - lustre_opt="--lustre=$LUSTRE" -fi - -sh -x $mkconfig $config || exit 1 - -$LCONF $lustre_opt --reformat --gdb $OPTS $config || exit 4 - -cat <<EOF - -run getattr tests as: -../utils/lctl --device '\$ECHO_$SERVER' test_getattr 1000000 -EOF diff --git a/lustre/tests/llechocleanup.sh b/lustre/tests/llechocleanup.sh deleted file mode 100755 index 22d7550..0000000 --- a/lustre/tests/llechocleanup.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -LCONF=../utils/lconf -NAME=${NAME:-echo} -TMP=${TMP:-/tmp} - -config=$NAME.xml -mkconfig=$NAME.sh - -if [ ! -f $config ]; then - sh $mkconfig $config || exit 1 -fi - -${LCONF} --cleanup $NAME.xml - diff --git a/lustre/tests/llext3.sh b/lustre/tests/llext3.sh deleted file mode 100755 index 9826ac7..0000000 --- a/lustre/tests/llext3.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -. $SRCDIR/common.sh - -export DEBUG_WAIT=yes -. 
$SRCDIR/llsetup.sh $SRCDIR/net-local.cfg $SRCDIR/client-mount.cfg $SRCDIR/mds.cfg $SRCDIR/obdext2.cfg || exit 2 - -debug_client_on -#debug_client_off diff --git a/lustre/tests/llmodules.sh b/lustre/tests/llmodules.sh deleted file mode 100644 index a39b73c..0000000 --- a/lustre/tests/llmodules.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -[ -f $SRCDIR/common.sh ] || SRCDIR="/lib/lustre" - -. $SRCDIR/common.sh - -setup_opts "$@" - -[ -c /dev/portals ] || mknod /dev/portals c 10 240 -do_insmod $PORTALS/linux/oslib/portals.o || exit -1 -case $NETWORK in -elan) do_insmod $PORTALS/linux/rqswnal/kqswnal.o || exit -1 - ;; -tcp) do_insmod $PORTALS/linux/socknal/ksocknal.o || exit -1 - ;; -*) fail "$0: unknown NETWORK '$NETWORK'" ;; -esac - -[ -c /dev/obd ] || mknod /dev/obd c 10 241 - -do_insmod $LUSTRE/obdclass/obdclass.o || exit -1 -do_insmod $LUSTRE/ptlrpc/ptlrpc.o || exit -1 -do_insmod $LUSTRE/ldlm/ldlm.o || exit -1 -do_insmod $LUSTRE/extN/extN.o || \ - echo "info: can't load extN.o module, not fatal if using ext3" -do_insmod $LUSTRE/mds/mds.o || exit -1 -do_insmod $LUSTRE/obdclass/fsfilt_extN.o || \ - echo "info: can't load fsfilt_extN.o module, needs extN.o" -do_insmod $LUSTRE/obdecho/obdecho.o || exit -1 -do_insmod $LUSTRE/obdfilter/obdfilter.o || exit -1 -do_insmod $LUSTRE/ost/ost.o || exit -1 -do_insmod $LUSTRE/osc/osc.o || exit -1 -do_insmod $LUSTRE/mdc/mdc.o || exit -1 -do_insmod $LUSTRE/lov/lov.o || exit -1 -do_insmod $LUSTRE/llite/llite.o || exit -1 -echo "$R/tmp/lustre-log" > /proc/sys/portals/debug_path - -list_mods - - diff --git a/lustre/tests/llmount-client.sh b/lustre/tests/llmount-client.sh deleted file mode 100644 index 503f93f..0000000 --- a/lustre/tests/llmount-client.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -SRCDIR="`dirname $0`/" -. $SRCDIR/common.sh - -export DEBUG_WAIT=yes -. 
$SRCDIR/llsetup.sh $SRCDIR/net-client.cfg $SRCDIR/ldlm.cfg $SRCDIR/client-mount.cfg || exit 2 - -debug_client_on -#debug_client_off diff --git a/lustre/tests/llmount-server.sh b/lustre/tests/llmount-server.sh deleted file mode 100644 index d31f033..0000000 --- a/lustre/tests/llmount-server.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -SRCDIR="`dirname $0`/" -. $SRCDIR/common.sh - -export DEBUG_WAIT=yes -. $SRCDIR/llsetup.sh $SRCDIR/net-server.cfg $SRCDIR/ldlm.cfg $SRCDIR/mds.cfg $SRCDIR/obdfilter.cfg || exit 2 - -debug_client_on -#debug_client_off diff --git a/lustre/tests/llmount.sh b/lustre/tests/llmount.sh deleted file mode 100755 index 8e3b37b..0000000 --- a/lustre/tests/llmount.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh -# suggested boilerplate for test script - -export PATH=`dirname $0`/../utils:$PATH - -LCONF=${LCONF:-lconf} -NAME=${NAME:-local} - -config=$NAME.xml -mkconfig=$NAME.sh - -if [ "$PORTALS" ]; then - portals_opt="--portals=$PORTALS" -fi - -if [ "$LUSTRE" ]; then - lustre_opt="--lustre=$LUSTRE" -fi - -if [ "$LDAPURL" ]; then - conf_opt="--ldapurl $LDAPURL --config $NAME" -else - sh $mkconfig $config || exit 1 - conf_opt="$config" -fi - -[ "$NODE" ] && node_opt="--node $NODE" - -if [ "$1" = "-v" ]; then - verbose="-v" -fi - -${LCONF} $portals_opt $lustre_opt $node_opt ${REFORMAT:---reformat} --gdb \ - $verbose $conf_opt || exit 2 diff --git a/lustre/tests/llmountcleanup.sh b/lustre/tests/llmountcleanup.sh deleted file mode 100755 index 572cd65..0000000 --- a/lustre/tests/llmountcleanup.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/sh - -export PATH=`dirname $0`/../utils:$PATH - -LCONF=${LCONF:-lconf} -NAME=${NAME:-local} -TMP=${TMP:-/tmp} - -config=$NAME.xml -mkconfig=$NAME.sh - -if [ "$PORTALS" ]; then - portals_opt="--portals=$PORTALS" -fi - -if [ "$LUSTRE" ]; then - lustre_opt="--lustre=$LUSTRE" -fi - -if [ "$1" = "--force" ]; then - force="--force" -fi - -if [ "$LDAPURL" ]; then - conf_opt="--ldapurl $LDAPURL --config $NAME" -else - if [ ! 
-f $config -o $mkconfig -nt $config ]; then - sh $mkconfig $config || exit 1 - fi - conf_opt="$config" -fi - -[ "$NODE" ] && node_opt="--node $NODE" - -sync; sleep 2; sync -${LCONF} $portals_opt $lustre_opt $node_opt --cleanup $force \ - --dump $TMP/debug $conf_opt -rc=$? -BUSY=`dmesg | grep -i destruct` -if [ "$BUSY" ]; then - echo "$BUSY" 1>&2 - mv $TMP/debug $TMP/debug-busy.`date +%s` - exit 255 -fi -LEAK_LUSTRE=`dmesg | grep "obd mem.*leaked" | tail -1 | grep -v "leaked: 0"` -LEAK_PORTALS=`dmesg | tail -20 | grep "Portals memory leaked"` -if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then - echo "$LEAK_LUSTRE" 1>&2 - echo "$LEAK_PORTALS" 1>&2 - mv $TMP/debug $TMP/debug-leak.`date +%s` - exit 254 -fi - -exit $rc diff --git a/lustre/tests/llrext3.sh b/lustre/tests/llrext3.sh deleted file mode 100755 index c47fe9b..0000000 --- a/lustre/tests/llrext3.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -. $SRCDIR/common.sh - -export DEBUG_WAIT=yes -. $SRCDIR/llrsetup.sh $SRCDIR/net-local.cfg $SRCDIR/client-mount.cfg $SRCDIR/mds.cfg $SRCDIR/obdext2.cfg || exit 2 - -debug_client_on -#debug_client_off diff --git a/lustre/tests/llrmount.sh b/lustre/tests/llrmount.sh deleted file mode 100755 index b12c1ae..0000000 --- a/lustre/tests/llrmount.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/sh - -LCONF=${LCONF:-../utils/lconf} -NAME=${NAME:-local} - -config=$NAME.xml -mkconfig=$NAME.sh - -if [ "$PORTALS" ]; then - portals_opt="--portals=$PORTALS" -fi - -if [ "$LUSTRE" ]; then - lustre_opt="--lustre=$LUSTRE" -fi - -if [ "$LDAPURL" ]; then - conf_opt="--ldapurl $LDAPURL --config $NAME" -else - if [ ! 
-f $config -o $mkconfig -nt $config ]; then - sh $mkconfig $config || exit 1 - fi - conf_opt="$config" -fi - -[ "$NODE" ] && node_opt="--node $NODE" - -${LCONF} $portals_opt $lustre_opt $node_opt --gdb $conf_opt || exit 2 diff --git a/lustre/tests/llrsetup.sh b/lustre/tests/llrsetup.sh deleted file mode 100644 index 44bfcae..0000000 --- a/lustre/tests/llrsetup.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -[ -f $SRCDIR/common.sh ] || SRCDIR="/lib/lustre" - -. $SRCDIR/common.sh - -setup_opts "$@" - -setup_portals -setup_lustre -setup_ldlm - -setup_server old_fs -setup_client diff --git a/lustre/tests/llsetup.sh b/lustre/tests/llsetup.sh deleted file mode 100644 index 4828f26..0000000 --- a/lustre/tests/llsetup.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/sh -vx - -SRCDIR="`dirname $0`/" -[ -f $SRCDIR/common.sh ] || SRCDIR="/lib/lustre" - -. $SRCDIR/common.sh - -setup_opts "$@" - -setup_portals || exit $? -setup_lustre || exit $? -setup_ldlm || exit $? - -setup_server new_fs || exit $? -setup_client || exit $? diff --git a/lustre/tests/llsimple.sh b/lustre/tests/llsimple.sh deleted file mode 100755 index d22ddc6..0000000 --- a/lustre/tests/llsimple.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -[ -f $SRCDIR/common.sh ] || SRCDIR="/lib/lustre" - -. $SRCDIR/common.sh - -setup_opts "$@" - -setup_portals || exit $? -setup_lustre || exit $? 
diff --git a/lustre/tests/local.sh b/lustre/tests/local.sh deleted file mode 100755 index 25d05d2..0000000 --- a/lustre/tests/local.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -export PATH=`dirname $0`/../utils:$PATH - -config=${1:-local.xml} - -LMC="${LMC:-lmc} -m $config" -TMP=${TMP:-/tmp} - -MDSDEV=${MDSDEV:-$TMP/mds1} -MDSSIZE=${MDSSIZE:-50000} - -OSTDEV=${OSTDEV:-$TMP/ost1} -OSTSIZE=${OSTSIZE:-200000} -FSTYPE=${FSTYPE:-ext3} - -rm -f $config - -# create nodes -${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11 - -# configure mds server -${LMC} --add mds --nspath /mnt/mds_ns --node localhost --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE || exit 20 - -# configure ost -${LMC} --add ost --nspath /mnt/ost_ns --node localhost --ost ost1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 30 - -# create client config -${LMC} --add mtpt --node localhost --path /mnt/lustre --mds mds1 --ost ost1 || exit 40 diff --git a/lustre/tests/lov.sh b/lustre/tests/lov.sh deleted file mode 100755 index 3956f9e..0000000 --- a/lustre/tests/lov.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash - -export PATH=`dirname $0`/../utils:$PATH - -config=${1:-lov.xml} - -LMC=${LMC:-lmc} -TMP=${TMP:-/tmp} - -MDSDEV=${MDSDEV:-$TMP/mds1} -MDSSIZE=${MDSSIZE:-50000} - -OSTDEV1=${OSTDEV1:-$TMP/ost1} -OSTDEV2=${OSTDEV2:-$TMP/ost2} -OSTDEV3=${OSTDEV3:-$TMP/ost3} -OSTSIZE=${OSTSIZE:-100000} - -STRIPE_BYTES=65536 -STRIPES_PER_OBJ=2 # 0 means stripe over all OSTs - -# create nodes -${LMC} -o $config --add net --node localhost --nid localhost --nettype tcp || exit 1 - -# configure mds server -${LMC} -m $config --format --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 10 - -# configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ --stripe_pattern 0 || exit 20 -${LMC} -m $config --add ost --node localhost --lov lov1 --dev 
$OSTDEV1 --size $OSTSIZE || exit 21 -${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV2 --size $OSTSIZE || exit 22 -${LMC} -m $config --add ost --node localhost --lov lov1 --dev $OSTDEV3 --size $OSTSIZE || exit 23 - -# create client config -${LMC} -m $config --add mtpt --node localhost --path /mnt/lustre --mds mds1 --lov lov1 || exit 30 diff --git a/lustre/tests/lstiming.sh b/lustre/tests/lstiming.sh deleted file mode 100644 index 0b494e4..0000000 --- a/lustre/tests/lstiming.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash - -set -e - -PATH=$PATH:. - -CHECKSTAT=${CHECKSTAT:-"checkstat -v"} -MOUNT1=${MOUNT1:-/mnt/lustre1} -MOUNT2=${MOUNT2:-/mnt/lustre2} -DIRNAME=${DIRNAME:-"ls-timing"} -DIRSIZE=${DIRSIZE:-200} -export NAME=${NAME:-mount2} - -error () { - echo FAIL - exit 1 -} - -pass() { - echo PASS -} -echo "Mounting..." -mount | grep $MOUNT1 || sh llmount.sh - -echo -n "Preparing test directory with $DIRSIZE files..." -rm -rf "$MOUNT1/$DIRNAME" -rm -rf "$MOUNT2/$DIRNAME" -mkdir -p "$MOUNT1/$DIRNAME" -[ -d "$MOUNT2/$DIRNAME" ] || error -createmany -o $MOUNT1/$DIRNAME/file 0 $DIRSIZE &> /dev/null -echo "done" - -echo -n "Cached ls: " -time ls -lr $MOUNT1/$DIRNAME 1> /dev/null - -echo -n "Uncached ls: " -time ls -lr $MOUNT2/$DIRNAME 1> /dev/null - - -fsx $MOUNT1/$DIRNAME/fsx.file &>/dev/null & -fsxpid=$! 
- -echo -n "Cached busy ls:" -time ls -lr $MOUNT1/$DIRNAME 1> /dev/null - -echo -n "Uncached busy ls: " -time ls -lr $MOUNT2/$DIRNAME 1> /dev/null - -kill $fsxpid - -exit diff --git a/lustre/tests/lustre.cfg b/lustre/tests/lustre.cfg deleted file mode 100644 index cc97b1b..0000000 --- a/lustre/tests/lustre.cfg +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/sh - -### REMOVE THE FOLLOWING LINES IN ORDER TO TEST LUSTRE WITH THIS CONFIG ### -echo "lustre: edit /etc/lustre/lustre.cfg to enable, exiting" 1>&2 -exit 1 - -# Common configuration options -# Config file for setting up a local OST and MDS server -NETWORK=tcp -LOCALHOST=`hostname` -SERVER=$LOCALHOST -OSTNODE=$LOCALHOST -CLIENTS=* -PORT=988 - -# Set up the lock manager (required) -SETUP_LDLM=y - -#case `echo $LOCALHOST | sed "s/\.[^|]*//"` in -case $LOCALHOST in -$SERVER) - # Config for setting up a metadata server - MDSFS=extN - MDSDEV=/tmp/mds - MDSSIZE=50000 - SETUP_MDS=y - ;; -esac - -case $LOCALHOST in -$OSTNODE) - # Config for setting up an object storage target with obdfilter - OSTDEV=/tmp/ost - OSTSIZE=200000 - OSTFS=extN - OSTTYPE=obdfilter - SETUP_OST=y - ;; -esac - -case $LOCALHOST in -$CLIENTS) - # Config for setting up a client filesystem mount - SETUP_MDC=y - SETUP_OSC=y - OSCMT=/mnt/lustre - SETUP_MOUNT=y - ;; -esac diff --git a/lustre/tests/mcr-individual-ost-nogw-config.sh b/lustre/tests/mcr-individual-ost-nogw-config.sh deleted file mode 100755 index 0401bf5..0000000 --- a/lustre/tests/mcr-individual-ost-nogw-config.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -config=${1:-echo-no-gw.xml} - -LMC="save_cmd" -LMC_REAL="../../lustre/utils/lmc -m $config" - -# TCP/IP servers -SERVER_START=0 -SERVER_CNT=62 - -TCPBUF=1048576 - -h2tcp () { - echo "${1}" -} -BATCH=/tmp/lmc-batch.$$ -save_cmd() { - echo "$@" >> $BATCH -} - -[ -f $config ] && rm $config - -# Client node -${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp || exit 1 - -# this is crude, but effective -let 
server_per_gw=($SERVER_CNT / $GW_CNT ) -let tot_server=$server_per_gw*$GW_CNT - -let server=$SERVER_START -while (( $server < $SERVER_CNT + SERVER_START )); -do - echo "server: $server" - OST=ba$server - # server node - ${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp || exit 1 - # the device on the server - ${LMC} --add ost --node $OST --obd obd_$OST --obdtype=obdecho || exit 3 - # osc on client - ${LMC} --add oscref --node client --osc OSC_obd_$OST - let server=$server+1 -done - -$LMC_REAL --batch $BATCH -rm -f $BATCH diff --git a/lustre/tests/mcr-mds-failover-config.sh b/lustre/tests/mcr-mds-failover-config.sh deleted file mode 100755 index 29ec215..0000000 --- a/lustre/tests/mcr-mds-failover-config.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/sh - -LMC=/usr/local/cfs/lustre/utils/lmc -# LMC="echo lmc" -CONFIG=mcr-mds-failover.xml -LUSTRE_QUERY=/usr/local/cfs/lustre-failover/lustre-query -GW_NODE=mcr21 -CLIENT_ELAN=`hostname | sed s/[^0-9]*//;` -OST=${OST:-ba50} -UUIDLIST=${UUIDLIST:-/usr/local/admin/ba-ost/UUID.txt} -OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST` -[ "$OST_UUID" ] && OST_UUID="--ostuuid=$OST_UUID" || echo "$OST: no UUID" -MDS_DEVICE=/dev/sda3 -MDS_SIZE=500000 -TCPBUF=1048576 - -MDSNODES=`$LUSTRE_QUERY -h emcri -s id=mds -f` -ACTIVEMDS=`$LUSTRE_QUERY -h emcri -s id=mds -a` - -echo "MDS nodes: $MDSNODES, active: $ACTIVEMDS" - -h2elan () { - echo $1 | sed 's/[^0-9]*//g' -} - -h2tcp () { - echo "${1}" -} - - -# create client node -$LMC -o $CONFIG --add net --node client --nid '*' --nettype elan -$LMC -m $CONFIG --add net --router --node mcr21 --tcpbuf $TCPBUF --nid `h2tcp $GW_NODE` --nettype tcp -$LMC -m $CONFIG --add net --router --node mcr21 --nid `h2elan $GW_NODE` --nettype elan -$LMC -m $CONFIG --add route --node $GW_NODE --nettype elan --gw `h2elan $GW_NODE` --lo $CLIENT_ELAN - -# create MDS node entries -for mds in $MDSNODES; do - elanaddr=`$LUSTRE_QUERY -h emcri -s id=$mds -e` - $LMC -m $CONFIG --add net --node $mds 
--nid $elanaddr --nettype elan - $LMC -m $CONFIG --add mds --node $mds --mds mds_$mds --dev $MDS_DEVICE --size $MDS_SIZE -done - -# create OST node entry -$LMC -m $CONFIG --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp -$LMC -m $CONFIG --add ost --node $OST --ost ost_$OST $OST_UUID --dev bluearc -$LMC -m $CONFIG --add route --node $GW_NODE --nettype tcp --gw `h2tcp $GW_NODE` --lo $OST - -# mount -$LMC -m $CONFIG --add mtpt --node client --path /mnt/lustre --mds mds_$ACTIVEMDS --lov ost_$OST diff --git a/lustre/tests/mcr-routed-config.sh b/lustre/tests/mcr-routed-config.sh deleted file mode 100755 index 8d8a100..0000000 --- a/lustre/tests/mcr-routed-config.sh +++ /dev/null @@ -1,93 +0,0 @@ -#!/bin/bash - -BASE=`hostname | sed "s/[i0-9]*$//"` -[ $BASE = "mcr" ] && OSTBASE=${OSTBASE:-ba} || OSTBASE=${OSTBASE:-ba-ost-} - -config=${1:-$BASE.xml} - -BATCH=/tmp/lmc-batch.$$ -save_cmd() { - echo "$@" >> $BATCH -} - -LMC="save_cmd" -LMC_REAL="../utils/lmc -m $config" - -# TCP/IP servers -SERVER_START=0 -SERVER_CNT=32 -GW_START=0 -GW_CNT=16 -MDS=${BASE}23 -UUIDLIST=${UUIDLIST:-/usr/local/admin/ba-ost/UUID.txt} - -echo "MDS: $MDS" - -# This is needed for to create route for elan network -CLIENT_LO=38 -CLIENT_HI=191 - -TCPBUF=1048576 - -h2elan () { - echo $1 | sed 's/[^0-9]*//g' -} - -h2tcp () { - echo "${1}" -} - -# map gateway NN to host NN (assumes mcr[22-25] are not gateways) -gw2node() { - [ $1 -gt 21 ] && echo $(($1 + 4)) || echo $1 -} - -[ -f $config ] && rm $config - -${LMC} --add net --node $MDS --nid `h2elan $MDS` --nettype elan || exit 1 -${LMC} --add mds --node $MDS --mds mds1 --dev /tmp/mds1 --size 100000 || exit 1 -${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 1 --stripe_pattern 0 - -# Client node -#${LMC} --add net --node client --tcpbuf $TCPBUF --nid '*' --nettype tcp || exit 1 -${LMC} --add net --node client --nid '*' --nettype elan || exit 1 -${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1 - -# 
this is crude, but effective -let server_per_gw=($SERVER_CNT / $GW_CNT ) -let tot_server=$server_per_gw*$GW_CNT -echo "Allocating $server_per_gw OSTs per gateway." -echo "For a total of $tot_server Blue Arc OSTs" - -let gw=$GW_START -let server=$SERVER_START -while (( $gw < $GW_CNT + GW_START )); -do - gwnode=$BASE`gw2node $gw` - echo "Router: $gwnode" - ${LMC} --add net --router --node $gwnode --tcpbuf $TCPBUF --nid `h2tcp $gwnode` --nettype tcp || exit 1 - ${LMC} --add net --node $gwnode --nid `h2elan $gwnode` --nettype elan || exit 1 - ${LMC} --add route --node $gwnode --nettype elan --gw `h2elan $gwnode` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` || exit 2 - - let i=0 - while (( $i < $server_per_gw )); - do - OST=${OSTBASE}$server - echo "server: $OST" - OST_UUID=`awk "/$OST / { print \\$3 }" $UUIDLIST` - [ "$OST_UUID" ] && OST_UUID="--ostuuid $OST_UUID" || echo "$OST: no UUID" - # server node - ${LMC} --add net --node $OST --tcpbuf $TCPBUF --nid $OST --nettype tcp || exit 1 - # the device on the server - ${LMC} --add ost --lov lov1 --node $OST $OBD_UUID --dev bluearc || exit 3 - # route to server - ${LMC} --add route --node $gwnode --nettype tcp --gw `h2tcp $gwnode` --lo $OST || exit 2 - let server=$server+1 - let i=$i+1 - done - - let gw=$gw+1 -done - -$LMC_REAL --batch $BATCH -rm -f $BATCH diff --git a/lustre/tests/mcr.sh b/lustre/tests/mcr.sh deleted file mode 100755 index 4777337..0000000 --- a/lustre/tests/mcr.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash - -config=${1:-mcr.xml} - -LMC="../utils/lmc -m $config" - -# TCP/IP servers -SERVERS="ba-ost-1 ba-ost-2" -ROUTER=dev5 - -# Elan clients -CLIENT_LO=dev2 -CLIENT_HI=dev25 - -TCPBUF=1048576 - - -h2elan () { - echo $1 | sed 's/[^0-9]*//g' -} - -h2tcp () { - echo "${1}" -} - -[ -f $config ] && rm $config - -# Client node -${LMC} --add net --node client --nid '*' --nettype elan || exit 1 -# Router node -${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2tcp $ROUTER` --nettype tcp || 
exit 1 -${LMC} --add net --node $ROUTER --nid `h2elan $ROUTER` --nettype elan|| exit 1 -${LMC} -m $config --add route --node $ROUTER --nettype elan --gw `h2elan $ROUTER` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` || exit 2 - -for s in $SERVERS - do - # server node - ${LMC} --add net --node $s --tcpbuf $TCPBUF --nid $s --nettype tcp || exit 1 - # route to server - ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2tcp $ROUTER` --lo $s || exit 2 - # the device on the server - ${LMC} --add ost --node $s --obd obd_$s --obdtype=obdecho || exit 3 - # attach to the device on the client (this would normally be a mount) - ${LMC} --add oscref --node client --osc OSC_obd_$s || exit 4 -done diff --git a/lustre/tests/mcreate.c b/lustre/tests/mcreate.c deleted file mode 100644 index 9d48b11..0000000 --- a/lustre/tests/mcreate.c +++ /dev/null @@ -1,23 +0,0 @@ -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> - -int main(int argc, char ** argv) -{ - int rc; - - if (argc < 2) { - printf("Usage %s filename\n", argv[0]); - return 1; - } - - rc = mknod(argv[1], S_IFREG | 0644, 0); - if (rc) { - printf("mknod(%s) error: %s\n", argv[1], strerror(errno)); - } - return rc; -} diff --git a/lustre/tests/mcrlov.sh b/lustre/tests/mcrlov.sh deleted file mode 100755 index cce8878..0000000 --- a/lustre/tests/mcrlov.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -config=${1:-mcrlov.xml} - -LMC="../utils/lmc -m $config" - -# TCP/IP servers -SERVERS="ba-ost-1 ba-ost-2" -ROUTER=dev5 -MDS=dev7 -TMP=${TMP:-/tmp} - -# Elan clients -CLIENT_LO=dev2 -CLIENT_HI=dev25 - -TCPBUF=1048576 - - -h2elan () { - echo $1 | sed 's/[^0-9]*//g' -} - -h2tcp () { - echo "${1}" -} - -[ -f $config ] && rm $config - -# Client node -${LMC} --add net --node client --nid '*' --nettype elan || exit 1 -# Router node -${LMC} --add net --router --node $ROUTER --tcpbuf $TCPBUF --nid `h2tcp $ROUTER` --nettype tcp || exit 
1 -${LMC} --add net --node $ROUTER --nid `h2elan $ROUTER` --nettype elan|| exit 1 -${LMC} --add route --node $ROUTER --gw `h2elan $ROUTER` --lo `h2elan $CLIENT_LO` --hi `h2elan $CLIENT_HI` --nettype elan || exit 2 - -${LMC} --add net --node $MDS --nid `h2elan $MDS` --nettype elan || exit 1 -${LMC} --add mds --node $MDS --mds mds1 --dev $TMP/mds1 --size 100000 || exit 1 -${LMC} --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 1 - -${LMC} --add mtpt --node client --path /mnt/lustre --mds mds1 --lov lov1 - -for s in $SERVERS - do - # server node - ${LMC} --add net --node $s --tcpbuf $TCPBUF --nid $s --nettype tcp || exit 1 - # route to server - ${LMC} --add route --node $ROUTER --nettype tcp --gw `h2tcp $ROUTER` --lo $s || exit 2 - # the device on the server - #${LMC} --format --lov lov1 --node $s --ost bluearc || exit 3 - ${LMC} --add ost --lov lov1 --node $s --dev bluearc --format || exit 3 -done diff --git a/lustre/tests/mdcreq.sh b/lustre/tests/mdcreq.sh deleted file mode 100644 index bd54c96..0000000 --- a/lustre/tests/mdcreq.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`" -. $SRCDIR/common.sh - -NETWORK=tcp -LOCALHOST=localhost -SERVER=localhost -PORT=988 -TMP=${TMP:-/tmp} - -setup_portals -setup_lustre - -MDSFS=ext2 -new_fs ${MDSFS} $TMP/mds 1000 -MDS=$LOOPDEV - -echo 0xffffffff > /proc/sys/portals/debug - -$OBDCTL <<EOF -device 0 -attach mds MDSDEV -setup ${MDS} ${MDSFS} -quit -EOF - -mknod /dev/request c 10 244 - -./testreq --getattr -./testreq --setattr -./testreq --readpage -./testreq --open -./testreq --close junk_file_handle -./testreq --create - -echo "Done." diff --git a/lustre/tests/mdcreqcleanup.sh b/lustre/tests/mdcreqcleanup.sh deleted file mode 100755 index 3cacfbd..0000000 --- a/lustre/tests/mdcreqcleanup.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -. 
$SRCDIR/common.sh - -rmmod llite -rmmod mdc - -$OBDCTL <<EOF -device 0 -cleanup -detach -quit -EOF - -rmmod mds -rmmod osc -rmmod ost -rmmod obdext2 -rmmod obdclass -rmmod ptlrpc - -$PTLCTL <<EOF -setup tcp -disconnect localhost -del_uuid self -del_uuid mds -EOF - -losetup -d ${LOOP}0 - -killall acceptor -rmmod ksocknal -rmmod portals diff --git a/lustre/tests/mds.cfg b/lustre/tests/mds.cfg deleted file mode 100644 index b9b9f78..0000000 --- a/lustre/tests/mds.cfg +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -# Config file for setting up a metadata server -MDSFS=extN -MDSDEV=/tmp/mds -MDSSIZE=25000 -SETUP_MDS=y -# LOVUUID=LovUUID diff --git a/lustre/tests/mkdirdeep.c b/lustre/tests/mkdirdeep.c deleted file mode 100644 index 38c0cea..0000000 --- a/lustre/tests/mkdirdeep.c +++ /dev/null @@ -1,257 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Compile with: - * cc -I../../portals/include -o mkdirdeep mkdirdeep.c - * -L../../portals/linux/utils -lptlctl - */ - -#include <stdio.h> -#include <stdlib.h> -#include <getopt.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <linux/limits.h> -#include <portals/lltrace.h> - -static int opt_verbose = 0; -static int opt_trace = 0; - -void usage(const char *pname) -{ - fprintf(stderr, "Usage: %s --depth <d> [--output <outputtracefilename>]" - " [--mknod] [--verbose] [--notrace] <basepath>\n", pname); - exit(1); -} - -int do_mkdir(char *path) -{ - int rc = mkdir(path, 0755); - - if (rc) { - fprintf(stderr, "mkdir(%s) failed: %s\n", - path, strerror(errno)); - exit(1); - } - if (opt_verbose) - printf("mkdir %s\n", path); - - return rc; -} - - -int do_mknod(char *path) -{ - int rc = mknod(path, 0755, S_IFIFO); - - if (rc) { - fprintf(stderr, "mkdir(%s) failed: %s\n", - path, strerror(errno)); - exit(1); - } - if (opt_verbose) - printf("mknod %s\n", path); - - return rc; -} - 
-int do_chdir(char* path) -{ - int rc = chdir(path); - - if (rc) { - fprintf(stderr, "chdir(%s) failed: %s\n", - path, strerror(errno)); - exit(1); - } - if (opt_verbose) - printf("chdir %s\n", path); - - return rc; -} - -int do_stat(char *path) -{ - char mark_buf[PATH_MAX + 50]; - struct stat mystat; - int rc = stat(path, &mystat); - - if (rc) { - fprintf(stderr, "stat(%s) failed: %s\n", - path, strerror(errno)); - exit(1); - } - if (opt_verbose) - printf("stat %s = inode %lu\n", path, mystat.st_ino); - - if (opt_trace) { - snprintf(mark_buf, PATH_MAX, "stat %s = inode %lu", - path, mystat.st_ino); - ltrace_mark(0, mark_buf); - } - - return rc; -} - -int main(int argc, char** argv) -{ - int c, i, mypid; - int opt_depth = 1; - int opt_mknod = 0; - - static struct option long_opt[] = { - {"depth", 1, 0, 'd' }, - {"help", 0, 0, 'h' }, - {"mknod", 0, 0, 'm' }, - {"output", 1, 0, 'o' }, - {"trace", 1, 0, 't' }, - {"verbose", 0, 0, 'v' }, - {0,0,0,0} - }; - - char *outputfilename = NULL; - char *base_pathname; - char pathname[PATH_MAX]; - char mark_buf[PATH_MAX + 50]; - char mycwd[PATH_MAX]; - char *pname = argv[0]; - - while ((c = getopt_long(argc, argv, "d:mhvo:", long_opt, NULL)) != -1) { - switch (c) { - case 'd': - opt_depth = atoi(optarg); - if ((opt_depth == 0) || (opt_depth > 1100)) - usage(pname); - break; - case 'm': - opt_mknod = 1; - break; - case 't': - opt_trace = 1; - break; - case 'v': - opt_verbose = 1; - break; - case 'o': - outputfilename = optarg; - break; - case 'h': - case '?': - case ':': - default: - usage(pname); - break; - } - } - - if (optind != (argc - 1)) - usage(pname); - - base_pathname = argv[optind]; - mypid = getpid(); - - if (!getcwd(&mycwd[0], sizeof(mycwd))) { - fprintf(stderr, "%s: unable to getcwd()\n", pname); - exit(1); - } - - printf("%s(pid=%d) depth=%d mknod=%d, basepathname=%s, trace=%d\n", - pname, mypid, opt_depth, opt_mknod, base_pathname, opt_trace); - - if (outputfilename) - printf("outputfilename=%s\n", outputfilename); 
- - if (opt_trace) { - ltrace_start(); - ltrace_clear(); - snprintf(mark_buf, PATH_MAX, "Initialize - mkdir %s; chdir %s", - base_pathname, base_pathname); - ltrace_mark(2, mark_buf); - } - - if (do_mkdir(base_pathname)!=0) - exit(1); - if (do_chdir(base_pathname)!=0) - exit(1); - - /* Create directory tree with depth level of subdirectories */ - - if (opt_trace) { - snprintf(mark_buf, PATH_MAX, - "Create Directory Tree (depth %d)", opt_depth); - ltrace_mark(2, mark_buf); - } - - for (i = 0; i < opt_depth; i++) { - snprintf(pathname, sizeof(pathname), "%d", i + 1); - - if (i == (opt_depth - 1)) { - /* Last Iteration */ - - if (opt_trace) { - snprintf(mark_buf, PATH_MAX, - "Tree Leaf (%d) %s/stat", i, - (opt_mknod ? "mknod" : "mkdir")); - ltrace_mark(3, mark_buf); - } - - if (opt_mknod) - do_mknod(pathname); - else - do_mkdir(pathname); - /* Now stat it */ - do_stat(pathname); - } else { - /* Not Leaf */ - - if (opt_trace) { - snprintf(mark_buf, sizeof(mark_buf), - "Tree Level (%d) mkdir/stat/chdir", i); - ltrace_mark(3, mark_buf); - } - - do_mkdir(pathname); - do_stat(pathname); - do_chdir(pathname); - } - } - - /* Stat through directory tree with fullpaths */ - - if (opt_trace) { - snprintf(mark_buf, PATH_MAX, "Walk Directory Tree"); - ltrace_mark(2, mark_buf); - } - - do_chdir(base_pathname); - - strncpy(pathname, base_pathname, sizeof(pathname)); - - c = strlen(base_pathname); - for (i = 0; i < opt_depth; i++) { - c += snprintf(pathname + c, sizeof(pathname) - c, "/%d", i+1); - - if (opt_trace) { - snprintf(mark_buf, PATH_MAX, "stat %s", pathname); - ltrace_mark(2, mark_buf); - } - - do_stat(pathname); - } - - if (opt_trace && outputfilename) { - ltrace_write_file(outputfilename); - ltrace_add_processnames(outputfilename); - ltrace_stop(); - } - - do_chdir(base_pathname); - - printf("%s (pid=%d) done.\n", pname, mypid); - - return 0; -} diff --git a/lustre/tests/mkdirmany.c b/lustre/tests/mkdirmany.c deleted file mode 100755 index 26c3016..0000000 --- 
a/lustre/tests/mkdirmany.c +++ /dev/null @@ -1,40 +0,0 @@ -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <time.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> - -int main(int argc, char ** argv) -{ - int i, rc, count; - char dirname[4096]; - - if (argc < 3) { - printf("Usage %s dirnamebase count\n", argv[0]); - return 1; - } - - if (strlen(argv[1]) > 4080) { - printf("name too long\n"); - return 1; - } - - count = strtoul(argv[2], NULL, 0); - - for (i=0 ; i < count ; i++) { - sprintf(dirname, "%s-%d", argv[1], i); - rc = mkdir(dirname, 0444); - if (rc) { - printf("mkdir(%s) error: %s\n", - dirname, strerror(errno)); - break; - } - if ((i % 10000) == 0) - printf(" - created %d (time %ld)\n", i, time(0)); - } - return rc; -} diff --git a/lustre/tests/mlink.c b/lustre/tests/mlink.c deleted file mode 100755 index 5688b9f..0000000 --- a/lustre/tests/mlink.c +++ /dev/null @@ -1,25 +0,0 @@ -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> - -int main(int argc, char ** argv) -{ - int rc; - - if (argc < 3) { - printf("Usage: %s file link\n", argv[0]); - return 1; - } - - rc = link(argv[1], argv[2]); - if (rc) { - printf("link(%s, %s) error: %s\n", argv[1], argv[2], - strerror(errno)); - return errno; - } - return 0; -} diff --git a/lustre/tests/modules.cfg b/lustre/tests/modules.cfg deleted file mode 100755 index 10fb9a7..0000000 --- a/lustre/tests/modules.cfg +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -# Config file for running tests on a single host over loopback TCP -NETWORK=tcp diff --git a/lustre/tests/mount2.sh b/lustre/tests/mount2.sh deleted file mode 100644 index 07de3ed..0000000 --- a/lustre/tests/mount2.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -config=${1:-mount2.xml} - -SRCDIR=`dirname $0` -PATH=$SRCDIR:$SRCDIR/../utils:$PATH -LMC="${LMC:-lmc} -m $config" -TMP=${TMP:-/tmp} 
- -MDSDEV=${MDSDEV:-$TMP/mds1} -MDSSIZE=${MDSSIZE:-50000} - -OSTDEV=${OSTDEV:-$TMP/ost1} -OSTSIZE=${OSTSIZE:-200000} - -rm -f $config - -# create nodes -${LMC} --add node --node localhost || exit 10 -${LMC} --add net --node localhost --nid localhost --nettype tcp || exit 11 - -# configure mds server -${LMC} --add mds --node localhost --mds mds1 --dev $MDSDEV --size $MDSSIZE || exit 20 - -# configure ost -${LMC} --add ost --node localhost --ost ost1 --dev $OSTDEV --size $OSTSIZE || exit 30 - -# create client config -${LMC} --add mtpt --node localhost --path /mnt/lustre1 --mds mds1 --ost ost1 || exit 40 -${LMC} --add mtpt --node localhost --path /mnt/lustre2 --mds mds1 --ost ost1 || exit 40 diff --git a/lustre/tests/mount2fs.sh b/lustre/tests/mount2fs.sh deleted file mode 100644 index 27b570d..0000000 --- a/lustre/tests/mount2fs.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# -# Test case for 2 different filesystems mounted on the same client. -# Uses 3 umls - -config=${1-mds-bug.xml} -LMC=${LMC-../utils/lmc} -TMP=${TMP:-/tmp} - -MDSDEV=$TMP/mds1 -MDSDEV2=$TMP/mds2 -MDSSIZE=50000 - -OSTDEV1=$TMP/ost1 -OSTDEV2=$TMP/ost2 -OSTSIZE=100000 - -MDSNODE=uml1 -OSTNODE=uml2 -CLIENT=uml3 - -# create nodes -${LMC} -o $config --add net --node $MDSNODE --nid $MDSNODE --nettype tcp || exit 1 -${LMC} -m $config --add net --node $OSTNODE --nid $OSTNODE --nettype tcp || exit 2 -${LMC} -m $config --add net --node $CLIENT --nid $CLIENT --nettype tcp || exit 3 - -# configure mds server -${LMC} -m $config --format --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV --size $MDSSIZE ||exit 10 -${LMC} -m $config --format --add mds --node $MDSNODE --mds mds2 --dev $MDSDEV2 --size $MDSSIZE ||exit 10 - -# configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20 -${LMC} -m $config --add lov --lov lov2 --mds mds2 --stripe_sz 65536 --stripe_cnt 0 --stripe_pattern 0 || exit 20 -${LMC} -m $config --add ost --node $OSTNODE --lov 
lov1 --dev $OSTDEV1 --size $OSTSIZE || exit 21 -${LMC} -m $config --add ost --node $OSTNODE --lov lov2 --dev $OSTDEV2 --size $OSTSIZE || exit 22 - -# create client config -${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre --mds mds1 --lov lov1 || exit 30 -${LMC} -m $config --add mtpt --node $CLIENT --path /mnt/lustre2 --mds mds2 --lov lov2 || exit 30 - - - - diff --git a/lustre/tests/multifstat.c b/lustre/tests/multifstat.c deleted file mode 100644 index fa510bc..0000000 --- a/lustre/tests/multifstat.c +++ /dev/null @@ -1,62 +0,0 @@ -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> - -int main(int argc, char **argv) -{ - int fd1, fd2; - struct stat st1, st2; - - if (argc != 3) { - printf("Usage %s file1 file2\n", argv[0]); - return 1; - } - - - fd1 = open(argv[1], O_CREAT| O_RDWR, 0666); - if (fd1 == -1) { - printf("Error opening %s: %s\n", argv[1], strerror(errno)); - return errno; - } - - fd2 = open(argv[2], O_RDONLY); - if (fd2 == -1) { - printf("Error opening %s: %s\n", argv[2], strerror(errno)); - return errno; - } - - sleep(1); - - if ( write(fd1, "hello", strlen("hello")) != strlen("hello")) { - printf("Error writing: %s\n", strerror(errno)); - return errno; - } - - if ( fstat(fd1, &st1) ) { - printf("Error statting %s: %s\n", argv[1], strerror(errno)); - return errno; - } - - if ( fstat(fd2, &st2) ) { - printf("Error statting %s: %s\n", argv[2], strerror(errno)); - return errno; - } - - if ( st1.st_size != st2.st_size ) { - printf("Sizes don't match %ld, %ld\n", - st1.st_size, st2.st_size); - return 1; - } - - if ( st1.st_mtime != st2.st_mtime ) { - printf("Mtimes don't match %ld, %ld\n", - st1.st_mtime, st2.st_mtime); - return 1; - } - - return 0; -} diff --git a/lustre/tests/munlink.c b/lustre/tests/munlink.c deleted file mode 100755 index a3c18c5..0000000 --- a/lustre/tests/munlink.c +++ /dev/null @@ -1,23 +0,0 @@ -#include <stdio.h> -#include 
<sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> - -int main(int argc, char ** argv) -{ - int rc; - - if (argc < 2) { - printf("Usage %s filename\n", argv[0]); - return 1; - } - - rc = unlink(argv[1]); - if (rc) { - printf("unlink(%s) error: %s\n", argv[1], strerror(errno)); - } - return rc; -} diff --git a/lustre/tests/net-client.cfg b/lustre/tests/net-client.cfg deleted file mode 100644 index 7cd4a8d..0000000 --- a/lustre/tests/net-client.cfg +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# Config file for setting up a remote server with a real OST -NETWORK=tcp -LOCALHOST=dev5 -SERVER=dev4 -PORT=988 diff --git a/lustre/tests/net-local.cfg b/lustre/tests/net-local.cfg deleted file mode 100644 index 2ce3abe..0000000 --- a/lustre/tests/net-local.cfg +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# Config file for running tests on a single host over loopback TCP -NETWORK=tcp -LOCALHOST=localhost -SERVER=localhost -PORT=988 diff --git a/lustre/tests/net-server.cfg b/lustre/tests/net-server.cfg deleted file mode 100644 index 8386525..0000000 --- a/lustre/tests/net-server.cfg +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# Config file for setting up a remote server with a real OST -NETWORK=tcp -LOCALHOST=dev4 -SERVER=dev4 -PORT=988 diff --git a/lustre/tests/o_directory.c b/lustre/tests/o_directory.c deleted file mode 100644 index d4b2c1b..0000000 --- a/lustre/tests/o_directory.c +++ /dev/null @@ -1,51 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -/* for O_DIRECTORY */ -#define _GNU_SOURCE - -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> - -int main(int argc, char **argv) -{ - int fd, rc; - - if (argc != 2) { - printf("Usage: %s <filename>\n", argv[0]); - exit(1); - } - - fd = open(argv[1], O_RDONLY | O_CREAT, 0600); - if 
(fd == -1) { - printf("Error opening %s for create: %s\n", argv[1], - strerror(errno)); - exit(1); - } - rc = close(fd); - if (rc < 0) { - printf("Error closing %s: %s\n", argv[1], strerror(errno)); - exit(1); - } - - fd = open(argv[1], O_DIRECTORY); - if (fd >= 0) { - printf("opening %s as directory should have returned an " - "error!\n", argv[1]); - exit(1); - } - if (errno != ENOTDIR) { - printf("opening %s as directory, expected -ENOTDIR and got " - "%s\n", argv[1], strerror(errno)); - exit(1); - } - - return 0; -} diff --git a/lustre/tests/obddisk.cfg b/lustre/tests/obddisk.cfg deleted file mode 100644 index 22e6ef2..0000000 --- a/lustre/tests/obddisk.cfg +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -# Config file for setting up an object storage target with obdfilter -OSTDEV=/dev/hda7 -OSTFS=ext2 -OSTTYPE=obdfilter -SETUP_OST=y diff --git a/lustre/tests/obdecho.cfg b/lustre/tests/obdecho.cfg deleted file mode 100644 index 2c2b40f..0000000 --- a/lustre/tests/obdecho.cfg +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -# Config file for setting up a test (echo) OST -OSTTYPE=obdecho -SETUP_OST=y diff --git a/lustre/tests/obdfilter.cfg b/lustre/tests/obdfilter.cfg deleted file mode 100644 index e9021c2..0000000 --- a/lustre/tests/obdfilter.cfg +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -# Config file for setting up an object storage target with obdfilter -OSTDEV=/tmp/ost -OSTSIZE=10000 -OSTFS=extN -OSTTYPE=obdfilter -SETUP_OST=y diff --git a/lustre/tests/open_delay.c b/lustre/tests/open_delay.c deleted file mode 100644 index de4815c..0000000 --- a/lustre/tests/open_delay.c +++ /dev/null @@ -1,27 +0,0 @@ -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdio.h> -#include <liblustre.h> -#include <linux/obd.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_lite.h> -#include <linux/obd_lov.h> - -int main(int argc, char **argv) -{ - int fd; - - if (argc != 2) { - printf("Usage %s <filename>\n", argv[0]); - exit(1); - } - - fd = open(argv[1], 
O_RDONLY | O_LOV_DELAY_CREATE); - if (fd == -1) { - printf("Error opening %s\n", argv[1]); - exit(1); - } - - return 0; -} diff --git a/lustre/tests/openclose.c b/lustre/tests/openclose.c deleted file mode 100644 index 1294b13..0000000 --- a/lustre/tests/openclose.c +++ /dev/null @@ -1,142 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#include <stdlib.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <sys/wait.h> -#include <sys/ioctl.h> - -#include <linux/lustre_lite.h> - -#ifndef O_DIRECT -# define O_DIRECT 040000 /* direct disk access hint */ -#endif - -int main(int argc, char *argv[]) -{ - char filename[1024]; - unsigned long count, i; - int thread = 0; - int threads = 0; - int rc = 0; - int fd, ioctl_flags = 0; - - if (argc < 3 || argc > 4) { - fprintf(stderr, "usage: %s <filename> <iterations> [threads]\n", - argv[0]); - exit(1); - } - - count = strtoul(argv[2], NULL, 0); - if (argc == 4) - threads = strtoul(argv[3], NULL, 0); - - for (i = 1; i <= threads; i++) { - rc = fork(); - if (rc < 0) { - fprintf(stderr, "error: %s: #%ld - %s\n", argv[0], i, - strerror(rc = errno)); - break; - } else if (rc == 0) { - thread = i; - argv[2] = "--device"; - break; - } else - printf("%s: thread #%ld (PID %d) started\n", - argv[0], i, rc); - rc = 0; - } - - if (threads && thread == 0) { /* parent process */ - int live_threads = threads; - - while (live_threads > 0) { - int status; - pid_t ret; - - ret = waitpid(0, &status, 0); - if (ret == 0) - continue; - - if (ret < 0) { - fprintf(stderr, "error: %s: wait - %s\n", - argv[0], strerror(errno)); - if (!rc) - rc = errno; - } else { - /* - * This is a hack. We _should_ be able to use - * WIFEXITED(status) to see if there was an - * error, but it appears to be broken and it - * always returns 1 (OK). See wait(2). 
- */ - int err = WEXITSTATUS(status); - if (err || WIFSIGNALED(status)) - fprintf(stderr, - "%s: PID %d had rc=%d\n", - argv[0], ret, err); - if (!rc) - rc = err; - } - live_threads--; - } - } else { - if (threads) - sprintf(filename, "%s-%d", argv[1], thread); - else - strcpy(filename, argv[1]); - - fd = open(filename, O_RDWR|O_CREAT, 0644); - if (fd < 0) { - fprintf(stderr, "open(%s, O_CREAT): %s\n", filename, - strerror(errno)); - exit(errno); - } - if (close(fd) < 0) { - fprintf(stderr, "close(): %s\n", strerror(errno)); - rc = errno; - goto unlink; - } - - for (i = 0; i < count; i++) { - fd = open(filename, O_RDWR|O_LARGEFILE|O_DIRECT); - if (fd < 0) { - fprintf(stderr, "open(%s, O_RDWR): %s\n", - filename, strerror(errno)); - rc = errno; - break; - } - if (ioctl(fd, LL_IOC_SETFLAGS, &ioctl_flags) < 0 && - errno != ENOTTY) { - fprintf(stderr, "ioctl(): %s\n", - strerror(errno)); - rc = errno; - break; - } - if (close(fd) < 0) { - fprintf(stderr, "close(): %s\n", - strerror(errno)); - rc = errno; - break; - } - } - unlink: - if (unlink(filename) < 0) { - fprintf(stderr, "unlink(%s): %s\n", filename, - strerror(errno)); - rc = errno; - } - if (threads) - printf("Thread %d done: rc = %d\n", thread, rc); - else - printf("Done: rc = %d\n", rc); - } - return rc; -} diff --git a/lustre/tests/opendevunlink.c b/lustre/tests/opendevunlink.c deleted file mode 100644 index fde7d36..0000000 --- a/lustre/tests/opendevunlink.c +++ /dev/null @@ -1,111 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <errno.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <dirent.h> -#include <string.h> - -int main(int argc, char **argv) -{ - char *dname1, *dname2; - int fddev1, fddev2, rc; - //DIR *dp; - struct stat st1, st2; - - if (argc < 2 || argc > 3) { - fprintf(stderr, "usage: %s filename1 [filename2]\n", argv[0]); - exit(1); - } - - 
dname1 = argv[1]; - if (argc == 3) - dname2 = argv[2]; - else - dname2 = argv[1]; - - //create the special file (right now only test on pipe) - fprintf(stderr, "creating special file %s\n", dname1); - rc = mknod(dname1, 0777|S_IFIFO, 0); - if (rc == -1) { - fprintf(stderr, "creating %s fails: %s\n", - dname1, strerror(errno)); - exit(1); - } - - // open the special file again - fprintf(stderr, "opening file\n"); - fddev1 = open(dname1, O_RDONLY | O_NONBLOCK); - if (fddev1 == -1) { - fprintf(stderr, "open %s fails: %s\n", - dname1, strerror(errno)); - exit(1); - } - - // doesn't matter if the two dirs are the same?? - fddev2 = open(dname2, O_RDONLY | O_NONBLOCK); - if (fddev2 == -1) { - fprintf(stderr, "open %s fails: %s\n", - dname2, strerror(errno)); - exit(1); - } - - // delete the special file - fprintf (stderr, "unlinking %s\n", dname1); - rc = unlink(dname1); - if (rc) { - fprintf(stderr, "unlink %s error: %s\n", - dname1, strerror(errno)); - exit(1); - } - - if (access(dname2, F_OK) == 0){ - fprintf(stderr, "%s still exists\n", dname2); - exit(1); - } - - if (access(dname1, F_OK) == 0){ - fprintf(stderr, "%s still exists\n", dname1); - exit(1); - } - - // fchmod one special file - rc = fchmod (fddev1, 0777); - if(rc == -1) - { - fprintf(stderr, "fchmod unlinked special file %s fails: %s\n", - dname1, strerror(errno)); - exit(1); - } - - // fstat two files to check if they are the same - rc = fstat(fddev1, &st1); - if(rc == -1) - { - fprintf(stderr, "fstat unlinked special file %s fails: %s\n", - dname1, strerror(errno)); - exit(1); - } - - rc = fstat(fddev2, &st2); - if (rc == -1) { - fprintf(stderr, "fstat file %s fails: %s\n", - dname2, strerror(errno)); - exit(1); - } - - if (st1.st_mode != st2.st_mode) { // can we do this? 
- fprintf(stderr, "fstat different value on %s and %s\n", dname1, dname2); - exit(1); - } - - fprintf(stderr, "Ok, everything goes well.\n"); - return 0; -} - diff --git a/lustre/tests/opendirunlink.c b/lustre/tests/opendirunlink.c deleted file mode 100644 index 2664618..0000000 --- a/lustre/tests/opendirunlink.c +++ /dev/null @@ -1,122 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <dirent.h> -#include <string.h> - -int main(int argc, char **argv) -{ - char *dname1, *dname2; - int fddir1, fddir2, rc; - //DIR *dp; - struct stat st1, st2; - - if (argc < 2 || argc > 3) { - fprintf(stderr, "usage: %s dirname1 [dirname2]\n", argv[0]); - exit(1); - } - - dname1 = argv[1]; - if (argc == 3) - dname2 = argv[2]; - else - dname2 = argv[1]; - - //create the directory - fprintf(stderr, "creating directory %s\n", dname1); - rc = mkdir(dname1, 0744); - if (rc == -1) { - fprintf(stderr, "creating %s fails: %s\n", - dname1, strerror(errno)); - exit(1); - } - - // open the dir again - fprintf(stderr, "opening directory\n"); - fddir1 = open(dname1, O_RDONLY | O_DIRECTORY); - if (fddir1 == -1) { - fprintf(stderr, "open %s fails: %s\n", - dname1, strerror(errno)); - exit(1); - } - - // doesn't matter if the two dirs are the same?? 
- fddir2 = open(dname2, O_RDONLY | O_DIRECTORY); - if (fddir2 == -1) { - fprintf(stderr, "open %s fails: %s\n", - dname2, strerror(errno)); - exit(1); - } - - // another method -/* - if ( (dp = opendir(dname2)) == NULL) { - fprintf(stderr, "opendir() %s\n", strerror(errno)); - exit(1); - } - fddir = dirfd(dp); -*/ - - // delete the dir - fprintf (stderr, "unlinking %s\n", dname1); - rc = rmdir(dname1); - if (rc) { - fprintf(stderr, "unlink %s error: %s\n", - dname1, strerror(errno)); - exit(1); - } - - if (access(dname2, F_OK) == 0){ - fprintf(stderr, "%s still exists\n", dname2); - exit(1); - } - - if (access(dname1, F_OK) == 0){ - fprintf(stderr, "%s still exists\n", dname1); - exit(1); - } - - // fchmod the dir - rc = fchmod (fddir1, 0777); - if(rc == -1) - { - fprintf(stderr, "fchmod unlinked dir fails %s\n", - strerror(errno)); - exit(1); - } - - // fstat two dirs to check if they are the same - rc = fstat(fddir1, &st1); - if(rc == -1) - { - fprintf(stderr, "fstat unlinked dir %s fails %s\n", - dname1, strerror(errno)); - exit(1); - } - - rc = fstat(fddir2, &st2); - if (rc == -1) { - fprintf(stderr, "fstat dir %s fails %s\n", - dname2, strerror(errno)); - exit(1); - } - - if (st1.st_mode != st2.st_mode) { // can we do this? 
- fprintf(stderr, "fstat different value on %s and %s\n", dname1, dname2); - exit(1); - } - - fprintf(stderr, "Ok, everything goes well.\n"); - return 0; -} - diff --git a/lustre/tests/openfile.c b/lustre/tests/openfile.c deleted file mode 100644 index 7d8cc6b..0000000 --- a/lustre/tests/openfile.c +++ /dev/null @@ -1,153 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ - -#if 0 -#define DEBUG -#endif - -#define _GNU_SOURCE - -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <stdlib.h> -#include <unistd.h> - -typedef struct flag_mapping { - char string[20]; - int flag; -} FLAG_MAPPING; - -FLAG_MAPPING flag_table[] = { - {"O_RDONLY", O_RDONLY}, - {"O_WRONLY", O_WRONLY}, - {"O_RDWR", O_RDWR}, - {"O_CREAT", O_CREAT}, - {"O_EXCL", O_EXCL}, - {"O_NOCTTY", O_NOCTTY}, - {"O_TRUNC", O_TRUNC}, - {"O_APPEND", O_APPEND}, - {"O_NONBLOCK", O_NONBLOCK}, - {"O_NDELAY", O_NDELAY}, - {"O_SYNC", O_SYNC}, - {"O_NOFOLLOW", O_NOFOLLOW}, - {"O_DIRECTORY", O_DIRECTORY}, - {"O_LARGEFILE", O_LARGEFILE}, - {"", -1} -}; - -void Usage_and_abort(void) -{ - fprintf(stderr, "Usage: openfile -f flags [ -m mode ] filename \n"); - fprintf(stderr, "e.g. 
openfile -f O_RDWR:O_CREAT -m 0755 /etc/passwd\n"); - exit(-1); -} - -int main(int argc, char** argv) -{ - int fd; - int flags=0; - mode_t mode=0; - char* fname=NULL; - int mode_set=0; - int flag_set=0; - int file_set=0; - char c; - char* cloned_flags; - - if (argc == 1) - Usage_and_abort(); - - while ((c = getopt (argc, argv, "f:m:")) != -1) { - switch (c) { - case 'f': { - char *tmp; - - cloned_flags = (char *)malloc(strlen(optarg)); - if (cloned_flags == NULL) { - fprintf(stderr, "Insufficient memory.\n"); - exit(-1); - } - - strncpy(cloned_flags, optarg, strlen(optarg)); - for (tmp = strtok(optarg, ":|"); tmp; - tmp = strtok(NULL, ":|")) { - int i = 0; -#ifdef DEBUG - printf("flags = %s\n",tmp); -#endif - flag_set = 1; - for (i = 0; flag_table[i].flag != -1; i++) { - if (!strcmp(tmp, flag_table[i].string)){ - flags |= flag_table[i].flag; - break; - } - } - - if (flag_table[i].flag == -1) { - fprintf(stderr, "No such flag: %s\n", - tmp); - exit(-1); - } - } -#ifdef DEBUG - printf("flags = %x\n", flags); -#endif - break; - } - case 'm': -#ifdef DEBUG - printf("mode = %s\n", optarg); -#endif - mode = strtol(optarg, NULL, 8); - mode_set = 1; -#ifdef DEBUG - printf("mode = %o\n", mode); -#endif - break; - default: - fprintf(stderr, "Bad parameters.\n"); - Usage_and_abort(); - } - } - - if (optind == argc) { - fprintf(stderr, "Bad parameters.\n"); - Usage_and_abort(); - } - - fname = argv[optind]; - file_set = 1; - - if (!flag_set || !file_set) { - fprintf(stderr, "Missing flag or file-name\n"); - exit(-1); - } - - - if (mode_set) - fd = open(fname, flags, mode); - else - fd = open(fname, flags); - - if (fd != -1) { - fprintf(stderr, "Succeed in opening file \"%s\"(flags=%s", - fname, cloned_flags); - - if (mode_set) - fprintf(stderr, ", mode=%o", mode); - fprintf(stderr, ")\n"); - close(fd); - } else { - fprintf(stderr, "Error in opening file \"%s\"(flags=%s", - fname, cloned_flags); - if (mode_set) - fprintf(stderr, ", mode=%o", mode); - fprintf(stderr, ") %d: 
%s\n", errno, strerror(errno)); - } - return errno; -} diff --git a/lustre/tests/openme.c b/lustre/tests/openme.c deleted file mode 100644 index 9a1f3f3..0000000 --- a/lustre/tests/openme.c +++ /dev/null @@ -1,23 +0,0 @@ -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdio.h> - -int main(int argc, char **argv) -{ - int fd; - - if (argc != 2) { - printf("Usage openme <filename>\n"); - exit(1); - } - - fd = open(argv[1], O_RDONLY | O_CREAT, 0600); - if (fd == -1) { - printf("Error opening %s\n", argv[1]); - exit(1); - } - - sleep(10000000); - return 0; -} diff --git a/lustre/tests/openunlink.c b/lustre/tests/openunlink.c deleted file mode 100644 index e7671c8..0000000 --- a/lustre/tests/openunlink.c +++ /dev/null @@ -1,147 +0,0 @@ -#include <stdio.h> -#include <fcntl.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <stdlib.h> -#include <unistd.h> - -#define T1 "write before unlink\n" -#define T2 "write after unlink\n" -char buf[128]; - -int main(int argc, char **argv) -{ - char *fname, *fname2; - int fd, rc; - - if (argc < 2 || argc > 3) { - fprintf(stderr, "usage: %s filename [filename2]\n", argv[0]); - exit(1); - } - - fname = argv[1]; - if (argc == 3) - fname2 = argv[2]; - else - fname2 = argv[1]; - - fprintf(stderr, "opening\n"); - fd = open(fname, O_RDWR | O_TRUNC | O_CREAT, 0644); - if (fd == -1) { - fprintf(stderr, "open (normal) %s\n", strerror(errno)); - exit(1); - } - - fprintf(stderr, "writing\n"); - rc = write(fd, T1, strlen(T1) + 1); - if (rc != strlen(T1) + 1) { - fprintf(stderr, "write (normal) %s\n", strerror(errno)); - exit(1); - } - - if (argc == 3) { - fprintf(stderr, "closing %s\n", fname); - rc = close(fd); - if (rc) { - fprintf(stderr, "close (normal) %s\n", strerror(errno)); - exit(1); - } - - fprintf(stderr, "opening %s\n", fname2); - fd = open(fname2, O_RDWR); - if (fd == -1) { - fprintf(stderr, "open (unlink) %s\n", strerror(errno)); - exit(1); - } - - fprintf (stderr, "unlinking %s\n", 
fname2); - rc = unlink(fname2); - if (rc) { - fprintf(stderr, "unlink %s\n", strerror(errno)); - exit(1); - } - - if (access(fname2, F_OK) == 0) { - fprintf(stderr, "%s still exists\n", fname2); - exit(1); - } - } else { - printf("unlink %s and press enter\n", fname); - getc(stdin); - } - - if (access(fname, F_OK) == 0) { - fprintf(stderr, "%s still exists\n", fname); - exit(1); - } - - fprintf(stderr, "reading\n"); - rc = read(fd, buf, strlen(T1) + 1); - if (rc != strlen(T1) + 1) { - fprintf(stderr, "read (unlink) %s rc %d\n", - strerror(errno), rc); - exit(1); - } - - fprintf(stderr, "comparing data\n"); - if (memcmp(buf, T1, strlen(T1) + 1) ) { - fprintf(stderr, "FAILURE: read wrong data after unlink\n"); - exit(1); - } - - fprintf(stderr, "truncating\n"); - rc = ftruncate(fd, 0); - if (rc ) { - fprintf(stderr, "truncate (unlink) %s\n", strerror(errno)); - exit(1); - } - - fprintf(stderr, "seeking\n"); - rc = lseek(fd, 0, SEEK_SET); - if (rc) { - fprintf(stderr, "seek (after unlink trunc) %s\n", - strerror(errno)); - exit(1); - } - - fprintf(stderr, "writing again\n"); - rc = write(fd, T2, strlen(T2) + 1); - if (rc != strlen(T2) + 1) { - fprintf(stderr, "write (after unlink trunc) %s (rc %d)\n", - strerror(errno), rc); - exit(1); - } - - fprintf(stderr, "seeking\n"); - rc = lseek(fd, 0, SEEK_SET); - if (rc) { - fprintf(stderr, "seek (before unlink read) %s\n", - strerror(errno)); - exit(1); - } - - fprintf(stderr, "reading again\n"); - rc = read(fd, buf, strlen(T2) + 1); - if (rc != strlen(T2) + 1) { - fprintf(stderr, "read (after unlink rewrite) %s\n", - strerror(errno)); - exit(1); - } - - fprintf(stderr, "comparing data again\n"); - if (memcmp(buf, T2, strlen(T2) + 1)) { - fprintf(stderr, "FAILURE: read wrong data after rewrite\n"); - exit(1); - } - - fprintf(stderr, "closing again\n"); - rc = close(fd); - if (rc) { - fprintf(stderr, "close (unlink) %s\n", strerror(errno)); - exit(1); - } - - fprintf(stderr, "SUCCESS - goto beer\n"); - return 0; -} diff --git 
a/lustre/tests/ostreq.sh b/lustre/tests/ostreq.sh deleted file mode 100644 index 2d600ca..0000000 --- a/lustre/tests/ostreq.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`/" -. $SRCDIR/common.sh - -SERVER=localhost -PORT=988 -TMP=${TMP:-/tmp} - -$ACCEPTOR $PORT - -$PTLCTL <<EOF -mynid localhost -setup tcp -connect $SERVER $PORT -add_uuid $SERVER -add_uuid self -quit -EOF - -setup_lustre - -tmp_fs ext2 $TMP/fs 10000 -OBD=${LOOPDEV} - -$OBDCTL <<EOF -device 0 -attach obdext2 OBDEXT2DEV -setup ${OBD} -device 1 -attach ost OSTDEV -setup 0 -device 2 -attach osc OSCDEV -setup -1 -quit -EOF diff --git a/lustre/tests/recovery-cleanup.sh b/lustre/tests/recovery-cleanup.sh deleted file mode 100755 index c8f85ee..0000000 --- a/lustre/tests/recovery-cleanup.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/sh - -set -ex - -LUSTRE=${LUSTRE:-`dirname $0`/..} -LTESTDIR=${LTESTDIR:-"$LUSTRE/../ltest"} -PATH=$PATH:$LUSTRE/utils:$LUSTRE/tests - -. $LTESTDIR/functional/llite/common/common.sh - -# Allow us to override the setup if we already have a mounted system by -# setting SETUP=" " and CLEANUP=" " -SETUP=${SETUP:-"setup"} -CLEANUP=${CLEANUP:-"cleanup"} - -PDSH='pdsh -S -w' - -# XXX I wish all this stuff was in some default-config.sh somewhere -MDSNODE=${MDSNODE:-mdev6} -OSTNODE=${OSTNODE:-mdev7} -CLIENT=${CLIENT:-mdev8} -NETWORKTYPE=${NETWORKTYPE:-tcp} -MOUNTPT=${MOUNTPT:-/mnt/lustre} -CONFIG=${CONFIG:-recovery-cleanup.xml} -MDSDEV=${MDSDEV:-/tmp/mds} -OSTDEV=${OSTDEV:-/tmp/ost} -MDSSIZE=${MDSSIZE:-100000} -OSTSIZE=${OSTSIZE:-100000} - -do_mds() { - $PDSH $MDSNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@" -} - -do_client() { - $PDSH $CLIENT "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@" -} - -do_ost() { - $PDSH $OSTNODE "PATH=\$PATH:$LUSTRE/utils:$LUSTRE/tests; cd $PWD; $@" -} - -drop_request() { - do_mds "echo 0x121 > /proc/sys/lustre/fail_loc" - do_client "$1 & sleep ${TIMEOUT:-5}; sleep 2; kill \$!" 
- do_mds "echo 0 > /proc/sys/lustre/fail_loc" -} - -make_config() { - rm -f $CONFIG - for NODE in $CLIENT $MDSNODE $OSTNODE; do - lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \ - --nettype $NETWORKTYPE || exit 4 - done - lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \ - --size $MDSSIZE || exit 5 - lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \ - --size $OSTSIZE || exit 6 - lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \ - --ost ost1 || exit 7 -} - -start_mds() { - do_mds "lconf $@ $CONFIG" -} - -shutdown_mds() { - do_mds "lconf $@ --cleanup $CONFIG" -} - -start_ost() { - do_ost "lconf $@ $CONFIG" -} - -shutdown_ost() { - do_ost "lconf $@ --cleanup $CONFIG" -} - -mount_client() { - do_client "lconf $@ $CONFIG" -} - -unmount_client() { - do_client "lconf $@ --cleanup $CONFIG" -} - -setup() { - make_config - start_mds ${REFORMAT:---reformat} - start_ost ${REFORMAT:---reformat} - mount_client --timeout=${TIMEOUT:-5} --recovery_upcall=/bin/true -} - -cleanup() { - do_mds "echo 0 > /proc/sys/lustre/fail_loc" - unmount_client $@ || true - shutdown_mds $@ || true - shutdown_ost $@ || true -} - -wait_for_timeout() { - # wait to make sure we enter recovery - # it'd be better if the upcall notified us somehow, I think - sleep $(( ${TIMEOUT:-5} + 2 )) -} - -try_to_cleanup() { - kill -INT $! - unmount_client --force --dump /tmp/client-cleanup-`date +%s`.log - mount_client --timeout=${TIMEOUT:-5} --recovery_upcall=/bin/true -} - -if [ ! -z "$ONLY" ]; then - eval "$ONLY" - exit $? 
-fi - -$SETUP - -drop_request "mcreate /mnt/lustre/1" & wait_for_timeout -try_to_cleanup - -drop_request "tchmod 111 /mnt/lustre/2" & wait_for_timeout -try_to_cleanup - -drop_request "statone /mnt/lustre/2" & wait_for_timeout -try_to_cleanup - -do_client "cp /etc/resolv.conf /mnt/lustre/resolv.conf" -drop_request "cat /mnt/lustre/resolv.conf > /dev/null" & wait_for_timeout -try_to_cleanup - -drop_request "mv /mnt/lustre/resolv.conf /mnt/lustre/renamed" & wait_for_timeout -try_to_cleanup - -drop_request "mlink /mnt/lustre/renamed-again /mnt/lustre/link1" & wait_for_timeout -try_to_cleanup - -drop_request "munlink /mnt/lustre/link1" & wait_for_timeout -try_to_cleanup - -$CLEANUP '--dump /tmp/`hostname`-cleanup.log' diff --git a/lustre/tests/recovery-small-upcall.sh b/lustre/tests/recovery-small-upcall.sh deleted file mode 100755 index 02e9f69..0000000 --- a/lustre/tests/recovery-small-upcall.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -LUSTRE=`dirname $0`/.. -$LUSTRE/utils/lctl --device %$3 recover || logger -p kern.info recovery failed: $@ diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh deleted file mode 100755 index ebf0a0c..0000000 --- a/lustre/tests/recovery-small.sh +++ /dev/null @@ -1,169 +0,0 @@ -#!/bin/sh - -set -ex - -LUSTRE=${LUSTRE:-`dirname $0`/..} -LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest} -PATH=$PATH:$LUSTRE/utils:$LUSTRE/tests - -RLUSTRE=${RLUSTRE:-$LUSTRE} -RPWD=${RPWD:-$PWD} - -. 
$LTESTDIR/functional/llite/common/common.sh - -# Allow us to override the setup if we already have a mounted system by -# setting SETUP=" " and CLEANUP=" " -SETUP=${SETUP:-"setup"} -CLEANUP=${CLEANUP:-"cleanup"} - -PDSH=${PDSH:-'pdsh -S -w'} - -# XXX I wish all this stuff was in some default-config.sh somewhere -MDSNODE=${MDSNODE:-mdev6} -OSTNODE=${OSTNODE:-mdev7} -CLIENT=${CLIENT:-mdev8} -NETWORKTYPE=${NETWORKTYPE:-tcp} -MOUNTPT=${MOUNTPT:-/mnt/lustre} -CONFIG=${CONFIG:-recovery-small.xml} -MDSDEV=${MDSDEV:-/tmp/mds} -OSTDEV=${OSTDEV:-/tmp/ost} -MDSSIZE=${MDSSIZE:-100000} -OSTSIZE=${OSTSIZE:-100000} -UPCALL=${UPCALL:-$RPWD/recovery-small-upcall.sh} -FSTYPE=${FSTYPE:-ext3} -TIMEOUT=${TIMEOUT:-5} -do_mds() { - $PDSH $MDSNODE "PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests; cd $RPWD; $@" || exit $? -} - -do_client() { - $PDSH $CLIENT "PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests; cd $RPWD; $@" || exit $? -} - -do_ost() { - $PDSH $OSTNODE "PATH=\$PATH:$RLUSTRE/utils:$RLUSTRE/tests; cd $RPWD; $@" || exit $? 
-} - -drop_request() { -# OBD_FAIL_MDS_ALL_REQUEST_NET - do_mds "echo 0x121 > /proc/sys/lustre/fail_loc" - do_client "$1" - do_mds "echo 0 > /proc/sys/lustre/fail_loc" -} - -drop_reply() { -# OBD_FAIL_MDS_ALL_REPLY_NET - do_mds "echo 0x120 > /proc/sys/lustre/fail_loc" - do_client "$@" - do_mds "echo 0 > /proc/sys/lustre/fail_loc" -} - -pause_bulk() { -#define OBD_FAIL_OST_BRW_PAUSE_BULK 0x214 - do_ost "echo 0x214 > /proc/sys/lustre/fail_loc" - do_client "$1" - do_client "sync" - do_ost "echo 0 > /proc/sys/lustre/fail_loc" -} -make_config() { - rm -f $CONFIG - for NODE in $CLIENT $MDSNODE $OSTNODE; do - lmc -m $CONFIG --add net --node $NODE --nid `h2$NETWORKTYPE $NODE` \ - --nettype $NETWORKTYPE || exit 4 - done - lmc -m $CONFIG --add mds --node $MDSNODE --mds mds1 --dev $MDSDEV \ - --size $MDSSIZE --fstype $FSTYPE || exit 5 - lmc -m $CONFIG --add ost --node $OSTNODE --ost ost1 --dev $OSTDEV \ - --size $OSTSIZE --fstype $FSTYPE || exit 6 - lmc -m $CONFIG --add mtpt --node $CLIENT --path $MOUNTPT --mds mds1 \ - --ost ost1 || exit 7 -} - -start_mds() { - do_mds "lconf $@ $CONFIG" -} - -shutdown_mds() { - do_mds "lconf $@ --cleanup $CONFIG" -} - -start_ost() { - do_ost "lconf $@ $CONFIG" -} - -shutdown_ost() { - do_ost "lconf $@ --cleanup $CONFIG" -} - -mount_client() { - do_client "lconf $@ $CONFIG" -} - -unmount_client() { - do_client "lconf $@ --cleanup $CONFIG" -} - -setup() { - start_mds --timeout=$TIMEOUT ${REFORMAT} - start_ost --timeout=$TIMEOUT ${REFORMAT} - # XXX we should write our own upcall, when we move this somewhere better. 
- mount_client --timeout=${TIMEOUT} \ - --lustre_upcall=$UPCALL -} - -cleanup() { - do_mds "echo 0 > /proc/sys/lustre/fail_loc" - unmount_client $@ || true - shutdown_mds $@ || true - shutdown_ost $@ || true -} - -replay() { - do_mds "sync" - do_mds 'echo -e "device \$mds1\\nprobe\\nnotransno\\nreadonly" | lctl' - do_client "$1" & - shutdown_mds -f - start_mds - wait - do_client "df -h $MOUNTPT" # trigger failover, if we haven't already -} - -if [ ! -z "$ONLY" ]; then - eval "$ONLY" - exit $? -fi - -make_config - -REFORMAT=--reformat $SETUP -unset REFORMAT - -drop_request "mcreate /mnt/lustre/1" -drop_reply "mcreate /mnt/lustre/2" -# replay "mcreate /mnt/lustre/3" - -drop_request "tchmod 111 /mnt/lustre/2" -drop_reply "tchmod 666 /mnt/lustre/2" -# replay "tchmod 444 /mnt/lustre/2" - -drop_request "statone /mnt/lustre/2" -drop_reply "statone /mnt/lustre/2" -# replay "statone /mnt/lustre/2" - -do_client "cp /etc/resolv.conf /mnt/lustre/resolv.conf" -drop_request "cat /mnt/lustre/resolv.conf > /dev/null" -drop_reply "cat /mnt/lustre/resolv.conf > /dev/null" - -drop_request "mv /mnt/lustre/resolv.conf /mnt/lustre/renamed" -drop_reply "mv /mnt/lustre/renamed /mnt/lustre/renamed-again" - -drop_request "mlink /mnt/lustre/renamed-again /mnt/lustre/link1" -drop_reply "mlink /mnt/lustre/renamed-again /mnt/lustre/link2" - -drop_request "munlink /mnt/lustre/link1" -drop_reply "munlink /mnt/lustre/link2" - -#bug 1423 -drop_reply "touch /mnt/lustre/renamed" - -$CLEANUP diff --git a/lustre/tests/rename.pl b/lustre/tests/rename.pl deleted file mode 100644 index 3ba9368..0000000 --- a/lustre/tests/rename.pl +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/perl -use strict; -use diagnostics; -use Getopt::Long; - -sub usage () { - print "Usage: $0 <mount point prefix> <iterations>\n"; - print "example: $0 --count=2 /mnt/lustre 50\n"; - print " will test in /mnt/lustre1 and /mnt/lustre2\n"; - print " $0 --count=0 /mnt/lustre 50\n"; - print " will test in /mnt/lustre only\n"; - exit; -} -my 
($j, $k, $d, $f1, $f2, $path, $silent); -my $count = 0; -my $create = 10; - -GetOptions("silent!"=> \$silent, - "count=i" => \$count, - "create=i" => \$create); - -my $mtpt = shift || usage(); -my $i = shift || usage(); -my $total = $i; -my $files = 6; -my $dirs = 3; -my $mcreate = 0; # should we use mcreate or open? - -my $which = ""; -if ($count > 0) { - $which = int(rand() * $count) + 1; -} - -$k = $dirs; -if ($create == 0) { - $k = 0; -} -while ($k--) { - $path = "$mtpt$which/$k"; - my $rc = mkdir $path, 0755; - print "mkdir $path failed: $!\n" if !$rc; - $j = $files; - while ($j--) { - `./mcreate $path/$j`; - } -} - -while ($i--) { - my $which = ""; - if ($count > 0) { - $which = int(rand() * $count) + 1; - } - $d = int(rand() * $dirs); - $f1 = int(rand() * $files); - $f2 = int(rand() * $files); - print "[$$] $mtpt$which/$d/$f1 $mtpt$which/$d/$f2 ...\n" if !$silent; - my $rc = rename "$mtpt$which/$d/$f1", "$mtpt$which/$d/$f2"; - print "[$$] done: $rc\n" if !$silent; - if (($total - $i) % 100 == 0) { - print STDERR "[" . $$ . "]" . ($total - $i) . 
" operations\n"; - } -} - -$k = $dirs; -if ($create == 0) { - $k = 0; -} -while ($k--) { - $path = "$mtpt$which/$k"; - $j = $files; - while ($j--) { - unlink "$path/$j"; - } - my $rc = rmdir $path; - print "rmdir $path failed: $!\n" if !$rc; -} - -print "Done.\n"; diff --git a/lustre/tests/runas.c b/lustre/tests/runas.c deleted file mode 100644 index 20981e8..0000000 --- a/lustre/tests/runas.c +++ /dev/null @@ -1,124 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - */ -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <string.h> -#include <errno.h> -#include <sys/types.h> -#include <sys/wait.h> - -#define DEBUG 0 - -void Usage_and_abort(void) -{ - fprintf(stderr, "Usage: runas -u user_id [ -g grp_id ]" - " command_to_be_run \n"); - exit(-1); -} - -// Usage: runas -u user_id [ -g grp_id ] [--] command_to_be_run -// return: the return value of "command_to_be_run" -// NOTE: returning -1 might be the return code of this program itself or -// the "command_to_be_run" - -// ROOT runs "runas" for free -// Other users run "runas" requires chmod 6755 "command_to_be_run" - -int main(int argc, char **argv) -{ - char **my_argv; - int status; - int c,i; - int gid_is_set = 0; - int uid_is_set = 0; - uid_t user_id; - gid_t grp_id; - - if (argc == 1) - Usage_and_abort(); - - // get UID and GID - while ((c = getopt (argc, argv, "+u:g:h")) != -1) { - switch (c) { - case 'u': - user_id = (uid_t)atoi(optarg); - uid_is_set = 1; - if (!gid_is_set) - grp_id = user_id; - break; - - case 'g': - grp_id = (gid_t)atoi(optarg); - gid_is_set = 1; - break; - - case 'h': - Usage_and_abort(); - break; - - default: - //fprintf(stderr, "Bad parameters.\n"); - //Usage_and_abort (); - break; - } - } - - if (!uid_is_set) - Usage_and_abort(); - - if (optind == argc) { - fprintf(stderr, "Bad parameters.\n"); - Usage_and_abort(); - } - - // assemble the command - my_argv = (char**)malloc(sizeof(char*)*(argc+1-optind)); - 
if (my_argv == NULL) { - fprintf(stderr, "Error in allocating memory. (%s)\n", - strerror(errno)); - exit(-1); - } - - for (i = optind; i < argc; i++) { - my_argv[i-optind] = argv[i]; - //printf("%s\n",my_argv[i-optind]); - } - my_argv[i-optind] = NULL; - -#if DEBUG - system("whoami"); -#endif - - // set GID - status = setregid(grp_id, grp_id); - if (status == -1) { - fprintf(stderr, "Cannot change grp_ID to %d, errno=%d (%s)\n", - grp_id, errno, strerror(errno) ); - exit(-1); - } - - // set UID - status = setreuid(user_id, user_id ); - if(status == -1) { - fprintf(stderr,"Cannot change user_ID to %d, errno=%d (%s)\n", - user_id, errno, strerror(errno) ); - exit(-1); - } - - - fprintf(stderr, "running as USER(%d), Grp (%d): ", user_id, grp_id ); - - for (i = 0; i < argc - optind; i++) - fprintf(stderr, " [%s]", my_argv[i]); - - fprintf(stderr, "\n"); - fflush(stderr); - - // The command to be run - execvp(my_argv[0], my_argv); - fprintf(stderr, "execvp fails running %s\n", my_argv[0]); - exit(-1); -} - diff --git a/lustre/tests/rundbench b/lustre/tests/rundbench deleted file mode 100755 index cb417d2..0000000 --- a/lustre/tests/rundbench +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh - -DIR=${DIR:-/mnt/lustre/`hostname`} -#[ -e /proc/sys/portals/debug ] && echo 0 > /proc/sys/portals/debug -mkdir -p $DIR -TGT=$DIR/client.txt -SRC=${SRC:-/usr/lib/dbench/client.txt} -[ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT -SRC=/usr/lib/dbench/client_plain.txt -[ ! -e $TGT -a -e $SRC ] && echo "copying $SRC to $TGT" && cp $SRC $TGT -cd $DIR -echo "running 'dbench $@' on $PWD at `date`" -dbench -c client.txt $@ diff --git a/lustre/tests/runfailure-client-mds-recover.sh b/lustre/tests/runfailure-client-mds-recover.sh deleted file mode 100755 index 8ea79df..0000000 --- a/lustre/tests/runfailure-client-mds-recover.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/sh -SRCDIR=. - -. 
common.sh - -reconnect () { - -$OBDCTL <<EOF -name2dev RPCDEV -newconn -quit -EOF - -} - - - -echo -echo "Test 1 drop request:" `date` "creating /mnt/lustre/foo" -echo -rm -rf /mnt/lustre/* -echo 0x80000107 > /proc/sys/lustre/fail_loc -touch /mnt/lustre/foo & -ps axww | grep touch -echo "MDS dropped create request -- sleep 4 secs - watch for timeout" -sleep 7 -# reconnect -sleep 1 -echo "did things recover? check for file foo." -ls -l /mnt/lustre -echo "Test 1 done" - - -echo -echo "Test 2 test delay queue:" `date` "creating /mnt/lustre/foo" -echo -rm -rf /mnt/lustre/* -mkdir /mnt/lustre/a -echo 0x80000107 > /proc/sys/lustre/fail_loc -touch /mnt/lustre/foo & -ps axww | grep touch -echo "MDS dropped create request -- sleep 4 secs - watch for timeout" -sleep 4 -touch /mnt/lustre/a/f & -#reconnect -sleep 5 -echo "did things recover? check for file foo and a/f" -ls -l /mnt/lustre -ls -l /mnt/lustre/a -echo "Test 2 done" - -echo -echo "Test 3 dropped reply:" `date` "creating /mnt/lustre/foo2" -echo -rm -rf /mnt/lustre/* -echo 0x80000119 > /proc/sys/lustre/fail_loc -touch /mnt/lustre/foo2 & -ps axww | grep touch -echo "MDS dropped create request -- sleep 4 secs - watch for timeout" -sleep 4 -# reconnect -echo failure cleared -sleep 4 -echo "did things recover? check for file foo2" -ls -l /mnt/lustre -echo "Test 3 done" - - -echo -echo "Test 4: Multiple failures" -echo -echo 0x0000107 > /proc/sys/lustre/fail_loc -touch /mnt/lustre/bar & -ps axww | grep touch -echo "touch program will have repeated failures sleeping 10" -sleep 10 -echo 0 > /proc/sys/lustre/fail_loc -# reconnect -sleep 6 -echo "failure cleared" -echo "did things recover? 
Check for file bar" -ls -l /mnt/lustre/bar - -echo "Test 4 done" - - -echo -echo "Test 5: Continue writing during recovery:" `date` "creating and writing/mnt/lustre/foo" -echo -rm -rf /mnt/lustre/* -./openme /mnt/lustre/foo3 & -./writeme /mnt/lustre/iogoeson & -sleep 1 -ls -l /mnt/lustre -echo 0x80000107 > /proc/sys/lustre/fail_loc -mknod /mnt/lustre/dev c 10 240 & -echo "MDS dropped create request -- sleep 4 secs - watch for timeout" -sleep 6 -# reconnect -sleep 1 -echo "did things recover? check for file foo, bar, check log for reopen." -ls -l /mnt/lustre -echo "Test 5 done" diff --git a/lustre/tests/runfailure-mds b/lustre/tests/runfailure-mds deleted file mode 100755 index f2942c3..0000000 --- a/lustre/tests/runfailure-mds +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`" -. $SRCDIR/common.sh - -. $SRCDIR/llmount.sh - -MNT="setup_mount" - -test_fail() { - echo $1 > /proc/sys/lustre/fail_loc - shift - echo "Running '$*'" - $* - - echo "Cleaning up and restarting MDS" - umount /mnt/lustre || fail "unable to unmount" - $OBDCTL <<- EOF - name2dev MDSDEV - cleanup - detach - quit - EOF - - echo 0 > /proc/sys/lustre/fail_loc - - $OBDCTL <<- EOF - newdev - attach mds MDSDEV - setup ${MDS} ${MDSFS} - quit - EOF - $MNT -} - -#set -vx - -touch /mnt/lustre/foo -chmod a+x /mnt/lustre/foo -sync - -# OBD_FAIL_MDS_REINT_SETATTR_WRITE - MDS will discard data from setattr -test_fail 0x10a chmod 000 /mnt/lustre/foo -ls -l /mnt/lustre/foo -[ ! -x /mnt/lustre/foo ] && fail "/mnt/lustre/foo is not executable!" - -# OBD_FAIL_MDS_REINT_CREATE_WRITE - MDS will not create the file -test_fail 0x10c touch /mnt/lustre/bar -ls /mnt/lustre/bar -[ $? -eq 0 ] && fail "/mnt/lustre/bar was created!" - -# OBD_FAIL_MDS_REINT_UNLINK_WRITE - MDS will discard data from unlink -test_fail 0x10e rm /mnt/lustre/foo -ls /mnt/lustre/foo -[ $? -eq 1 ] && fail "/mnt/lustre/foo has been removed!" 
- -# OBD_FAIL_MDS_REINT_RENAME_WRITE - MDS will discard data from rename -test_fail 0x112 mv /mnt/lustre/foo /mnt/lustre/bar -ls /mnt/lustre/foo /mnt/lustre/bar -[ ! -f /mnt/lustre/foo -o -f /mnt/lustre/bar ] && \ - fail "/mnt/lustre/foo has been renamed to bar!" - -echo "Done." diff --git a/lustre/tests/runfailure-net b/lustre/tests/runfailure-net deleted file mode 100755 index ce5634b..0000000 --- a/lustre/tests/runfailure-net +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/sh - -fail() { - echo "ERROR: $1" 1>&2 - [ $2 ] && RC=$2 || RC=1 - exit $RC -} - -test_fail() { - oldtimeout=`cat /proc/sys/lustre/timeout` - echo $TIMEOUT > /proc/sys/lustre/timeout - echo $1 > /proc/sys/lustre/fail_loc - shift - $* & - sleep $TIMEOUT - sleep 2 # fudge - kill -9 $! - - echo $oldtimeout > /proc/sys/lustre/timeout - echo 0 > /proc/sys/lustre/fail_loc - umount -f /mnt/lustre || fail "cannot unmount /mnt/lustre" - mount -t lustre_lite -o "osc=$OSC,mdc=$MDC" none /mnt/lustre || \ - fail "cannot remount $OSC/$MDC on /mnt/lustre" -} - -set -vx - -LCTL=../utils/lctl -OSC=OSC_localhost_UUID -MDC=MDC_client1_UUID -TIMEOUT=5 # complete in finite time - -[ "`mount | grep /mnt/lustre`" ] || echo | sh llmount.sh || exit -1 - -# GETATTR_NET - ls will hang on the getattr -# test_fail 0x102 ls -l /mnt/lustre - -# READPAGE_NET - ls will hang reading in new pages (lost+found is not in cache) -test_fail 0x104 ls /mnt/lustre - -sleep 1 - -# REINT_NET - touch will hang on setattr -test_fail 0x107 touch /mnt/lustre - -# REINT_NET - touch will hang on create -test_fail 0x107 touch /mnt/lustre/tt - -# REINT_NET - mv will hang on rename -touch /mnt/lustre/foo -test_fail 0x107 mv /mnt/lustre/foo /mnt/lustre/bar - -# REINT_NET - rm will hang on unlink -touch /mnt/lustre/salmon -test_fail 0x107 rm /mnt/lustre/salmon - -# OPEN_NET - touch will hang on open -touch /mnt/lustre/foo -test_fail 0x113 cat /mnt/lustre/foo - -# CLOSE_NET - ls will hang on close -test_fail 0x115 ./testreq --close junk_file_handle - -echo 0 
> /proc/sys/lustre/fail_loc - -echo "Done." diff --git a/lustre/tests/runfailure-ost b/lustre/tests/runfailure-ost deleted file mode 100755 index 0c68d5a..0000000 --- a/lustre/tests/runfailure-ost +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`" -. $SRCDIR/common.sh - -setup_opts "$@" - -set -vx - -test_fail() { - echo $1 > /proc/sys/lustre/fail_loc - shift - echo "Running '$*'" - $* & - sleep 1 - kill -9 $! - - echo 0 > /proc/sys/lustre/fail_loc - umount /mnt/lustre || fail "cannot unmount /mnt/lustre" - setup_mount || fail "cannot remount /mnt/lustre" -} - -[ "`mount | grep /mnt/lustre`" ] || . llsetup.sh "$@" || exit -1 - -# OBD_FAIL_OST_OPEN_NET: OST will discard open request packet -touch /mnt/lustre/foo -test_fail 0x208 cat /mnt/lustre/foo - -# OBD_FAIL_OST_CLOSE_NET: OST will discard close request packet -test_fail 0x209 cat /mnt/lustre/foo - -# OBD_FAIL_OST_CREATE_NET: OST will discard create request packet -test_fail 0x204 touch /mnt/lustre/bar - -# OBD_FAIL_OST_DESTROY_NET: OST will discard destroy request packet -test_fail 0x205 rm /mnt/lustre/foo - -# OBD_FAIL_OST_BRW_NET: OST will discard read request packet -echo foo >> /mnt/lustre/foo -test_fail 0x20a cat /mnt/lustre/foo - -# OBD_FAIL_OST_BRW_NET: OST will discard write request packet -test_fail 0x20a "echo bar >> /mnt/lustre/foo" - -# OBD_FAIL_OST_PUNCH_NET: OST will discard truncate request packet -test_fail 0x208 "echo bar > /mnt/lustre/foo" - -# OBD_FAIL_OST_STATFS_NET: OST will discard statfs request packet -test_fail 0x208 df /mnt/lustre - -echo "Done." 
diff --git a/lustre/tests/runiozone b/lustre/tests/runiozone deleted file mode 100755 index c2eec04..0000000 --- a/lustre/tests/runiozone +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/sh -[ -z "$SIZE" ] && SIZE=5g -[ -z "$COUNT" ] && COUNT=100 -[ -z "$VERIFY" ] && VERIFY="-+d" -[ -z "$ODIR" ] && ODIR="-I" -[ -z "$REC" ] && REC=64 -[ -z "$FILE" ] && FILE=/mnt/lustre/iozone.$$ -[ $1 ] && SIZE=$1 -LOOP=0 -rm -f endiozone -echo 0 > /proc/sys/portals/debug -while date; do - LOOP=`expr $LOOP + 1` - echo "Test #$LOOP" - iozone $VERIFY $ODIR -r $REC -i 0 -i 1 -f $FILE -s $SIZE 2>&1 || exit $? - [ -f endiozone -o $LOOP -ge $COUNT ] && rm -f endiozone && exit 0 -done | tee /tmp/iozone.log diff --git a/lustre/tests/runobdstat b/lustre/tests/runobdstat deleted file mode 100644 index 886ce8f2..0000000 --- a/lustre/tests/runobdstat +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -PATH=`dirname $0`/../utils:$PATH - -obdstat filter 1 | while read LINE; do - echo "`date +s`: $LINE" - [ "$1" ] && echo "`date +s`: $LINE" >> $1 -done diff --git a/lustre/tests/runregression-brw.sh b/lustre/tests/runregression-brw.sh deleted file mode 100644 index 4d86248..0000000 --- a/lustre/tests/runregression-brw.sh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/sh -SRCDIR="`dirname $0`/" -export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH - -LOOPS=${LOOPS:-1} -COUNT=${COUNT:-1000000} -COUNT_10=`expr $COUNT / 10` -COUNT_100=`expr $COUNT / 100` - -ENDRUN=endrun-`hostname` - -ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`" - -if [ -z "$ECHONAME" ]; then - echo "$0: needs an ECHO_CLIENT set up first" 1>&2 - exit 1 -fi - -cleanup () { - lctl --device \$$ECHONAME destroy $OID -} - -runthreads() { - THR=$1 - DO=$2 - CNT=$3 - V=$4 - PGS=$5 - - case $DO in - test_getattr) - RW= - ;; - test_brw_write) - DO=test_brw - RW=w - ;; - test_brw_read) - DO=test_brw - RW=r - ;; - esac - - lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1 - - if [ -e $ENDRUN ]; then - rm $ENDRUN - echo 
"exiting because $ENDRUN file was found" - cleanup - fi -} - -[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID" -[ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1 - -# TODO: obdctl needs to check on the progress of each forked thread -# (IPC SHM, sockets?) to see if it hangs. -for i in `seq $LOOPS`; do - PG=1 - PGVW=${PGVW:-16} - PGVR=${PGVR:-16} - - # We use '--threads 1 X' instead of '--device X' so that - # obdctl can monitor the forked thread for progress (TODO). - debug_server_off - debug_client_off - runthreads 1 test_brw_write 1000 -30 $PG - runthreads 1 test_brw_read 1000 -30 $PG - - [ "$PGVW" ] && runthreads 1 test_brw_write 100 -30 $PGVW - [ "$PGVW" ] && runthreads 1 test_brw_read 1600 -30 $PG - [ "$PGVR" ] && runthreads 1 test_brw_read 100 -30 $PGVR - - runthreads 1 test_brw_write $COUNT -30 $PG - runthreads 1 test_brw_read $COUNT -30 $PG - - [ "$PGVW" ] && runthreads 1 test_brw_write $COUNT_10 -30 $PGVW - [ "$PGVR" ] && runthreads 1 test_brw_read $COUNT_10 -30 $PGVR - - runthreads 2 test_brw_write $COUNT -30 $PG - runthreads 2 test_brw_read $COUNT -30 $PG - - [ "$PGVW" ] && runthreads 2 test_brw_write $COUNT_10 -30 $PGVW - [ "$PGVR" ] && runthreads 2 test_brw_read $COUNT_10 -30 $PGVR - - runthreads 10 test_brw_write $COUNT_10 -30 $PG - runthreads 10 test_brw_read $COUNT_10 -30 $PG - - [ "$PGVW" ] && runthreads 10 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 10 test_brw_read $COUNT_100 -60 $PGVR - - runthreads 32 test_brw_write $COUNT_10 -30 $PG - runthreads 32 test_brw_read $COUNT_10 -30 $PG - - [ "$PGVW" ] && runthreads 32 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 32 test_brw_read $COUNT_100 -60 $PGVR - - runthreads 64 test_brw_write $COUNT_10 -30 $PG - runthreads 64 test_brw_read $COUNT_10 -30 $PG - - [ "$PGVW" ] && runthreads 64 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 64 test_brw_read $COUNT_100 -60 
$PGVR - - runthreads 100 test_brw_write $COUNT_100 -60 $PG - runthreads 100 test_brw_read $COUNT_100 -60 $PG - - [ "$PGVW" ] && runthreads 100 test_brw_write $COUNT_100 -60 $PGVW - [ "$PGVR" ] && runthreads 100 test_brw_read $COUNT_100 -60 $PGVR -done - -cleanup diff --git a/lustre/tests/runregression-mds.sh b/lustre/tests/runregression-mds.sh deleted file mode 100755 index ecfe0d9..0000000 --- a/lustre/tests/runregression-mds.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/bin/sh - -SRCDIR="`dirname $0`" - -ENDRUN=endrun-`hostname` - -fail() { - echo "ERROR: $1" 1>&2 - [ $2 ] && RC=$2 || RC=1 - exit $RC -} - -export PATH=/sbin:/usr/sbin:$SRCDIR:$PATH - -cleanup() { - trap 0 - $LCONF --cleanup $OPTS -} - -[ "$COUNT" ] || COUNT=1000 - -[ "$LCONF" ] || LCONF=$SRCDIR/../utils/lconf - -[ -z "$*" ] && fail "usage: $0 [--reformat] <conf>.xml" 1 - -OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" -if [ -z "$OSCMT" ]; then - $LCONF $@ || exit 1 - trap cleanup 0 - OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" - [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1 -fi - -V="-10" -while [ "$1" ]; do - case $1 in - -v|--verbose) V="1";; - --reformat) : ;; - *) OPTS="$OPTS $1" ;; - esac - shift -done - -OSCTMP=`echo $OSCMT | tr "/" "."` -USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1` -USED=`expr $USED + 16` # Some space for the status file - -THREADS=1 -while [ $THREADS -lt 196 ]; do - echo "starting $THREADS threads at `date`" - [ $V -gt 0 ] || echo 0 > /proc/sys/portals/debug - $SRCDIR/createdestroy /mnt/lustre/file-$$ $COUNT $V $THREADS - $SRCDIR/openclose /mnt/lustre/file-$$ $COUNT $THREADS - THREADS=`expr $THREADS + 5` - $LCONF --cleanup $OPTS || fail 10 - $LCONF $OPTS || fail 11 -done - -rm -f $ENDRUN - -NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1` -if [ $NOWUSED -gt $USED ]; then - echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2 - echo "This is normal on BA OSTs, because of subdirectories." 
1>&2 -fi - -cleanup diff --git a/lustre/tests/runregression-net.sh b/lustre/tests/runregression-net.sh deleted file mode 100644 index 6de9a6c..0000000 --- a/lustre/tests/runregression-net.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/sh -SRCDIR="`dirname $0`/" -export PATH=/sbin:/usr/sbin:$SRCDIR/../utils:$PATH - -COUNT=${COUNT:-1000000} -COUNT_10=`expr $COUNT / 10` -COUNT_100=`expr $COUNT / 100` -COUNT_1000=`expr $COUNT / 1000` - -ENDRUN=endrun-`hostname` - -ECHONAME="`lctl device_list 2> /dev/null | awk '/ echo_client / { print $4 }' | tail -1`" - -if [ -z "$ECHONAME" ]; then - echo "$0: needs an ECHO_CLIENT set up first" 1>&2 - exit 1 -fi - -cleanup () { - lctl --device \$$ECHONAME destroy $OID -} - -runthreads() { - THR=$1 - DO=$2 - CNT=$3 - V=$4 - PGS=$5 - - case $DO in - test_getattr) - RW= - ;; - test_brw_write) - DO=test_brw - RW=w - ;; - test_brw_read) - DO=test_brw - RW=r - ;; - esac - - lctl --threads $THR v \$$ECHONAME $DO $CNT $RW $V $PGS $OID || exit 1 - - if [ -e $ENDRUN ]; then - rm $ENDRUN - echo "exiting because $ENDRUN file was found" - cleanup - fi -} - -[ -z "$OID" ] && OID=`lctl --device \\$$ECHONAME create 1 | awk '/is object id/ { print $6 }'` && echo "created object $OID" -[ -z "$OID" ] && echo "error creating object" 1>&2 && exit 1 - -# TODO: obdctl needs to check on the progress of each forked thread -# (IPC SHM, sockets?) to see if it hangs. -for CMD in test_getattr test_brw_write test_brw_read; do - case $CMD in - test_getattr) - PG= - PGV= - ;; - test_brw_write) - PG=1 - PGV=${PGV:-16} - ;; - test_brw_read) - PG=1 - PGV=${PGV:-16} - ;; - esac - - # We use '--threads 1 X' instead of '--device X' so that - # obdctl can monitor the forked thread for progress (TODO). 
- runthreads 1 $CMD 1 1 $PG - runthreads 1 $CMD 100 1 $PG - - echo 0 > /proc/sys/portals/debug - runthreads 1 $CMD $COUNT_100 -10 $PG - [ "$PGV" ] && runthreads 1 $CMD $COUNT_1000 -10 $PGV - - runthreads 1 $CMD $COUNT -30 $PG - [ "$PGV" ] && runthreads 1 $CMD $COUNT_10 -30 $PGV - - runthreads 2 $CMD $COUNT_100 -30 $PG - [ "$PGV" ] && runthreads 2 $CMD $COUNT_1000 -30 $PGV - - runthreads 2 $CMD $COUNT -30 $PG - [ "$PGV" ] && runthreads 2 $CMD $COUNT_10 -30 $PGV - - runthreads 10 $CMD $COUNT_10 -30 $PG - [ "$PGV" ] && runthreads 10 $CMD $COUNT_100 -30 $PGV - - runthreads 100 $CMD $COUNT_100 -30 $PG - [ "$PGV" ] && runthreads 100 $CMD $COUNT_1000 -30 $PGV -done - -lctl --device \$$ECHONAME destroy $OID diff --git a/lustre/tests/runslabinfo b/lustre/tests/runslabinfo deleted file mode 100755 index eba407d..0000000 --- a/lustre/tests/runslabinfo +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -while sleep 1 ; do - echo '-----------------------' - egrep "ll_|ldlm|filp|dentry|inode|portals|size-[0-9]* " /proc/slabinfo -done diff --git a/lustre/tests/runtests b/lustre/tests/runtests deleted file mode 100755 index e59f5f4..0000000 --- a/lustre/tests/runtests +++ /dev/null @@ -1,130 +0,0 @@ -#!/bin/sh -# -# Script which does some basic tests to ensure we haven't regressed. -# Probably a good idea to run this before doing any checkins. -# In the future this can become more fancy, but it's OK for now. 
- -SRCDIR="`dirname $0`" -fail() { - echo "ERROR: $1" 1>&2 - [ $2 ] && RC=$2 || RC=1 - exit $RC -} - -log() { - echo "$*" - lctl mark "$*" -} - -export PATH=/sbin:/usr/sbin:$SRCDIR:$SRCDIR/../utils:$PATH - -ERROR= -SRC=/etc -[ "$COUNT" ] || COUNT=1000 - -[ "$LCONF" ] || LCONF=$SRCDIR/../utils/lconf - -[ "$MCREATE" ] || MCREATE=$SRCDIR/../tests/mcreate - -[ "$MKDIRMANY" ] || MKDIRMANY=$SRCDIR/../tests/mkdirmany - -while [ "$1" ]; do - case $1 in - *.xml) export NAME=`echo $1 | sed "s/.xml//"` ;; - esac - shift -done - -OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" -if [ -z "$OSCMT" ]; then - sh llmount.sh - OSCMT="`mount | awk '/ lustre_lite / { print $3 }' | tail -1`" - [ -z "$OSCMT" ] && fail "no lustre filesystem mounted" 1 - I_MOUNTED="yes" -fi - -OSCTMP=`echo $OSCMT | tr "/" "."` -USED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1` -USED=`expr $USED + 16` # Some space for the status file - -# let's start slowly here... -log "touching $OSCMT" -touch $OSCMT || fail "can't touch $OSCMT" 2 -HOSTS=$OSCMT/hosts.$$ - -# this will cause the following cp to trigger bug #620096 -log "create an empty file $HOSTS" -mcreate $HOSTS - -log "copying /etc/hosts to $HOSTS" -cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS" 3 -log "comparing /etc/hosts and $HOSTS" -diff -u /etc/hosts $HOSTS || fail "$HOSTS different" 4 -log "renaming $HOSTS to $HOSTS.ren" -mv $HOSTS $HOSTS.ren || fail "can't rename $HOSTS to $HOSTS.ren" 5 -log "copying /etc/hosts to $HOSTS again" -cp /etc/hosts $HOSTS || fail "can't cp /etc/hosts to $HOSTS again" 6 -log "truncating $HOSTS" -> $HOSTS || fail "can't truncate $HOSTS" 8 -log "removing $HOSTS" -rm $HOSTS || fail "can't remove $HOSTS" 9 - -DST=$OSCMT/runtest.$$ -# let's start slowly here... 
-log "creating $DST" -mkdir $DST || fail "can't mkdir $DST" 10 - -# ok, that hopefully worked, so let's do a little more, with files that -# haven't changed in the last day (hopefully they don't change during test) -FILES=`find $SRC -type f -mtime +1 -ctime +1 | head -$COUNT` -log "copying files from $SRC to $DST$SRC" -tar cf - $FILES | tar xvf - -C $DST || fail "copying $SRC" 11 - -log "comparing newly copied files" -for f in $FILES; do - [ $V ] && log "verifying $DST/$f" - diff -q $f $DST/$f || ERROR=11 -done - -[ "$ERROR" ] && fail "old and new files are different" $ERROR - -sh llmountcleanup.sh || exit 19 -sh llrmount.sh || exit 20 - -log "comparing previously copied files" -for f in $FILES; do - [ $V ] && log "verifying $DST/$f" - diff -q $f $DST/$f || ERROR=22 -done - -[ "$ERROR" ] && fail "old and new files are different on second diff" $ERROR - -sh llmountcleanup.sh || exit 19 -sh llrmount.sh || exit 20 - -log "renaming $HOSTS.ren to $HOSTS" -mv $HOSTS.ren $HOSTS || fail "can't rename $HOSTS.ren to $HOSTS" 32 -log "truncating $HOSTS" -> $HOSTS || fail "can't truncate $HOSTS" 34 -log "removing $HOSTS" -rm $HOSTS || fail "can't remove $HOSTS again" 36 -log "removing $DST" -rm -r $V $DST || fail "can't remove $DST" 37 - -# mkdirmany test (bug 589) -log "running mkdirmany $OSCMT/base$$ 100" -$MKDIRMANY $OSCMT/base$$ 100 || fail "mkdirmany failed" -log "removing mkdirmany directories" -rmdir $OSCMT/base$$* || fail "mkdirmany cleanup failed" - -log "done" - -NOWUSED=`df | awk "/$OSCTMP/ { print \\$3 }" | tail -1` -if [ $NOWUSED -gt $USED ]; then - echo "Space not all freed: now ${NOWUSED}kB, was ${USED}kB." 1>&2 - echo "This is normal on BA OSTs, because of subdirectories." 
1>&2 -fi - -if [ "$I_MOUNTED" = "yes" ]; then - sh llmountcleanup.sh || exit 29 -fi diff --git a/lustre/tests/runvmstat b/lustre/tests/runvmstat deleted file mode 100755 index b04d84c..0000000 --- a/lustre/tests/runvmstat +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -vmstat 1 | while read LINE ; do - LINE="`date +%s`: $LINE" - echo $LINE - [ "$1" ] && echo $LINE >> $1 -done diff --git a/lustre/tests/sanity-ldlm.sh b/lustre/tests/sanity-ldlm.sh deleted file mode 100644 index e5bd422..0000000 --- a/lustre/tests/sanity-ldlm.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -set -e - -SRCDIR=`dirname $0` -PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH - -MOUNT=${MOUNT:-/mnt/lustre} -DIR=${DIR:-$MOUNT} -export NAME=$NAME -clean() { - echo -n "cln.." - sh llmountcleanup.sh > /dev/null || exit 20 -} -CLEAN=${CLEAN:-clean} -start() { - echo -n "mnt.." - sh llrmount.sh > /dev/null || exit 10 - echo "done" -} -START=${START:-start} - -log() { - echo "$*" - lctl mark "$*" || /bin/true -} - -pass() { - echo PASS -} - -mount | grep $MOUNT || sh llmount.sh - -log '== drop ldlm request ======================== test 1' -echo 0x302 > /proc/sys/lustre/fail_loc -echo 3 > /proc/sys/lustre/timeout -touch $DIR/f & -sleep 5 -echo 0 > /proc/sys/lustre/fail_loc -lctl --device 6 recover -pass -$CLEAN -$START - -log '== drop ldlm reply (bug 1139) ================ test 2' -echo 0x213 > /proc/sys/lustre/fail_loc -echo 3 > /proc/sys/lustre/timeout -touch $DIR/f -pass -$CLEAN -$START - -log '== drop reply after completion (bug 1068) ==== test 3' -touch $DIR/f -stat $DIR/f -echo 0x213 > /proc/sys/lustre/fail_loc -echo 3 > /proc/sys/lustre/timeout -echo foo >> $DIR/f -pass -$CLEAN -$START diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh deleted file mode 100644 index 46d0072..0000000 --- a/lustre/tests/sanity.sh +++ /dev/null @@ -1,881 +0,0 @@ -#!/bin/bash -# -# Run select tests by setting ONLY, or as arguments to the script. -# Skip specific tests by setting EXCEPT. -# -# e.g. 
ONLY="22 23" or ONLY="`seq 32 39`" or EXCEPT="31" -set -e - -ONLY=${ONLY:-"$*"} -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"34 35"} # bugs 1365 and 1360 respectively - -SRCDIR=`dirname $0` -PATH=$PWD/$SRCDIR:$SRCDIR:$SRCDIR/../utils:$PATH - -CHECKSTAT=${CHECKSTAT:-"./checkstat -v"} -CREATETEST=${CREATETEST:-createtest} -LFIND=${LFIND:-lfind} -LSTRIPE=${LSTRIPE:-lstripe} -LCTL=${LCTL:-lctl} -MCREATE=${MCREATE:-mcreate} -TOEXCL=${TOEXCL:-toexcl} -TRUNCATE=${TRUNCATE:-truncate} - -if [ $UID -ne 0 ]; then - RUNAS_ID="$UID" - RUNAS="" -else - RUNAS_ID=${RUNAS_ID:-500} - RUNAS=${RUNAS:-"runas -u $RUNAS_ID"} -fi - -MOUNT=${MOUNT:-/mnt/lustre} -DIR=${DIR:-$MOUNT} -export NAME=$NAME - -SAVE_PWD=$PWD - -clean() { - echo -n "cln.." - sh llmountcleanup.sh > /dev/null || exit 20 -} - -CLEAN=${CLEAN:-clean} -start() { - echo -n "mnt.." - sh llrmount.sh > /dev/null || exit 10 - echo "done" -} -START=${START:-start} - -log() { - echo "$*" - lctl mark "$*" || true -} - -run_one() { - if ! mount | grep -q $MOUNT; then - $START - fi - log "== test $1: $2" - test_$1 || error - pass - cd $SAVE_PWD - $CLEAN -} - -run_test() { - for O in $ONLY; do - if [ "`echo $1 | grep '\<'$O'[a-z]*\>'`" ]; then - echo "" - run_one $1 "$2" - return $? - else - echo -n "." - fi - done - for X in $EXCEPT $ALWAYS_EXCEPT; do - if [ "`echo $1 | grep '\<'$X'[a-z]*\>'`" ]; then - echo "skipping excluded test $1" - return 0 - fi - done - if [ -z "$ONLY" ]; then - run_one $1 "$2" - return $? - fi -} - -error() { - echo FAIL - exit 1 -} - -pass() { - echo PASS -} - -if ! 
mount | grep $MOUNT; then - sh llmount.sh - I_MOUNTED=yes -fi - -echo preparing for tests involving mounts -EXT2_DEV=/tmp/SANITY.LOOP -dd if=/dev/zero of=$EXT2_DEV bs=1k seek=1000 count=1 > /dev/null -mke2fs -F $EXT2_DEV > /dev/null - -test_0() { - touch $DIR/f - $CHECKSTAT -t file $DIR/f || error - rm $DIR/f - $CHECKSTAT -a $DIR/f || error -} -run_test 0 "touch .../f ; rm .../f =============================" - -test_1a() { - mkdir $DIR/d1 - mkdir $DIR/d1/d2 - $CHECKSTAT -t dir $DIR/d1/d2 || error -} -run_test 1a "mkdir .../d1; mkdir .../d1/d2 =====================" - -test_1b() { - rmdir $DIR/d1/d2 - rmdir $DIR/d1 - $CHECKSTAT -a $DIR/d1 || error -} -run_test 1b "rmdir .../d1/d2; rmdir .../d1 =====================" - -test_2a() { - mkdir $DIR/d2 - touch $DIR/d2/f - $CHECKSTAT -t file $DIR/d2/f || error -} -run_test 2a "mkdir .../d2; touch .../d2/f ======================" - -test_2b() { - rm -r $DIR/d2 - $CHECKSTAT -a $DIR/d2 || error -} -run_test 2b "rm -r .../d2; touch .../d2/f ======================" - -test_3a() { - mkdir $DIR/d3 - $CHECKSTAT -t dir $DIR/d3 || error -} -run_test 3a "mkdir .../d3 ======================================" - -test_3b() { - touch $DIR/d3/f - $CHECKSTAT -t file $DIR/d3/f || error -} -run_test 3b "touch .../d3/f ====================================" - -test_3c() { - rm -r $DIR/d3 - $CHECKSTAT -a $DIR/d3 || error -} -run_test 3c "rm -r .../d3 ======================================" - -test_4a() { - mkdir $DIR/d4 - $CHECKSTAT -t dir $DIR/d4 || error -} -run_test 4a "mkdir .../d4 ======================================" - -test_4b() { - mkdir $DIR/d4/d2 - $CHECKSTAT -t dir $DIR/d4/d2 || error -} -run_test 4b "mkdir .../d4/d2 ===================================" - -test_5() { - mkdir $DIR/d5 - mkdir $DIR/d5/d2 - chmod 0707 $DIR/d5/d2 - $CHECKSTAT -t dir -p 0707 $DIR/d5/d2 || error -} -run_test 5 "mkdir .../d5 .../d5/d2; chmod .../d5/d2 ============" - -test_6() { - touch $DIR/f6 - chmod 0666 $DIR/f6 - $CHECKSTAT -t file -p 0666 $DIR/f6 || 
error -} -run_test 6 "touch .../f6; chmod .../f6 =========================" - -test_7a() { - mkdir $DIR/d7 - $MCREATE $DIR/d7/f - chmod 0666 $DIR/d7/f - $CHECKSTAT -t file -p 0666 $DIR/d7/f || error -} -run_test 7a "mkdir .../d7; mcreate .../d7/f; chmod .../d7/f ====" - -test_7b() { - $MCREATE $DIR/d7/f2 - echo -n foo > $DIR/d7/f2 - [ "`cat $DIR/d7/f2`" = "foo" ] || error - $CHECKSTAT -t file -s 3 $DIR/d7/f2 || error -} -run_test 7b "mkdir .../d7; mcreate d7/f2; echo foo > d7/f2 =====" - -test_8() { - mkdir $DIR/d8 - touch $DIR/d8/f - chmod 0666 $DIR/d8/f - $CHECKSTAT -t file -p 0666 $DIR/d8/f || error -} -run_test 8 "mkdir .../d8; touch .../d8/f; chmod .../d8/f =======" - -test_9() { - mkdir $DIR/d9 - mkdir $DIR/d9/d2 - mkdir $DIR/d9/d2/d3 - $CHECKSTAT -t dir $DIR/d9/d2/d3 || error -} -run_test 9 "mkdir .../d9 .../d9/d2 .../d9/d2/d3 ================" - -test_10() { - mkdir $DIR/d10 - mkdir $DIR/d10/d2 - touch $DIR/d10/d2/f - $CHECKSTAT -t file $DIR/d10/d2/f || error -} -run_test 10 "mkdir .../d10 .../d10/d2; touch .../d10/d2/f ======" - -test_11() { - mkdir $DIR/d11 - mkdir $DIR/d11/d2 - chmod 0666 $DIR/d11/d2 - chmod 0705 $DIR/d11/d2 - $CHECKSTAT -t dir -p 0705 $DIR/d11/d2 || error -} -run_test 11 "mkdir .../d11 d11/d2; chmod .../d11/d2 ============" - -test_12() { - mkdir $DIR/d12 - touch $DIR/d12/f - chmod 0666 $DIR/d12/f - chmod 0654 $DIR/d12/f - $CHECKSTAT -t file -p 0654 $DIR/d12/f || error -} -run_test 12 "touch .../d12/f; chmod .../d12/f .../d12/f ========" - -test_13() { - mkdir $DIR/d13 - dd if=/dev/zero of=$DIR/d13/f count=10 - > $DIR/d13/f - $CHECKSTAT -t file -s 0 $DIR/d13/f || error -} -run_test 13 "creat .../d13/f; dd .../d13/f; > .../d13/f ========" - -test_14() { - mkdir $DIR/d14 - touch $DIR/d14/f - rm $DIR/d14/f - $CHECKSTAT -a $DIR/d14/f || error -} -run_test 14 "touch .../d14/f; rm .../d14/f; rm .../d14/f =======" - -test_15() { - mkdir $DIR/d15 - touch $DIR/d15/f - mv $DIR/d15/f $DIR/d15/f2 - $CHECKSTAT -t file $DIR/d15/f2 || error -} 
-run_test 15 "touch .../d15/f; mv .../d15/f .../d15/f2 ==========" - -test_16() { - mkdir $DIR/d16 - touch $DIR/d16/f - rm -rf $DIR/d16/f - $CHECKSTAT -a $DIR/d16/f || error -} -run_test 16 "touch .../d16/f; rm -rf .../d16/f =================" - -test_17a() { - mkdir $DIR/d17 - touch $DIR/d17/f - ln -s $DIR/d17/f $DIR/d17/l-exist - ls -l $DIR/d17 - $CHECKSTAT -l $DIR/d17/f $DIR/d17/l-exist || error - $CHECKSTAT -f -t f $DIR/d17/l-exist || error - rm -f $DIR/l-exist - $CHECKSTAT -a $DIR/l-exist || error -} -run_test 17a "symlinks: create, remove (real) ==================" - -test_17b() { - ln -s no-such-file $DIR/d17/l-dangle - ls -l $DIR/d17 - $CHECKSTAT -l no-such-file $DIR/d17/l-dangle || error - $CHECKSTAT -fa $DIR/d17/l-dangle || error - rm -f $DIR/l-dangle - $CHECKSTAT -a $DIR/l-dangle || error -} -run_test 17b "symlinks: create, remove (dangling) ==============" - -test_18() { - touch $DIR/f - ls $DIR || error -} -run_test 18 "touch .../f ; ls ... ==============================" - -test_19() { - touch $DIR/f - ls -l $DIR - rm $DIR/f - $CHECKSTAT -a $DIR/f || error -} -run_test 19 "touch .../f ; ls -l ... ===========================" - -test_20() { - touch $DIR/f - rm $DIR/f - log "1 done" - touch $DIR/f - rm $DIR/f - log "2 done" - touch $DIR/f - rm $DIR/f - log "3 done" - $CHECKSTAT -a $DIR/f || error -} -run_test 20 "touch .../f ; ls -l ... 
===========================" - -test_21() { - mkdir $DIR/d21 - [ -f $DIR/d21/dangle ] && rm -f $DIR/d21/dangle - ln -s dangle $DIR/d21/link - echo foo >> $DIR/d21/link - cat $DIR/d21/dangle - $CHECKSTAT -t link $DIR/d21/link || error - $CHECKSTAT -f -t file $DIR/d21/link || error -} -run_test 21 "write to dangling link ============================" - -test_22() { - mkdir $DIR/d22 - chown $RUNAS_ID $DIR/d22 - # Tar gets pissy if it can't access $PWD *sigh* - (cd /tmp; - $RUNAS tar cf - /etc/hosts /etc/sysconfig/network | \ - $RUNAS tar xfC - $DIR/d22) - ls -lR $DIR/d22/etc - $CHECKSTAT -t dir $DIR/d22/etc || error - $CHECKSTAT -u \#$RUNAS_ID $DIR/d22/etc || error -} -run_test 22 "unpack tar archive as non-root user ===============" - -test_23() { - mkdir $DIR/d23 - $TOEXCL $DIR/d23/f23 - $TOEXCL -e $DIR/d23/f23 || error -} -run_test 23 "O_CREAT|O_EXCL in subdir ==========================" - -test_24a() { - echo '============ rename sanity =================================' - echo '-- same directory rename' - mkdir $DIR/R1 - touch $DIR/R1/f - mv $DIR/R1/f $DIR/R1/g - $CHECKSTAT -t file $DIR/R1/g || error -} -run_test 24a "touch .../R1/f; rename .../R1/f .../R1/g =========" - -test_24b() { - mkdir $DIR/R2 - touch $DIR/R2/{f,g} - mv $DIR/R2/f $DIR/R2/g - $CHECKSTAT -a $DIR/R2/f || error - $CHECKSTAT -t file $DIR/R2/g || error -} -run_test 24b "touch .../R2/{f,g}; rename .../R2/f .../R2/g =====" - -test_24c() { - mkdir $DIR/R3 - mkdir $DIR/R3/f - mv $DIR/R3/f $DIR/R3/g - $CHECKSTAT -a $DIR/R3/f || error - $CHECKSTAT -t dir $DIR/R3/g || error -} -run_test 24c "mkdir .../R3/f; rename .../R3/f .../R3/g =========" - -test_24d() { - mkdir $DIR/R4 - mkdir $DIR/R4/{f,g} - perl -e "rename \"$DIR/R4/f\", \"$DIR/R4/g\";" - $CHECKSTAT -a $DIR/R4/f || error - $CHECKSTAT -t dir $DIR/R4/g || error -} -run_test 24d "mkdir .../R4/{f,g}; rename .../R4/f .../R4/g =====" - -test_24e() { - echo '-- cross directory renames --' - mkdir $DIR/R5{a,b} - touch $DIR/R5a/f - mv $DIR/R5a/f 
$DIR/R5b/g - $CHECKSTAT -a $DIR/R5a/f || error - $CHECKSTAT -t file $DIR/R5b/g || error -} -run_test 24e "touch .../R5a/f; rename .../R5a/f .../R5b/g ======" - -test_24f() { - mkdir $DIR/R6{a,b} - touch $DIR/R6a/f $DIR/R6b/g - mv $DIR/R6a/f $DIR/R6b/g - $CHECKSTAT -a $DIR/R6a/f || error - $CHECKSTAT -t file $DIR/R6b/g || error -} -run_test 24f "touch .../R6a/f R6b/g; mv .../R6a/f .../R6b/g ====" - -test_24g() { - mkdir $DIR/R7{a,b} - mkdir $DIR/R7a/d - mv $DIR/R7a/d $DIR/R7b/e - $CHECKSTAT -a $DIR/R7a/d || error - $CHECKSTAT -t dir $DIR/R7b/e || error -} -run_test 24g "mkdir .../R7a/d; rename .../R7a/d .../R5b/e ======" - -test_24h() { - mkdir $DIR/R8{a,b} - mkdir $DIR/R8a/d $DIR/R8b/e - perl -e "rename \"$DIR/R8a/d\", \"$DIR/R8b/e\";" - $CHECKSTAT -a $DIR/R8a/d || error - $CHECKSTAT -t dir $DIR/R8b/e || error -} -run_test 24h "mkdir .../R8{a,b} R8a/{d,e}; mv .../R8a/d .../R8b/e" - -test_24i() { - echo "-- rename error cases" - mkdir $DIR/R9 - mkdir $DIR/R9/a - touch $DIR/R9/f - perl -e "rename \"$DIR/R9/f\", \"$DIR/R9/a\";" - $CHECKSTAT -t file $DIR/R9/f || error - $CHECKSTAT -t dir $DIR/R9/a || error - $CHECKSTAT -a file $DIR/R9/a/f || error -} -run_test 24i "rename file to dir error: touch f ; mkdir a ; rename f a =====" - -test_24j() { - mkdir $DIR/R10 - perl -e "rename \"$DIR/R10/f\", \"$DIR/R10/g\"" - $CHECKSTAT -t dir $DIR/R10 || error - $CHECKSTAT -a $DIR/R10/f || error - $CHECKSTAT -a $DIR/R10/g || error -} -run_test 24j "source does not exist ============================" - -test_25a() { - echo '== symlink sanity =======================================' - mkdir $DIR/d25 - ln -s d25 $DIR/s25 - touch $DIR/s25/foo || error -} -run_test 25a "create file in symlinked directory ===============" - -test_25b() { - $CHECKSTAT -t file $DIR/s25/foo || error -} -run_test 25b "lookup file in symlinked directory ===============" - -test_26a() { - mkdir $DIR/d26 - mkdir $DIR/d26/d26-2 - ln -s d26/d26-2 $DIR/s26 - touch $DIR/s26/foo || error -} -run_test 26a "multiple 
component symlink =======================" - -test_26b() { - ln -s d26/d26-2/foo $DIR/s26-2 - touch $DIR/s26-2 || error -} -run_test 26b "multiple component symlink at end of lookup ======" - -test_26c() { - mkdir $DIR/d26.2 - touch $DIR/d26.2/foo - ln -s d26.2 $DIR/s26.2-1 - ln -s s26.2-1 $DIR/s26.2-2 - ln -s s26.2-2 $DIR/s26.2-3 - chmod 0666 $DIR/s26.2-3/foo -} -run_test 26c "chain of symlinks ================================" - -# recursive symlinks (bug 439) -test_26d() { - ln -s d26-3/foo $DIR/d26-3 -} -run_test 26d "create multiple component recursive symlink ======" - -test_26e() { - rm $DIR/d26-3 -} -run_test 26e "unlink multiple component recursive symlink ======" - -test_27a() { - echo '== stripe sanity ========================================' - mkdir $DIR/d27 - $LSTRIPE $DIR/d27/f0 8192 0 1 - $CHECKSTAT -t file $DIR/d27/f0 - pass - log "test_27b: write to one stripe file =========================" - cp /etc/hosts $DIR/d27/f0 -} -run_test 27a "one stripe file ==================================" - -test_27c() { - $LSTRIPE $DIR/d27/f01 8192 0 2 - pass - log "test_27d: write to two stripe file file f01 ================" - dd if=/dev/zero of=$DIR/d27/f01 bs=4k count=4 -} -run_test 27c "create two stripe file f01 =======================" - -test_27d() { - $LSTRIPE $DIR/d27/fdef 0 -1 0 - $CHECKSTAT -t file $DIR/d27/fdef - #dd if=/dev/zero of=$DIR/d27/fdef bs=4k count=4 -} -run_test 27d "create file with default settings ================" - -test_27e() { - $LSTRIPE $DIR/d27/f12 8192 1 2 - $LSTRIPE $DIR/d27/f12 8192 1 2 && error - $CHECKSTAT -t file $DIR/d27/f12 || error - #dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 -} -run_test 27e "lstripe existing file (should return error) ======" - - -test_27f() { - $LSTRIPE $DIR/d27/fbad 100 1 2 || true - dd if=/dev/zero of=$DIR/d27/f12 bs=4k count=4 -} -run_test 27f "lstripe with bad stripe size (should return error on LOV)" - -test_27g() { - $MCREATE $DIR/d27/fnone || error - pass - log "test 27.9: lfind 
============================================" - $LFIND $DIR/d27 -} -run_test 27g "mcreate file without objects to test lfind =======" - -test_28() { - mkdir $DIR/d28 - $CREATETEST $DIR/d28/ct || error -} -run_test 28 "create/mknod/mkdir with bad file types ============" - -test_29() { - mkdir $DIR/d29 - touch $DIR/d29/foo - log 'first d29' - ls -l $DIR/d29 - MDCDIR=${MDCDIR:-/proc/fs/lustre/ldlm/ldlm/MDC_*} - LOCKCOUNTORIG=`cat $MDCDIR/lock_count` - LOCKUNUSEDCOUNTORIG=`cat $MDCDIR/lock_unused_count` - log 'second d29' - ls -l $DIR/d29 - log 'done' - LOCKCOUNTCURRENT=`cat $MDCDIR/lock_count` - LOCKUNUSEDCOUNTCURRENT=`cat $MDCDIR/lock_unused_count` - if [ $LOCKCOUNTCURRENT -gt $LOCKCOUNTORIG ]; then - echo "CURRENT: $LOCKCOUNTCURRENT > $LOCKCOUNTORIG" - error - fi - if [ $LOCKUNUSEDCOUNTCURRENT -gt $LOCKUNUSEDCOUNTORIG ]; then - echo "UNUSED: $LOCKUNUSEDCOUNTCURRENT > $LOCKUNUSEDCOUNTORIG" - error - fi -} -run_test 29 "IT_GETATTR regression ============================" - -test_30() { - cp `which ls` $DIR - $DIR/ls / - rm $DIR/ls -} -run_test 30 "run binary from Lustre (execve) ===================" - -test_31() { - ./openunlink $DIR/f31 $DIR/f31 || error -} -run_test 31 "open-unlink file ==================================" - -test_32a() { - echo "== more mountpoints and symlinks =================" - [ -e $DIR/d32a ] && rm -fr $DIR/d32a - mkdir -p $DIR/d32a/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32a/ext2-mountpoint || error - $CHECKSTAT -t dir $DIR/d32a/ext2-mountpoint/.. || error - umount $DIR/d32a/ext2-mountpoint || error -} -run_test 32a "stat d32a/ext2-mountpoint/.. =====================" - -test_32b() { - [ -e $DIR/d32b ] && rm -fr $DIR/d32b - mkdir -p $DIR/d32b/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32b/ext2-mountpoint || error - ls -al $DIR/d32b/ext2-mountpoint/.. || error - umount $DIR/d32b/ext2-mountpoint || error -} -run_test 32b "open d32b/ext2-mountpoint/.. 
=====================" - -test_32c() { - [ -e $DIR/d32c ] && rm -fr $DIR/d32c - mkdir -p $DIR/d32c/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32c/ext2-mountpoint || error - mkdir -p $DIR/d32c/d2/test_dir - $CHECKSTAT -t dir $DIR/d32c/ext2-mountpoint/../d2/test_dir || error - umount $DIR/d32c/ext2-mountpoint || error -} -run_test 32c "stat d32c/ext2-mountpoint/../d2/test_dir =========" - -test_32d() { - [ -e $DIR/d32d ] && rm -fr $DIR/d32d - mkdir -p $DIR/d32d/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32d/ext2-mountpoint || error - mkdir -p $DIR/d32d/d2/test_dir - ls -al $DIR/d32d/ext2-mountpoint/../d2/test_dir || error - umount $DIR/d32d/ext2-mountpoint || error -} -run_test 32d "open d32d/ext2-mountpoint/../d2/test_dir ==========" - -test_32e() { - [ -e $DIR/d32e ] && rm -fr $DIR/d32e - mkdir -p $DIR/d32e/tmp - TMP_DIR=$DIR/d32e/tmp - ln -s $DIR/d32e $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 - $CHECKSTAT -t link $DIR/d32e/tmp/symlink11 || error - $CHECKSTAT -t link $DIR/d32e/symlink01 || error -} -run_test 32e "stat d32e/symlink->tmp/symlink->lustre-subdir =====" - -test_32f() { - [ -e $DIR/d32f ] && rm -fr $DIR/d32f - mkdir -p $DIR/d32f/tmp - TMP_DIR=$DIR/d32f/tmp - ln -s $DIR/d32f $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 - ls $DIR/d32f/tmp/symlink11 || error - ls $DIR/d32f/symlink01 || error -} -run_test 32f "open d32f/symlink->tmp/symlink->lustre-subdir =====" - -test_32g() { - [ -e $DIR/d32g ] && rm -fr $DIR/d32g - [ -e $DIR/test_dir ] && rm -fr $DIR/test_dir - mkdir -p $DIR/test_dir - mkdir -p $DIR/d32g/tmp - TMP_DIR=$DIR/d32g/tmp - ln -s $DIR/test_dir $TMP_DIR/symlink12 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 - $CHECKSTAT -t link $DIR/d32g/tmp/symlink12 || error - $CHECKSTAT -t link $DIR/d32g/symlink02 || error - $CHECKSTAT -t dir -f $DIR/d32g/tmp/symlink12 || error - $CHECKSTAT -t dir -f $DIR/d32g/symlink02 || error -} -run_test 32g "stat 
d32g/symlink->tmp/symlink->lustre-subdir/test_dir" - -test_32h() { - [ -e $DIR/d32h ] && rm -fr $DIR/d32h - [ -e $DIR/test_dir ] && rm -fr $DIR/test_dir - mkdir -p $DIR/test_dir - mkdir -p $DIR/d32h/tmp - TMP_DIR=$DIR/d32h/tmp - ln -s $DIR/test_dir $TMP_DIR/symlink12 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 - ls $DIR/d32h/tmp/symlink12 || error - ls $DIR/d32h/symlink02 || error -} -run_test 32h "open d32h/symlink->tmp/symlink->lustre-subdir/test_dir" - -test_32i() { - [ -e $DIR/d32i ] && rm -fr $DIR/d32i - mkdir -p $DIR/d32i/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32i/ext2-mountpoint || error - touch $DIR/d32i/test_file - $CHECKSTAT -t file $DIR/d32i/ext2-mountpoint/../test_file || error - umount $DIR/d32i/ext2-mountpoint || error -} -run_test 32i "stat d32i/ext2-mountpoint/../test_file ============" - -test_32j() { - [ -e $DIR/d32j ] && rm -fr $DIR/d32j - mkdir -p $DIR/d32j/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32j/ext2-mountpoint || error - touch $DIR/d32j/test_file - cat $DIR/d32j/ext2-mountpoint/../test_file || error - umount $DIR/d32j/ext2-mountpoint || error -} -run_test 32j "open d32j/ext2-mountpoint/../test_file ============" - -test_32k() { - [ -e $DIR/d32k ] && rm -fr $DIR/d32k - mkdir -p $DIR/d32k/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32k/ext2-mountpoint - mkdir -p $DIR/d32k/d2 - touch $DIR/d32k/d2/test_file || error - $CHECKSTAT -t file $DIR/d32k/ext2-mountpoint/../d2/test_file || error - umount $DIR/d32k/ext2-mountpoint || error -} -run_test 32k "stat d32k/ext2-mountpoint/../d2/test_file =========" - -test_32l() { - [ -e $DIR/d32l ] && rm -fr $DIR/d32l - mkdir -p $DIR/d32l/ext2-mountpoint - mount -t ext2 -o loop $EXT2_DEV $DIR/d32l/ext2-mountpoint || error - mkdir -p $DIR/d32l/d2 - touch $DIR/d32l/d2/test_file - cat $DIR/d32l/ext2-mountpoint/../d2/test_file || error - umount $DIR/d32l/ext2-mountpoint || error -} -run_test 32l "open d32l/ext2-mountpoint/../d2/test_file =========" - -test_32m() 
{ - [ -e $DIR/d32m ] && rm -fr $DIR/d32m - mkdir -p $DIR/d32m/tmp - TMP_DIR=$DIR/d32m/tmp - ln -s $DIR $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 - $CHECKSTAT -t link $DIR/d32m/tmp/symlink11 || error - $CHECKSTAT -t link $DIR/d32m/symlink01 || error -} -run_test 32m "stat d32m/symlink->tmp/symlink->lustre-root =======" - -test_32n() { - [ -e $DIR/d32n ] && rm -fr $DIR/d32n - mkdir -p $DIR/d32n/tmp - TMP_DIR=$DIR/d32n/tmp - ln -s $DIR $TMP_DIR/symlink11 - ln -s $TMP_DIR/symlink11 $TMP_DIR/../symlink01 - ls -l $DIR/d32n/tmp/symlink11 || error - ls -l $DIR/d32n/symlink01 || error -} -run_test 32n "open d32n/symlink->tmp/symlink->lustre-root =======" - -test_32o() { - [ -e $DIR/d32o ] && rm -fr $DIR/d32o - [ -e $DIR/test_file ] && rm -fr $DIR/test_file - touch $DIR/test_file - mkdir -p $DIR/d32o/tmp - TMP_DIR=$DIR/d32o/tmp - ln -s $DIR/test_file $TMP_DIR/symlink12 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 - $CHECKSTAT -t link $DIR/d32o/tmp/symlink12 || error - $CHECKSTAT -t link $DIR/d32o/symlink02 || error - $CHECKSTAT -t file -f $DIR/d32o/tmp/symlink12 || error - $CHECKSTAT -t file -f $DIR/d32o/symlink02 || error -} -run_test 32o "stat d32o/symlink->tmp/symlink->lustre-root/test_file" - -test_32p() { - [ -e $DIR/d32p ] && rm -fr $DIR/d32p - [ -e $DIR/test_file ] && rm -fr $DIR/test_file - touch $DIR/test_file - mkdir -p $DIR/d32p/tmp - TMP_DIR=$DIR/d32p/tmp - ln -s $DIR/test_file $TMP_DIR/symlink12 - ln -s $TMP_DIR/symlink12 $TMP_DIR/../symlink02 - cat $DIR/d32p/tmp/symlink12 || error - cat $DIR/d32p/symlink02 || error -} -run_test 32p "open d32p/symlink->tmp/symlink->lustre-root/test_file" - -# chmod 444 /mnt/lustre/somefile -# open(/mnt/lustre/somefile, O_RDWR) -# Should return -1 -test_33() { - [ -e $DIR/test_33_file ] && rm -fr $DIR/test_33_file - touch $DIR/test_33_file - chmod 444 $DIR/test_33_file - chown $RUNAS_ID $DIR/test_33_file - $RUNAS openfile -f O_RDWR $DIR/test_33_file && error || true -} -run_test 33 "write file with 
mode 444 (should return error) ====" - -test_34() { - $MCREATE $DIR/f - $TRUNCATE $DIR/f 100 - rm $DIR/f -} -run_test 34 "truncate file that has not been opened ============" - -test_35() { - [ -e $DIR/test_35_file ] && rm -fr $DIR/test_35_file - cp /bin/sh $DIR/test_35_file - chmod 444 $DIR/test_35_file - chown $RUNAS_ID $DIR/test_35_file - $DIR/test_35_file && error - return 0 -} -run_test 35 "exec file with mode 444 (should return error) =====" - -test_36a() { - log 36 "cvs operations ====================================" - mkdir -p $DIR/cvsroot - chown $RUNAS_ID $DIR/cvsroot - $RUNAS cvs -d $DIR/cvsroot init -} -run_test 36a "cvs init =========================================" - -test_36b() { - # on the LLNL clusters, runas will still pick up root's $TMP settings, - # which will not be writable for the runas user, and then you get a CVS - # error message with a corrupt path string (CVS bug) and panic. - # We're not using much space, so just stick it in /tmp, which is - # safe. - OLDTMPDIR=$TMPDIR - OLDTMP=$TMP - TMPDIR=/tmp - TMP=/tmp - - cd /etc/init.d - $RUNAS cvs -d $DIR/cvsroot import -m "nomesg" reposname vtag rtag - - TMPDIR=$OLDTMPDIR - TMP=$OLDTMP -} -run_test 36b "cvs import =======================================" - -test_36c() { - cd $DIR - mkdir -p $DIR/reposname - chown $RUNAS_ID $DIR/reposname - $RUNAS cvs -d $DIR/cvsroot co reposname -} -run_test 36c "cvs checkout =====================================" - -test_36d() { - cd $DIR/reposname - $RUNAS touch foo36 - $RUNAS cvs add -m 'addmsg' foo36 -} -run_test 36d "cvs add ==========================================" - -test_36e() { - cd $DIR/reposname - $RUNAS cvs update -} -run_test 36e "cvs update =======================================" - -# XXX change this: use a non root user -test_36f() { - cd $DIR/reposname - $RUNAS cvs commit -m 'nomsg' foo36 -} -run_test 36f "cvs commit =======================================" - -test_37() { - mkdir -p $DIR/dextra - echo f > $DIR/dextra/fbugfile - mount -t 
ext2 -o loop /$EXT2_DEV $DIR/dextra - ls $DIR/dextra |grep "\<fbugfile\>" && error - umount /$EXT2_DEV - rm -f DIR/dextra/fbugfile -} -run_test 37 "ls a mounted file system to check the old contents =====" - -# open(file, O_DIRECTORY) will leak a request and not cleanup (bug 1501) -test_38() { - o_directory $DIR/test38 -} -run_test 38 "open a regular file with O_DIRECTORY ==============" - - -log "cleanup: ======================================================" -rm -r $DIR/[Rdfs][1-9]* -if [ "$I_MOUNTED" = "yes" ]; then - sh llmountcleanup.sh || error -fi - -echo '=========================== finished ===============================' diff --git a/lustre/tests/sanityN.sh b/lustre/tests/sanityN.sh deleted file mode 100644 index 8145e63..0000000 --- a/lustre/tests/sanityN.sh +++ /dev/null @@ -1,131 +0,0 @@ -#!/bin/bash - -set -e - -PATH=$PATH:. - -CHECKSTAT=${CHECKSTAT:-"checkstat -v"} -MOUNT1=${MOUNT1:-/mnt/lustre1} -MOUNT2=${MOUNT2:-/mnt/lustre2} -export NAME=${NAME:-mount2} - -clean() { - echo -n "cln.." - sh llmountcleanup.sh > /dev/null -} - -CLEAN=${CLEAN:-clean} -start() { - echo -n "mnt.." - sh llrmount.sh > /dev/null - echo -n "done" -} -START=${START:-start} - -error () { - echo FAIL - exit 1 -} - -pass() { - echo PASS -} - -mkdir -p $MOUNT2 -mount | grep $MOUNT1 || sh llmount.sh - -echo -n "test 1: check create on 2 mtpt's..." -touch $MOUNT1/f1 -[ -f $MOUNT2/f1 ] || error -pass - -echo "test 2: check attribute updates on 2 mtpt's..." -chmod 777 $MOUNT2/f1 -$CHECKSTAT -t file -p 0777 $MOUNT1/f1 || error -pass - -echo "test 2b: check cached attribute updates on 2 mtpt's..." -touch $MOUNT1/f2b -ls -l $MOUNT2/f2b -chmod 777 $MOUNT2/f2b -$CHECKSTAT -t file -p 0777 $MOUNT1/f2b || error -pass - -echo "test 2c: check cached attribute updates on 2 mtpt's..." -touch $MOUNT1/f2c -ls -l $MOUNT2/f2c -chmod 777 $MOUNT1/f2c -$CHECKSTAT -t file -p 0777 $MOUNT2/f2c || error -pass - -echo "test 3: check after remount attribute updates on 2 mtpt's..." 
-chmod a-x $MOUNT2/f1 -$CLEAN -$START -$CHECKSTAT -t file -p 0666 $MOUNT1/f1 || error -pass - -echo "test 4: unlink on one mountpoint removes file on other..." -rm $MOUNT2/f1 -$CHECKSTAT -a $MOUNT1/f1 || error -pass - -echo -n "test 5: symlink on one mtpt, readlink on another..." -( cd $MOUNT1 ; ln -s this/is/good lnk ) - -[ "this/is/good" = "`perl -e 'print readlink("/mnt/lustre2/lnk");'`" ] || error -pass - -echo -n "test 6: fstat validation on multiple mount points..." -./multifstat $MOUNT1/f6 $MOUNT2/f6 -pass - -if [ -n "$BUG_1365" ]; then -echo -n "test 7: create a file on one mount, truncate it on the other..." -mcreate $MOUNT1/f1 -truncate $MOUNT2/f1 100 -rm $MOUNT1/f1 -pass -else -echo "Skipping test for 1365: set \$BUG_1365 to run it (and crash, likely)." -fi - -echo "test 9: remove of open file on other node..." -./openunlink $MOUNT1/f9 $MOUNT2/f9 || error -pass - -echo "test 9b: remove of open directory on other node..." -./opendirunlink $MOUNT1/dir1 $MOUNT2/dir1 || error -pass - -#echo "test 9c: remove of open special file on other node..." -#./opendevunlink $MOUNT1/dev1 $MOUNT2/dev1 || error -#pass - -echo -n "test 10: append of file with sub-page size on multiple mounts..." -MTPT=1 -> $MOUNT2/f10 -for C in a b c d e f g h i j k l; do - MOUNT=`eval echo \\$MOUNT$MTPT` - echo -n $C >> $MOUNT/f10 - [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 -done -[ "`cat $MOUNT1/f10`" = "abcdefghijkl" ] && pass || error - -echo -n "test 11: write of file with sub-page size on multiple mounts..." 
-MTPT=1 -OFFSET=0 -> $MOUNT2/f11 -for C in a b c d e f g h i j k l; do - MOUNT=`eval echo \\$MOUNT$MTPT` - echo -n $C | dd of=$MOUNT/f11 bs=1 seek=$OFFSET count=1 - [ "$MTPT" -eq 1 ] && MTPT=2 || MTPT=1 - OFFSET=`expr $OFFSET + 1` -done -[ "`cat $MOUNT1/f11`" = "abcdefghijkl" ] && pass || error - -rm -f $MOUNT1/f[0-9]* $MOUNT1/lnk - -$CLEAN - -exit diff --git a/lustre/tests/snaprun.sh b/lustre/tests/snaprun.sh deleted file mode 100755 index ea77cfb..0000000 --- a/lustre/tests/snaprun.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/sh -# Utility script to test several features of a snapshot filesystem -# Assumes that snapshot has already been configured -# -# Copyright (C) 2001 Cluster File Systems, Inc. -# -# This code is issued under the GNU General Public License. -# See the file COPYING in this distribution - -OBDDIR="`dirname $0`/.." -. $OBDDIR/demos/config.sh - -qrun ls $MNTOBD -qrun chown bin.bin $MNTOBD -qrun ls -ld $MNTOBD -qrun ls -ld $MNTSNAP -qrun cp /etc/hosts $MNTOBD -qrun ls $MNTOBD -qrun ls $MNTSNAP - -# More complicated because we can't pass ">>" as an argument easily -echo -n "Run 'echo today >> $MNTOBD/hello' [Y/n]" ; read JUNK -case $JUNK in - n*|N*) echo "not run" ;; - *) plog log "echo today >> $MNTOBD/hello" - echo "today" >> $MNTOBD/hello ;; -esac - -qrun cat $MNTOBD/hello -qrun cat $MNTSNAP/hello -qrun cat $MNTOBD/link -qrun cat $MNTSNAP/link -qrun rm $MNTOBD/goodbye -qrun ls $MNTOBD -qrun ls $MNTSNAP -qrun cat $MNTSNAP/goodbye diff --git a/lustre/tests/stat.c b/lustre/tests/stat.c deleted file mode 100644 index b719900..0000000 --- a/lustre/tests/stat.c +++ /dev/null @@ -1,24 +0,0 @@ -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> - -int main(int argc, char ** argv) -{ - int rc; - struct stat buf; - - if (argc < 2) { - printf("Usage %s filename\n", argv[0]); - return 1; - } - - rc = stat(argv[1], &buf); - if (rc) { - printf("stat(%s) error: %s\n", 
argv[1], strerror(errno)); - } - return rc; -} diff --git a/lustre/tests/statmany.c b/lustre/tests/statmany.c deleted file mode 100644 index edfa47b..0000000 --- a/lustre/tests/statmany.c +++ /dev/null @@ -1,215 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <getopt.h> -#include <unistd.h> -#include <time.h> -#include <limits.h> -#include <sys/ioctl.h> - -#if 0 -#include <linux/extN_fs.h> -#endif -#include <liblustre.h> -#include <linux/lustre_lib.h> -#include <linux/obd.h> - -struct option longopts[] = { - {"ea", 0, 0, 'e'}, - {"lookup", 0, 0, 'l'}, - {"random", 0, 0, 'r'}, - {"stat", 0, 0, 's'}, - {NULL, 0, 0, 0}, -}; -char *shortopts = "ehlr:s0123456789"; - -static int usage(char *prog, FILE *out) -{ - fprintf(out, - "Usage: %s [-r rand_seed] {-s|-e|-l} filenamebase total_files iterations\n" - "-r : random seed\n" - "-s : regular stat() calls\n" - "-e : open then GET_EA ioctl\n" - "-l : lookup ioctl only\n", prog); - exit(out == stderr); -} - -#ifndef LONG_MAX -#define LONG_MAX (1 << ((8 * sizeof(long)) - 1)) -#endif - -int main(int argc, char ** argv) -{ - long i, count, iter = LONG_MAX, mode, offset; - long int start, length = LONG_MAX, last, rc = 0; - char parent[4096], *t; - char c, *prog = argv[0], *base; - int seed = 0; - int fd = -1; - - while ((c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { - char *e; - switch (c) { - case 'r': - seed = strtoul(optarg, &e, 0); - if (*e) { - fprintf(stderr, "bad -r option %s\n", optarg); - usage(prog, stderr); - } - break; - case 'e': - case 'l': - case 's': - mode = c; - break; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (length == LONG_MAX) - length = c - '0'; - else - length = length * 10 + (c - '0'); - break; - case 'h': - usage(prog, stdout); - case '?': - usage(prog, stderr); - } - } - - if (optind + 
2 + (length == LONG_MAX) != argc) { - fprintf(stderr, "missing filenamebase, total_files, or iterations\n"); - usage(prog, stderr); - } - - base = argv[optind]; - if (strlen(base) > 4080) { - fprintf(stderr, "filenamebase too long\n"); - exit(1); - } - - if (seed == 0) { - int f = open("/dev/urandom", O_RDONLY); - - if (f < 0 || read(f, &seed, sizeof(seed)) < sizeof(seed)) - seed = time(0); - if (f > 0) - close(f); - } - - printf("using seed %u\n", seed); - srand(seed); - - count = strtoul(argv[optind + 1], NULL, 0); - if (length == LONG_MAX) { - iter = strtoul(argv[optind + 2], NULL, 0); - printf("running for %lu iterations\n", iter); - } else - printf("running for %lu seconds\n", length); - - start = last = time(0); - - t = strrchr(base, '/'); - if (t == NULL) { - strcpy(parent, "."); - offset = -1; - } else { - strncpy(parent, base, t - base); - offset = t - base + 1; - } - - if (mode == 'l') { - fd = open(parent, O_RDONLY); - if (fd < 0) { - printf("open(%s) error: %s\n", parent, - strerror(errno)); - exit(errno); - } - } - - for (i = 0; i < iter && time(0) - start < length; i++) { - char filename[4096]; - int tmp; - - tmp = random() % count; - sprintf(filename, "%s%d", base, tmp); - - if (mode == 'e') { -#if 0 - fd = open(filename, O_RDWR|O_LARGEFILE); - if (fd < 0) { - printf("open(%s) error: %s\n", filename, - strerror(errno)); - break; - } - rc = ioctl(fd, EXTN_IOC_GETEA, NULL); - if (rc < 0) { - printf("ioctl(%s) error: %s\n", filename, - strerror(errno)); - break; - } - close(fd); - break; -#endif - } else if (mode == 's') { - struct stat buf; - - rc = stat(filename, &buf); - if (rc < 0) { - printf("stat(%s) error: %s\n", filename, - strerror(errno)); - break; - } - } else if (mode == 'l') { - struct obd_ioctl_data data; - char rawbuf[8192]; - char *buf = rawbuf; - int max = sizeof(rawbuf); - - memset(&data, 0, sizeof(data)); - data.ioc_version = OBD_IOCTL_VERSION; - data.ioc_len = sizeof(data); - if (offset >= 0) - data.ioc_inlbuf1 = filename + offset; - 
else - data.ioc_inlbuf1 = filename; - data.ioc_inllen1 = strlen(data.ioc_inlbuf1) + 1; - - if (obd_ioctl_pack(&data, &buf, max)) { - printf("ioctl_pack failed.\n"); - break; - } - - rc = ioctl(fd, IOC_MDC_LOOKUP, buf); - if (rc < 0) { - printf("ioctl(%s) error: %s\n", filename, - strerror(errno)); - break; - } - } - if ((i % 10000) == 0) { - printf(" - stat %lu (time %ld ; total %ld ; last %ld)\n", - i, time(0), time(0) - start, time(0) - last); - last = time(0); - } - } - - if (mode == 'l') - close(fd); - - printf("total: %lu stats in %ld seconds: %f stats/second\n", i, - time(0) - start, ((float)i / (time(0) - start))); - - exit(rc); -} diff --git a/lustre/tests/statone.c b/lustre/tests/statone.c deleted file mode 100644 index 5250984..0000000 --- a/lustre/tests/statone.c +++ /dev/null @@ -1,60 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> - -#include <liblustre.h> -#include <linux/lustre_lib.h> -#include <linux/obd.h> - -int main(int argc, char **argv) -{ - struct obd_ioctl_data data; - char rawbuf[8192], parent[4096], *buf = rawbuf, *base, *t; - int max = sizeof(rawbuf), fd, offset, rc; - - if (argc != 2) { - printf("usage: %s filename\n", argv[0]); - return 1; - } - - base = argv[1]; - t = strrchr(base, '/'); - if (!t) { - strcpy(parent, "."); - offset = -1; - } else { - strncpy(parent, base, t - base); - offset = t - base - 1; - } - - fd = open(parent, O_RDONLY); - if (fd < 0) { - printf("open(%s) error: %s\n", parent, strerror(errno)); - exit(errno); - } - - memset(&data, 0, sizeof(data)); - data.ioc_version = OBD_IOCTL_VERSION; - data.ioc_len = sizeof(data); - if (offset >= 0) - data.ioc_inlbuf1 = base + offset + 2; - else - data.ioc_inlbuf1 = base; - data.ioc_inllen1 = strlen(data.ioc_inlbuf1) + 1; - - if (obd_ioctl_pack(&data, &buf, max)) { - printf("ioctl_pack failed.\n"); - exit(1); - } - - rc = ioctl(fd, IOC_MDC_LOOKUP, buf); - if (rc < 0) { - printf("ioctl(%s/%s) error: %s\n", 
parent, - data.ioc_inlbuf1, strerror(errno)); - exit(errno); - } - - return 0; -} diff --git a/lustre/tests/tbox.sh b/lustre/tests/tbox.sh deleted file mode 100644 index 337e1b8..0000000 --- a/lustre/tests/tbox.sh +++ /dev/null @@ -1,116 +0,0 @@ -# tbox.sh - Shell functions to manage tinderbox build reporting -# Copyright (C) 2002 Cluster File Systems, Inc. -# Gord Eagle <gord@clusterfs.com>, 2002-08-22 - -HOSTNAME=`hostname` -PROGNAME=`echo "$0" | sed -e 's%^.*/%%'` -MAILPROG="${MAILPROG-mail}" - -TBOX_PHASE=build # or test -TBOX_STARTTIME=`date +%s` -TBOX_LOG="${TBOX_LOG-/tmp/tbox.$$.$TBOX_STARTTIME.log}" -TBOX_BUILDMAIL=tinderbox_builds@lustre.org -TBOX_BUILDNAME="${TBOX_BUILDNAME-$PROGNAME-$HOSTNAME}" - -# Send a status message to the list. -tbox_status() { - [ -n "$TBOX_BUILDNAME" -a -n "$TBOX_BUILDMAIL" ] || return 0 - [ "$#" -ge 4 ] || return 1 - if [ "$#" -gt 4 ]; then - log="$5" - echo >> $log - else - log= - fi - - TREE="$1" - SUBJECT="$2" - STATUS="$3" - TIMENOW="$4" - - echo "sending tinderbox mail to $TBOX_BUILDMAIL: $TREE $SUBJECT $STATUS" - - TMPFILE="/tmp/tinderbox.boilerplate.$$.$TIMENOW" - - cat > $TMPFILE <<-EOF - tinderbox: tree: $TREE - tinderbox: starttime: $TBOX_STARTTIME - tinderbox: timenow: $TIMENOW - tinderbox: builddate: $TBOX_STARTTIME - tinderbox: status: $STATUS - tinderbox: buildname: $TBOX_BUILDNAME - tinderbox: errorparser: unix - tinderbox: END - -EOF - - cat $TMPFILE $log | $MAILPROG -s "build $SUBJECT ($TBOX_BUILDNAME)" $TBOX_BUILDMAIL - rm -f $TMPFILE -} - -# Send out the failure or success message based on exit status. -tbox_exit() { - TREE="$1" - TAILPID="$2" - CODE=${3-$?} - if [ $CODE -eq 0 ]; then - SUBJECT=successful - STATUS=success - else - SUBJECT=failed - STATUS="${TBOX_PHASE}_failed" - fi - - # Send off the status message. - trap 0 - tbox_status "$TREE" "$SUBJECT" "$STATUS" - rm -f $TBOX_LOG - - # Wait for tail to display all output, then finish it. 
- sleep 1 - kill $TAILPID - exit $CODE -} - -# Run a subprogram, but stop it from sending its own tinderbox -# messages. -tbox_absorb_log() { - # This probably doesn't do what you think it does... it only prepends - # TBOX_LOG= to our arguments. - set TBOX_LOG= "$@" - - # Now evaluate the command. - eval "$@" -} - -# Start the log for a given tree. -tbox_start_log() { - TREE="$1" - - # Send status messages to stdout, stderr. - exec 6>&1 7>&2 - - [ -n "$TBOX_LOG" ] || return 0 - - # Initialize the output log file. - : > $TBOX_LOG - - # Send all our output to the log. - exec >>$TBOX_LOG 2>&1 - - # Monitor it on the old stdout. - tail -f $TBOX_LOG 1>&6 & - - # Allow tail to print our last output before exiting. - trap "tbox_exit \"$TREE\" $! 1" 1 2 10 15 - trap "tbox_exit \"$TREE\" $!" 0 -} - - -# Begin writing to the log and send out the initial status. -# tbox_start TREE -tbox_start() { - TREE="$1" - tbox_start_log "$TREE" - tbox_status "$TREE" starting building "$TBOX_STARTTIME" -} diff --git a/lustre/tests/tchmod.c b/lustre/tests/tchmod.c deleted file mode 100644 index 08732ff..0000000 --- a/lustre/tests/tchmod.c +++ /dev/null @@ -1,18 +0,0 @@ -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> - -int main(int argc, char **argv) -{ - mode_t mode; - - if (argc != 3) { - printf("usage: %s mode name\n", argv[0]); - return 1; - } - - mode = strtoul(argv[1], NULL, 8); - return chmod(argv[2], mode) ? errno : 0; -} diff --git a/lustre/tests/test.c b/lustre/tests/test.c deleted file mode 100755 index d4c6bf7..0000000 --- a/lustre/tests/test.c +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. 
- * See the file COPYING in this distribution - */ - -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/ioctl.h> -#include <sys/stat.h> -#include <asm/statfs.h> -#include <unistd.h> -#include <linux/lustre_idl.h> - -#define LOOP_DEVICE "/dev/loop0" -#define OBD_DEVICE "/dev/obd" - -int main (int argc, char * argv[]) -{ - int fd, rc, err = -1; - struct stat stat_buf; - struct statfs stfs; - - - if (argc < 2) { - printf("syntax: %s command [argument]\n", argv[0]); - printf("Where command is one of \"setup\", \"create\", \"destroy\", or \"sync\".\n"); - exit(1); - } - if (stat(LOOP_DEVICE, &stat_buf)) { - printf("Couldn't stat(" LOOP_DEVICE ").\n"); - exit(1); - } - printf("Device: %u\n", (unsigned int) stat_buf.st_rdev); - - fd = open (OBD_DEVICE, O_RDONLY); - if (fd == -1) { - printf("Couldn't open " OBD_DEVICE ".\n"); - exit(1); - } - - if (!strcmp(argv[1], "setup")) { - rc = ioctl(fd, OBD_IOC_SETUP, &stat_buf.st_rdev); - fprintf(stderr, "rc = %d, errno = %d\n", rc, errno); - } else if (!strcmp(argv[1], "create")) { - int iter, i; - - if (argc < 3) { - printf("create requires a nonzero argument.\n"); - exit(1); - } - - iter = atoi(argv[2]); - if (iter < 1) { - printf("create requires a nonzero argument.\n"); - exit(1); - } - printf("creating %d objects...\n", iter); - - for (i = 0; i < iter; i++) { - if ((rc = ioctl(fd, OBD_IOC_CREATE, &err))) { - fprintf(stderr, "Error; aborting.\n"); - break; - } - if ((rc = ioctl(fd, OBD_IOC_DESTROY, &err))) { - fprintf(stderr, "Error; aborting.\n"); - break; - } - } - fprintf(stderr, "rc = %d, errno = %d, err = %d\n", - rc, errno, err); - } else if (!strcmp(argv[1], "sync")) { - rc = ioctl(fd, OBD_IOC_SYNC, &err); - fprintf(stderr, "rc = %d, errno = %d, err = %d\n", - rc, errno, err); - } else if (!strcmp(argv[1], "destroy")) { - int ino; - - if (argc < 3) { - printf("destroy requires a nonzero inode number.\n"); - exit(1); - } - - ino = atoi(argv[2]); - if (ino < 1) { - 
printf("destroy requires a nonzero inode number.\n"); - exit(1); - } - - rc = ioctl(fd, OBD_IOC_DESTROY, &ino); - fprintf(stderr, "rc = %d, errno = %d\n", rc, errno); - } else { - printf("Invalid command, run with no arguments for help.\n"); - } - close(fd); - - return 0; -} diff --git a/lustre/tests/test2.c b/lustre/tests/test2.c deleted file mode 100755 index fbbe6bb..0000000 --- a/lustre/tests/test2.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This code is issued under the GNU General Public License. - * See the file COPYING in this distribution - */ - -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <errno.h> -#include <sys/stat.h> -#include <unistd.h> -#include <sys/types.h> - -/* Beware when setting FSROOT that I've not made any attempts to avoid buffer - * overruns below--this is a test program, it's a static buffer. */ -#define FSROOT "/mnt" -#define OBD_ITERATIONS 10000 - -int main (int argc, char * argv[]) -{ - int fd, rc, err = -1; - struct stat stat_buf; - - if (argc < 2) { - printf("syntax: %s command\n", argv[0]); - printf("Where command is one of \"setup\" or \"create\".\n"); - exit(1); - } - - if (!strcmp(argv[1], "setup")) { - printf("This is silly.\n"); - } else if (!strcmp(argv[1], "create")) { - int i, iter; - - if (argc < 3) { - printf("create requires a nonzero argument.\n"); - exit(1); - } - - iter = atoi(argv[2]); - - if (iter < 1) { - printf("create requires a nonzero argument.\n"); - exit(1); - } - printf("creating %d files...\n", iter); - - for (i = 0; i < iter; i++) { - fd = creat(FSROOT "/foo123", S_IRWXU); - close(fd); - unlink(FSROOT "/foo123"); - } - } else { - printf("Invalid command, run with no arguments for help.\n"); - } - - return 0; -} diff --git a/lustre/tests/test_brw.c b/lustre/tests/test_brw.c deleted file mode 100644 index 6cbfcb5..0000000 --- a/lustre/tests/test_brw.c +++ /dev/null @@ -1,221 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include 
<unistd.h> -#include <fcntl.h> -#include <stdlib.h> -#include <errno.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/stat.h> - -// not correctly in the headers yet!! -//#define O_DIRECT 0 -#ifndef O_DIRECT -#define O_DIRECT 040000 /* direct disk access hint */ -#endif - -#define CERROR(fmt, arg...) fprintf(stderr, fmt, ## arg) -#ifndef __u64 -#define __u64 long long -#define cpu_to_le64(v) (v) -#define le64_to_cpu(v) (v) -#endif - -#ifndef LPU64 -#define LPU64 "%Lu" -#define LPX64 "%#Lx" -#endif - -#define READ 1 -#define WRITE 2 - -#define LPDS sizeof(__u64) -int page_debug_setup(void *addr, int len, __u64 off, __u64 id) -{ - off = cpu_to_le64(off); - id = cpu_to_le64(id); - memcpy(addr, (char *)&off, LPDS); - memcpy(addr + LPDS, (char *)&id, LPDS); - - addr += len - LPDS - LPDS; - memcpy(addr, (char *)&off, LPDS); - memcpy(addr + LPDS, (char *)&id, LPDS); - - return 0; -} - -int page_debug_check(char *who, void *addr, int size, __u64 off, __u64 id) -{ - __u64 ne_off; - int err = 0; - - ne_off = le64_to_cpu(off); - id = le64_to_cpu(id); - if (memcmp(addr, (char *)&ne_off, LPDS)) { - CERROR("%s: for offset "LPU64" off: "LPX64" != "LPX64"\n", - who, off, *(__u64 *)addr, ne_off); - err = -EINVAL; - } - if (memcmp(addr + LPDS, (char *)&id, LPDS)) { - CERROR("%s: for offset "LPU64" id: "LPX64" != "LPX64"\n", - who, off, *(__u64 *)(addr + LPDS), id); - err = -EINVAL; - } - - addr += size - LPDS - LPDS; - if (memcmp(addr, (char *)&ne_off, LPDS)) { - CERROR("%s: for offset "LPU64" end off: "LPX64" != "LPX64"\n", - who, off, *(__u64 *)addr, ne_off); - err = -EINVAL; - } - if (memcmp(addr + LPDS, (char *)&id, LPDS)) { - CERROR("%s: for offset "LPU64" end id: "LPX64" != "LPX64"\n", - who, off, *(__u64 *)(addr + LPDS), id); - err = -EINVAL; - } - - return err; -} -#undef LPDS - -void usage(char *prog) -{ - fprintf(stderr, - "usage: %s file count [[d]{r|w|rw} [pages_per_vec [objid]]]\n", - prog); - exit(1); -} - -int main(int argc, char **argv) -{ - int fd; - 
char *buf; - long long count, last, offset; - long pg_vec, len; - long long objid = 3; - struct stat st; - int flags = 0; - int cmd = 0; - char *end; - int rc; - - if (argc < 3 || argc > 6) - usage(argv[0]); - - count = strtoull(argv[2], &end, 0); - if (*end) { - fprintf(stderr, "%s: invalid count '%s'\n", argv[0], argv[2]); - usage(argv[0]); - } - if (argc >= 4) { - if (strchr(argv[3], 'r')) { - cmd = READ; - flags = O_RDONLY; - } - if (strchr(argv[3], 'w')) { - cmd |= WRITE; - flags = O_RDWR | O_CREAT; - } - if (strchr(argv[3], 'd')) { - flags |= O_DIRECT; - } - if (!cmd) - usage(argv[0]); - } else { - cmd = READ | WRITE; - flags = O_RDWR | O_CREAT | O_DIRECT; - } - - if (argc >= 5) { - pg_vec = strtoul(argv[4], &end, 0); - if (*end) { - fprintf(stderr, "%s: invalid pages_per_vec '%s'\n", - argv[0], argv[4]); - usage(argv[0]); - } - } - - if (argc >= 6) { - objid = strtoull(argv[5], &end, 0); - if (*end) { - fprintf(stderr, "%s: invalid objid '%s'\n", - argv[0], argv[5]); - usage(argv[0]); - } - } - - printf("%s: %s on %s(objid "LPX64") for "LPU64"x%ld pages \n", - argv[0], flags & O_DIRECT ? 
"directio" : "i/o", - argv[1], objid, count, pg_vec); - - fd = open(argv[1], flags | O_LARGEFILE); - if (fd == -1) { - fprintf(stderr, "%s: cannot open %s: %s\n", argv[0], - argv[1], strerror(errno)); - return 3; - } - - rc = fstat(fd, &st); - if (rc < 0) { - fprintf(stderr, "%s: cannot stat %s: %s\n", argv[0], - argv[1], strerror(errno)); - return 4; - } - - len = pg_vec * st.st_blksize; - last = (long long)count * len; - - buf = mmap(0, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, 0, 0); - if (!buf) { - fprintf(stderr, "%s: no buffer memory %s\n", - argv[0], strerror(errno)); - return 2; - } - - for (offset = 0; offset < last && cmd & WRITE; offset += len) { - int i; - - for (i = 0; i < len; i += st.st_blksize) - page_debug_setup(buf + i, st.st_blksize, offset + i, - objid); - - rc = write(fd, buf, len); - - for (i = 0; i < len; i += st.st_blksize) { - if (page_debug_check("write", buf + i, st.st_blksize, - offset + i, objid)) - return 10; - } - - if (rc != len) { - fprintf(stderr, "%s: write error: %s, rc %d != %ld\n", - argv[0], strerror(errno), rc, len); - return 4; - } - } - - if (lseek(fd, 0, SEEK_SET) != 0) { - fprintf(stderr, "%s: cannot seek %s\n", - argv[0], strerror(errno)); - return 5; - } - - for (offset = 0; offset < last && cmd & READ; offset += len) { - int i; - - rc = read(fd, buf, len); - if (rc != len) { - fprintf(stderr, "%s: read error: %s, rc %d != %ld\n", - argv[0], strerror(errno), rc, len); - return 6; - } - - for (i = 0; i < len; i += st.st_blksize) { - if (page_debug_check("read", buf + i, st.st_blksize, - offset + i, objid)) - return 11; - } - } - - return 0; -} diff --git a/lustre/tests/testreq.c b/lustre/tests/testreq.c deleted file mode 100644 index 774398d..0000000 --- a/lustre/tests/testreq.c +++ /dev/null @@ -1,141 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. 
- * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include <stdlib.h> -#include <stdio.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <signal.h> -#include <unistd.h> -#include <sys/ioctl.h> - -#define _GNU_SOURCE -#include <getopt.h> -#undef _GNU_SOURCE - -#include <liblustre.h> -#include <linux/lustre_mds.h> - -static void usage(char *argv0, int status) -{ - printf( -"Usage: %s [OPTION...]\n\ -\n\ ---getattr <directory>\n\ ---setattr <directory>\n\ ---readpage <directory>\n\ ---open <directory>\n\ ---close <directory handle (returned by open)>\n\ ---create <new name>\n", argv0); - - exit(status); -} - -int main(int argc, char **argv) -{ - int fd = 0; - int rc = 0; - int c = 0; - long cmd = 0; - unsigned long arg; - char *short_opts = "h", *name = argv[0]; - static struct option long_opts[] = { -#define OPT_GETATTR -2 - {"getattr", no_argument, NULL, OPT_GETATTR}, -#define OPT_READPAGE -3 - {"readpage", no_argument, NULL, OPT_READPAGE}, -#define OPT_SETATTR -4 - {"setattr", no_argument, NULL, OPT_SETATTR}, -#define OPT_CREATE -5 - {"create", no_argument, NULL, OPT_CREATE}, -#define OPT_OPEN -6 - {"open", no_argument, NULL, OPT_OPEN}, -#define OPT_CLOSE -7 - {"close", required_argument, NULL, OPT_CLOSE}, -#define OPT_HELP 'h' - {"help", no_argument, NULL, OPT_HELP}, - {0} - }; - - do { 
- c = getopt_long(argc, argv, short_opts, long_opts, NULL); - - switch (c) { - case OPT_HELP: - usage(argv[0], 0); - break; - case OPT_GETATTR: - cmd = IOC_REQUEST_GETATTR; - name = "getattr"; - arg = 2; - break; - case OPT_SETATTR: - cmd = IOC_REQUEST_SETATTR; - name = "setattr"; - arg = 2; - break; - case OPT_READPAGE: - cmd = IOC_REQUEST_READPAGE; - name = "readpage"; - arg = 2; - break; - case OPT_CREATE: - cmd = IOC_REQUEST_CREATE; - name ="create"; - arg = 2; - break; - case OPT_OPEN: - cmd = IOC_REQUEST_OPEN; - name = "open"; - arg = 2; - break; - case OPT_CLOSE: - cmd = IOC_REQUEST_CLOSE; - name = "close"; - arg = strtoul(optarg, NULL, 0); - break; - case '?': - usage(argv[0], 1); - } - } while (c != -1); - - if (cmd == 0) - usage(argv[0], 1); - - fd = open("/dev/request", O_RDONLY); - if (fd == -1) { - fprintf(stderr, "error opening /dev/request: %s\n", - strerror(errno)); - exit(1); - } - - fprintf(stderr, "Executing %s test (arg=%lu)...\n", name, arg); - if (cmd == IOC_REQUEST_OPEN) { - rc = ioctl(fd, cmd, &arg); - printf("%lu\n", arg); - } else - rc = ioctl(fd, cmd, arg); - fprintf(stderr, "result code: %d\n", rc); - - return 0; -} diff --git a/lustre/tests/toexcl.c b/lustre/tests/toexcl.c deleted file mode 100644 index 7f099e8..0000000 --- a/lustre/tests/toexcl.c +++ /dev/null @@ -1,77 +0,0 @@ -#include <sys/types.h> -#include <sys/stat.h> -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <errno.h> -#include <string.h> -#include <unistd.h> - -void -usage (char *argv0, int help) -{ - char *progname = strrchr(argv0, '/'); - - if (progname == NULL) - progname = argv0; - - fprintf (help ? 
stdout : stderr, - "Usage: %s [-e] file\n", progname); - - if (!help) - { - fprintf (stderr, " or try '-h' for help\n"); - exit (1); - } - - printf ("Create the given file with O_EXCL...\n"); - printf (" -e expect EEXIST\n"); - printf (" -h print help"); - printf (" Exit status is 0 on success, 1 on failure\n"); -} - -int main(int argc, char **argv) -{ - int rc; - int want_eexist = 0; - - while ((rc = getopt (argc, argv, "eh")) != -1) - switch (rc) - { - case 'e': - want_eexist = 1; - break; - case 'h': - usage (argv[1], 1); - return (0); - default: - usage (argv[0], 0); - } - - if (optind != argc - 1) { - usage (argv[0], 0); - return 1; - } - - rc = open(argv[optind], O_CREAT|O_EXCL, 0644); - if (rc == -1) - { - if (want_eexist && errno == EEXIST) - { - printf("open failed: %s (expected)\n", strerror(errno)); - return (0); - } - printf("open failed: %s\n", strerror(errno)); - return (1); - } else { - if (want_eexist) - { - printf("open success (expecting EEXIST).\n"); - return (1); - } - printf("open success.\n"); - return (0); - } - - return ((rc == 0) ? 
0 : 1); -} diff --git a/lustre/tests/trivial.sh b/lustre/tests/trivial.sh deleted file mode 100755 index abfecf0..0000000 --- a/lustre/tests/trivial.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# Simple test of mount and unmount -sh llsetup.sh obdecho.cfg net-local.cfg client-echo.cfg || exit 1 -# FIXME: Scan logs for any unusual things (unbalanced allocations, errors) -sh llcleanup.sh obdecho.cfg net-local.cfg client-echo.cfg -OBD_LEAK=`dmesg | awk '/obd memory leaked/ { print $7 }'` -[ "$OBD_LEAK" != "0" ] && echo "OBD memory leak: $OBD_LEAK bytes" && ERR=1 -NAL_LEAK=`dmesg | awk '/NAL unloaded/ { print $7 }' -[ "$NAL_LEAK" != "0)" ] && echo "Portals memory leak: $NAL_LEAK" && ERR=1 -/sbin/lsmod | grep -q portals && "Portals module still loaded" && ERR=1 -exit $ERR diff --git a/lustre/tests/truncate.c b/lustre/tests/truncate.c deleted file mode 100644 index c49fb15..0000000 --- a/lustre/tests/truncate.c +++ /dev/null @@ -1,24 +0,0 @@ -#include <unistd.h> -#include <stdio.h> -#include <errno.h> -#include <string.h> -#include <stdlib.h> - -int main(int argc, char **argv) -{ - unsigned long long off; - int err; - - if (argc != 3) { - printf("usage %s file bytes\n", argv[0]); - return 1; - } - - off = strtoull(argv[2], NULL, 0); - err = truncate64(argv[1], off); - if ( err ) - printf("Error truncating %s to %Ld: %s\n", argv[1], off, - strerror(errno)); - - return err; -} diff --git a/lustre/tests/uml.sh b/lustre/tests/uml.sh deleted file mode 100644 index 2b3adc3..0000000 --- a/lustre/tests/uml.sh +++ /dev/null @@ -1,96 +0,0 @@ -#!/bin/bash - -export PATH=`dirname $0`/../utils:$PATH - -config=${1:-uml.xml} -LMC=${LMC:-lmc} -TMP=${TMP:-/tmp} - -MDSDEV=${MDSDEV:-$TMP/mds1} -MDSSIZE=${MDSSIZE:-50000} - -OSTDEVBASE=$TMP/ost -#OSTDEV1=${OSTDEV1:-${OSTDEVBASE}1} -#OSTDEV2=${OSTDEV2:-${OSTDEVBASE}2} -#etc -OSTSIZE=${OSTSIZE:-100000} -STRIPECNT=${STRIPECNT:-1} - -FSTYPE=${FSTYPE:-ext3} - -NETTYPE=${NETTYPE:-tcp} - -# NOTE - You can't have different MDS/OST nodes and also 
have clients on the -# MDS/OST nodes without using --endlevel and --startlevel during lconf. -# You can put both MDS/OST on one node and client can be there too. -# CLIENTS is a space-separated list of client nodes. -# -# The rule is that both the MDS and the OST must be set up before any -# of the clients can be started, so plan accordingly. - -# Three separate systems -MDSNODE=${MDSNODE:-uml1} -OSTNODES=${OSTNODES:-"uml2 uml2"} -CLIENTS=${CLIENTS:-"uml3"} - -# Single system with additional clients -#MDSNODE=uml1 -#OSTNODES="uml1 uml1" -#CLIENTS="$MDSNODE client" - -# Two systems with client on MDS, and additional clients (set up OST first) -#MDSNODE=uml1 -#OSTNODES="uml2 uml2" -#CLIENTS="$MDSNODE client" - -# Two systems with client on OST, and additional clients (set up MDS first) -#MDSNODE=uml1 -#OSTNODES="uml2 uml2" -#CLIENTS="$OSTNODES client" - -rm -f $config - -h2tcp () { - case $1 in - client) echo '\*' ;; - *) echo $1 ;; - esac -} - -h2elan () { - case $1 in - client) echo '\*' ;; - *) echo $1 | sed "s/[^0-9]*//" ;; - esac -} - -# create nodes -echo -n "adding NET for:" -for NODE in `echo $MDSNODE $OSTNODES $CLIENTS | tr -s " " "\n" | sort -u`; do - echo -n " $NODE" - ${LMC} -m $config --add net --node $NODE --nid `h2$NETTYPE $NODE` --nettype $NETTYPE || exit 1 -done - -# configure mds server -echo; echo "adding MDS on: $MDSNODE" -${LMC} -m $config --add mds --format --node $MDSNODE --mds mds1 --fstype $FSTYPE --dev $MDSDEV --size $MDSSIZE ||exit 10 - -# configure ost -${LMC} -m $config --add lov --lov lov1 --mds mds1 --stripe_sz 65536 --stripe_cnt $STRIPECNT --stripe_pattern 0 || exit 20 -COUNT=1 -echo -n "adding OST on:" -for NODE in $OSTNODES; do - eval OSTDEV=\$OSTDEV$COUNT - echo -n " $NODE" - OSTDEV=${OSTDEV:-$OSTDEVBASE$COUNT} - ${LMC} -m $config --add ost --node $NODE --lov lov1 --fstype $FSTYPE --dev $OSTDEV --size $OSTSIZE || exit 21 - COUNT=`expr $COUNT + 1` -done - -# create client config(s) -echo; echo -n "adding CLIENT on:" -for NODE in 
$CLIENTS; do - echo -n " $NODE" - ${LMC} -m $config --add mtpt --node $NODE --path /mnt/lustre --mds mds1 --lov lov1 || exit 30 -done -echo diff --git a/lustre/tests/unlinkmany.c b/lustre/tests/unlinkmany.c deleted file mode 100644 index ba1bee7..0000000 --- a/lustre/tests/unlinkmany.c +++ /dev/null @@ -1,74 +0,0 @@ -#include <stdio.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <time.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> - -void usage(char *prog) -{ - printf("usage: %s filenamefmt count\n", prog); - printf(" %s filenamefmt start count\n", prog); -} - -int main(int argc, char ** argv) -{ - int i, rc = 0; - char format[4096], *fmt; - char filename[4096]; - long start, last; - long begin = 0, count; - - if (argc < 3 || argc > 4) { - usage(argv[0]); - return 1; - } - - if (strlen(argv[1]) > 4080) { - printf("name too long\n"); - return 1; - } - - start = last = time(0); - - if (argc == 3) { - count = strtol(argv[2], NULL, 0); - if (count < 1) { - printf("count must be at least one\n"); - return 1; - } - } else { - begin = strtol(argv[2], NULL, 0); - count = strtol(argv[3], NULL, 0); - } - - if (strchr(argv[1], '%')) { - fmt = argv[1]; - } else { - sprintf(format, "%s%%d", argv[1]); - fmt = format; - } - for (i = 0; i < count; i++, begin++) { - sprintf(filename, fmt, begin); - rc = unlink(filename); - if (rc) { - printf("unlink(%s) error: %s\n", - filename, strerror(errno)); - rc = errno; - break; - } - if ((i % 10000) == 0) { - printf(" - unlinked %d (time %ld ; total %ld ; last " - "%ld)\n", i, time(0), time(0) - start, - time(0) - last); - last = time(0); - } - } - printf("total: %d unlinks in %ld seconds: %f unlinks/second\n", i, - time(0) - start, ((float)i / (time(0) - start))); - - return rc; -} diff --git a/lustre/tests/utime.c b/lustre/tests/utime.c deleted file mode 100644 index c6a5d7d..0000000 --- a/lustre/tests/utime.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Simple test for 
validating mtime on a file create and set via utime. - */ -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> -#include <time.h> -#include <string.h> -#include <utime.h> -#include <errno.h> - -void usage(char *prog) -{ - fprintf(stderr, "usage: %s <filename>\n", prog); - exit(1); -} - -int main(int argc, char *argv[]) -{ - long before_mknod, after_mknod; - long before_utime, after_utime; - const char *prog = argv[0]; - const char *filename = argv[1]; - struct stat st; - int rc; - - if (argc != 2) - usage(argv[0]); - - before_mknod = time(0); - rc = mknod(filename, 0700, S_IFREG); - after_mknod = time(0); - if (rc && errno != EEXIST) { - fprintf(stderr, "%s: mknod(%s) failed: rc %d: %s\n", - prog, filename, errno, strerror(errno)); - return 2; - } else if (!rc) { - rc = stat(filename, &st); - if (rc) { - fprintf(stderr, "%s: stat(%s) failed: rc %d: %s\n", - prog, filename, errno, strerror(errno)); - return 3; - } - - if (st.st_mtime < before_mknod || st.st_mtime > after_mknod) { - fprintf(stderr, - "%s: bad mknod times %lu <= %lu <= %lu false\n", - prog, before_mknod, st.st_mtime, after_mknod); - return 4; - } - - printf("%s: good mknod times %lu <= %lu <= %lu\n", - prog, before_mknod, st.st_mtime, after_mknod); - - sleep(5); - } - - before_utime = time(0); - rc = utime(filename, NULL); - after_utime = time(0); - if (rc) { - fprintf(stderr, "%s: utime(%s) failed: rc %d: %s\n", - prog, filename, errno, strerror(errno)); - return 5; - } - - rc = stat(filename, &st); - if (rc) { - fprintf(stderr, "%s: second stat(%s) failed: rc %d: %s\n", - prog, filename, errno, strerror(errno)); - return 6; - } - - if (st.st_mtime < before_utime || st.st_mtime > after_utime) { - fprintf(stderr, "%s: bad utime times %lu <= %lu <= %lu false\n", - prog, before_utime, st.st_mtime, after_utime); - return 7; - } - - printf("%s: good utime times %lu <= %lu <= %lu\n", - prog, before_utime, st.st_mtime, after_utime); 
- - return 0; -} diff --git a/lustre/tests/wantedi.c b/lustre/tests/wantedi.c deleted file mode 100644 index 94ed7495..0000000 --- a/lustre/tests/wantedi.c +++ /dev/null @@ -1,49 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <getopt.h> -#include <unistd.h> -#include <time.h> -#include <limits.h> -#include <sys/ioctl.h> -#include <liblustre.h> -#include <linux/obd.h> -#include <linux/lustre_lib.h> - -static int usage(char *prog, FILE *out) -{ - fprintf(out, - "Usage: %s <dir> <desired child ino>\n", prog); - exit(out == stderr); -} - -#define EXTN_IOC_CREATE_INUM _IOW('f', 5, long) - -int main(int argc, char ** argv) -{ - int dirfd, wantedi, rc; - - if (argc < 2 || argc > 3) - usage(argv[0], stderr); - - dirfd = open(argv[1], O_RDONLY); - if (dirfd < 0) { - perror("open"); - exit(1); - } - - wantedi = atoi(argv[2]); - printf("Creating %s/%d with ino %d\n", argv[1], wantedi, wantedi); - - rc = ioctl(dirfd, EXTN_IOC_CREATE_INUM, wantedi); - if (rc < 0) { - perror("ioctl(EXTN_IOC_CREATE_INUM)"); - exit(2); - } - - return 0; -} diff --git a/lustre/tests/writeme.c b/lustre/tests/writeme.c deleted file mode 100644 index a376063..0000000 --- a/lustre/tests/writeme.c +++ /dev/null @@ -1,32 +0,0 @@ -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -int main(int argc, char **argv) -{ - int fd, rc; - int i = 0; - char buf[4096]; - - memset(buf, 0, 4096); - - if (argc != 2) { - printf("Usage: %s <filename>\n", argv[0]); - exit(1); - } - - fd = open(argv[1], O_RDWR | O_CREAT, 0600); - if (fd == -1) { - printf("Error opening %s\n", argv[1]); - exit(1); - } - - while (1) { - sprintf(buf, "write %d\n", i); - rc = write(fd, buf, sizeof(buf)); - sleep(1); - } - return 0; -} diff --git a/lustre/utils/.cvsignore b/lustre/utils/.cvsignore deleted file mode 100644 index 06a1588..0000000 --- 
a/lustre/utils/.cvsignore +++ /dev/null @@ -1,18 +0,0 @@ -.Xrefs -config.log -config.status -configure -Makefile -Makefile.in -.deps -tags -TAGS -obdctl -lctl -lfind -lstripe -obdstat -obdio -obdbarrier -lload -wirecheck \ No newline at end of file diff --git a/lustre/utils/Lustre/.cvsignore b/lustre/utils/Lustre/.cvsignore deleted file mode 100644 index 97e22b9..0000000 --- a/lustre/utils/Lustre/.cvsignore +++ /dev/null @@ -1,4 +0,0 @@ -Makefile -Makefile.in -.deps -*.pyc diff --git a/lustre/utils/Lustre/Makefile.am b/lustre/utils/Lustre/Makefile.am deleted file mode 100644 index e8e522f..0000000 --- a/lustre/utils/Lustre/Makefile.am +++ /dev/null @@ -1,2 +0,0 @@ -pymod_SCRIPTS = __init__.py lustredb.py error.py cmdline.py -EXTRA_DIST = $(pymod_SCRIPTS) diff --git a/lustre/utils/Lustre/__init__.py b/lustre/utils/Lustre/__init__.py deleted file mode 100644 index c1b93e6..0000000 --- a/lustre/utils/Lustre/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -__all__ = ["lustredb"] - -from lustredb import LustreDB, LustreDB_XML, LustreDB_LDAP -from error import LconfError, OptionError -from cmdline import Options - -CONFIG_VERSION="2003060501" diff --git a/lustre/utils/Lustre/cmdline.py b/lustre/utils/Lustre/cmdline.py deleted file mode 100644 index 53bb6e8..0000000 --- a/lustre/utils/Lustre/cmdline.py +++ /dev/null @@ -1,178 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (C) 2002 Cluster File Systems, Inc. -# Author: Robert Read <rread@clusterfs.com> -# This file is part of Lustre, http://www.lustre.org. -# -# Lustre is free software; you can redistribute it and/or -# modify it under the terms of version 2 of the GNU General Public -# License as published by the Free Software Foundation. -# -# Lustre is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with Lustre; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -# - -# Standard the comand line handling for all the python tools. - -import sys, getopt, types -import string -import error - -class Options: - FLAG = 1 - PARAM = 2 - INTPARAM = 3 - def __init__(self, cmd, remain_help, options): - self.options = options - shorts = "" - longs = [] - options.append(('help,h', "Print this help")) - for opt in options: - long = self.long(opt) - short = self.short(opt) - if self.type(opt) in (Options.PARAM, Options.INTPARAM): - if short: short = short + ':' - if long: long = long + '=' - shorts = shorts + short - longs.append(long) - self.short_opts = shorts - self.long_opts = longs - self.cmd = cmd - self.remain_help = remain_help - - def init_values(self): - values = {} - for opt in self.options: - values[self.key(opt)] = self.default(opt) - return values - - def long(self, option): - n = string.find(option[0], ',') - if n < 0: return option[0] - else: return option[0][0:n] - - def key(self, option): - key = self.long(option) - return string.replace(key, '-', '_') - - def short(self, option): - n = string.find(option[0], ',') - if n < 0: return '' - else: return option[0][n+1:] - - def help(self, option): - return option[1] - - def type(self, option): - if len(option) >= 3: - return option[2] - return Options.FLAG - - def default(self, option): - if len(option) >= 4: - return option[3] - return None - - def lookup_option(self, key, key_func): - for opt in self.options: - if key_func(opt) == key: - return opt - - def lookup_short(self, key): - return self.lookup_option(key, self.short) - - def lookup_long(self, key): - return self.lookup_option(key, self.long) - - def handle_opts(self, opts): - values = self.init_values() - for o, a in opts: - if o[0:2] != '--': - option = self.lookup_short(o[1:]) - else: - option = self.lookup_long(o[2:]) - if 
self.type(option) == Options.PARAM: - val = a - elif self.type(option) == Options.INTPARAM: - try: - val = int(a) - except ValueError, e: - raise error.OptionError("option: '%s' expects integer value, got '%s' " % (o,a)) - else: - val = 1 - values[self.key(option)] = val - return values - - - class option_wrapper: - def __init__(self, values): - self.__dict__['values'] = values - def __getattr__(self, name): - if self.values.has_key(name): - return self.values[name] - else: - raise error.OptionError("bad option name: " + name) - def __setattr__(self, name, value): - self.values[name] = value - - def parse(self, argv): - try: - opts, args = getopt.getopt(argv, self.short_opts, self.long_opts) - values = self.handle_opts(opts) - if values["help"]: - self.usage() - sys.exit(0) - return self.option_wrapper(values), args - except getopt.error, e: - raise error.OptionError(str(e)) - - def usage(self): - ret = 'usage: %s [options] %s\n' % (self.cmd, self.remain_help) - for opt in self.options: - s = self.short(opt) - if s: str = "-%s|--%s" % (s,self.long(opt)) - else: str = "--%s" % (self.long(opt),) - if self.type(opt) in (Options.PARAM, Options.INTPARAM): - str = "%s <arg>" % (str,) - help = self.help(opt) - n = string.find(help, '\n') - if self.default(opt) != None: - if n < 0: - str = "%-15s %s (default=%s)" %(str, help, - self.default(opt)) - else: - str = "%-15s %s (default=%s)%s" %(str, help[0:n], - self.default(opt), - help[n:]) - else: - str = "%-15s %s" %(str, help) - ret = ret + str + "\n" - print ret - -# Test driver -if __name__ == "__main__": - cl = Options("test", "xml_file", [ - ('verbose,v', "verbose ", Options.FLAG, 0), - ('cleanup,d', "shutdown"), - ('gdb', "Display gdb module file ", Options.FLAG, 0), - ('device', "device path ", Options.PARAM), - ('ldapurl', "LDAP server URL ", Options.PARAM), - ('lustre', "Lustre source dir ", Options.PARAM), - ('portals', "Portals source dir ", Options.PARAM), - ('maxlevel', """Specify the maximum level - Levels are 
aproximatly like: - 70 - mountpoint, echo_client, osc, mdc, lov""", - Options.INTPARAM, 100), - - ]) - - conf, args = cl.parse(sys.argv[1:]) - - for key in conf.values.keys(): - print "%-10s = %s" % (key, conf.values[key]) diff --git a/lustre/utils/Lustre/error.py b/lustre/utils/Lustre/error.py deleted file mode 100644 index 6c30416..0000000 --- a/lustre/utils/Lustre/error.py +++ /dev/null @@ -1,10 +0,0 @@ -import exceptions - -class LconfError (exceptions.Exception): - def __init__(self, args): - self.args = args - -class OptionError (exceptions.Exception): - def __init__(self, args): - self.args = args - diff --git a/lustre/utils/Lustre/lustredb.py b/lustre/utils/Lustre/lustredb.py deleted file mode 100644 index 82d487c..0000000 --- a/lustre/utils/Lustre/lustredb.py +++ /dev/null @@ -1,413 +0,0 @@ -import sys, types, string, os -import re, exceptions -import xml.dom.minidom -import Lustre - -# ============================================================ -# XML processing and query - -class LustreDB: - def lookup(self, uuid): - """ lookup returns a new LustreDB instance""" - return self._lookup_by_uuid(uuid) - - def lookup_name(self, name, class_name = ""): - """ lookup returns a new LustreDB instance""" - return self._lookup_by_name(name, class_name) - - def lookup_class(self, class_name): - """ lookup returns a new LustreDB instance""" - return self._lookup_by_class(class_name) - - def get_val(self, tag, default=None): - v = self._get_val(tag) - if v: - return v - if default != None: - return default - return None - - def get_class(self): - return self._get_class() - - def get_val_int(self, tag, default=0): - str = self._get_val(tag) - try: - if str: - return int(str) - return default - except ValueError: - raise Lustre.LconfError("text value is not integer: " + str) - - def get_first_ref(self, tag): - """ Get the first uuidref of the type TAG. Only - one is expected. 
Returns the uuid.""" - uuids = self._get_refs(tag) - if len(uuids) > 0: - return uuids[0] - return None - - def get_refs(self, tag): - """ Get all the refs of type TAG. Returns list of uuids. """ - uuids = self._get_refs(tag) - return uuids - - def get_all_refs(self): - """ Get all the refs. Returns list of uuids. """ - uuids = self._get_all_refs() - return uuids - - def nid2server(self, nid, net_type): - netlist = self.lookup_class('network') - for net_db in netlist: - if net_db.get_val('nid') == nid and net_db.get_val('nettype') == net_type: - return net_db - return None - - # Find the target_device for target on a node - # node->profiles->device_refs->target - def get_node_tgt_dev(self, node_name, target_uuid): - node_db = self.lookup_name(node_name) - if not node_db: - return None - return node_db.get_tgt_dev(target_uuid) - - # get all network uuids for this node - def get_networks(self): - ret = [] - prof_list = self.get_refs('profile') - for prof_uuid in prof_list: - prof_db = self.lookup(prof_uuid) - net_list = prof_db.get_refs('network') - for net_uuid in net_list: - ret.append(net_uuid) - return ret - - def get_active_dev(self, tgtuuid): - tgt = self.lookup(tgtuuid) - tgt_dev_uuid =tgt.get_first_ref('active') - return tgt_dev_uuid - - def get_tgt_dev(self, tgtuuid): - prof_list = self.get_refs('profile') - for prof_uuid in prof_list: - prof_db = self.lookup(prof_uuid) - if not prof_db: - panic("profile:", profile, "not found.") - for ref_class, ref_uuid in prof_db.get_all_refs(): - if ref_class in ('osd', 'mdsdev'): - devdb = self.lookup(ref_uuid) - uuid = devdb.get_first_ref('target') - if tgtuuid == uuid: - return ref_uuid - return None - - def get_group(self, group): - ret = [] - devs = self.lookup_class('mds') - for tgt in devs: - if tgt.get_val('group', "") == group: - ret.append(tgt.getUUID()) - devs = self.lookup_class('ost') - for tgt in devs: - if tgt.get_val('group', "") == group: - ret.append(tgt.getUUID()) - return ret - - # Change the current 
active device for a target - def update_active(self, tgtuuid, new_uuid): - self._update_active(tgtuuid, new_uuid) - - def get_version(self): - return self.get_val('version') - -class LustreDB_XML(LustreDB): - def __init__(self, dom, root_node): - # init xmlfile - self.dom_node = dom - self.root_node = root_node - - def xmltext(self, dom_node, tag): - list = dom_node.getElementsByTagName(tag) - if len(list) > 0: - dom_node = list[0] - dom_node.normalize() - if dom_node.firstChild: - txt = string.strip(dom_node.firstChild.data) - if txt: - return txt - - def xmlattr(self, dom_node, attr): - return dom_node.getAttribute(attr) - - def _get_val(self, tag): - """a value could be an attribute of the current node - or the text value in a child node""" - ret = self.xmlattr(self.dom_node, tag) - if not ret: - ret = self.xmltext(self.dom_node, tag) - return ret - - def _get_class(self): - return self.dom_node.nodeName - - def get_ref_type(self, ref_tag): - res = string.split(ref_tag, '_') - return res[0] - - # - # [(ref_class, ref_uuid),] - def _get_all_refs(self): - list = [] - for n in self.dom_node.childNodes: - if n.nodeType == n.ELEMENT_NODE: - ref_uuid = self.xml_get_ref(n) - ref_class = self.get_ref_type(n.nodeName) - list.append((ref_class, ref_uuid)) - - list.sort() - return list - - def _get_refs(self, tag): - """ Get all the refs of type TAG. Returns list of uuids. """ - uuids = [] - refname = '%s_ref' % tag - reflist = self.dom_node.getElementsByTagName(refname) - for r in reflist: - uuids.append(self.xml_get_ref(r)) - return uuids - - def xmllookup_by_uuid(self, dom_node, uuid): - for n in dom_node.childNodes: - if n.nodeType == n.ELEMENT_NODE: - if self.xml_get_uuid(n) == uuid: - return n - else: - n = self.xmllookup_by_uuid(n, uuid) - if n: return n - return None - - def _lookup_by_uuid(self, uuid): - dom = self. 
xmllookup_by_uuid(self.root_node, uuid) - if dom: - return LustreDB_XML(dom, self.root_node) - - def xmllookup_by_name(self, dom_node, name): - for n in dom_node.childNodes: - if n.nodeType == n.ELEMENT_NODE: - if self.xml_get_name(n) == name: - return n - else: - n = self.xmllookup_by_name(n, name) - if n: return n - return None - - def _lookup_by_name(self, name, class_name): - dom = self.xmllookup_by_name(self.root_node, name) - if dom: - return LustreDB_XML(dom, self.root_node) - - def xmllookup_by_class(self, dom_node, class_name): - return dom_node.getElementsByTagName(class_name) - - def _lookup_by_class(self, class_name): - ret = [] - domlist = self.xmllookup_by_class(self.root_node, class_name) - for node in domlist: - ret.append(LustreDB_XML(node, self.root_node)) - return ret - - def xml_get_name(self, n): - return n.getAttribute('name') - - def getName(self): - return self.xml_get_name(self.dom_node) - - def xml_get_ref(self, n): - return n.getAttribute('uuidref') - - def xml_get_uuid(self, dom_node): - return dom_node.getAttribute('uuid') - - def getUUID(self): - return self.xml_get_uuid(self.dom_node) - - # Convert routes from the router to a route that will be used - # on the local system. The network type and gw are changed to the - # interface on the router the local system will connect to. 
- def get_local_routes(self, type, gw): - """ Return the routes as a list of tuples of the form: - [(type, gw, lo, hi),]""" - res = [] - tbl = self.dom_node.getElementsByTagName('routetbl') - for t in tbl: - routes = t.getElementsByTagName('route') - for r in routes: - net_type = self.xmlattr(r, 'type') - if type != net_type: - lo = self.xmlattr(r, 'lo') - hi = self.xmlattr(r, 'hi') - tgt_cluster_id = self.xmlattr(r, 'tgtclusterid') - res.append((type, gw, tgt_cluster_id, lo, hi)) - return res - - def get_route_tbl(self): - ret = [] - for r in self.dom_node.getElementsByTagName('route'): - net_type = self.xmlattr(r, 'type') - gw = self.xmlattr(r, 'gw') - gw_cluster_id = self.xmlattr(r, 'gwclusterid') - tgt_cluster_id = self.xmlattr(r, 'tgtclusterid') - lo = self.xmlattr(r, 'lo') - hi = self.xmlattr(r, 'hi') - ret.append((net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi)) - return ret - - def _update_active(self, tgt, new): - raise Lustre.LconfError("updates not implemented for XML") - -# ================================================================ -# LDAP Support -class LustreDB_LDAP(LustreDB): - def __init__(self, name, attrs, - base = "fs=lustre", - parent = None, - url = "ldap://localhost", - user = "cn=Manager, fs=lustre", - pw = "" - ): - self._name = name - self._attrs = attrs - self._base = base - self._parent = parent - self._url = url - self._user = user - self._pw = pw - if parent: - self.l = parent.l - self._base = parent._base - else: - self.open() - - def open(self): - import ldap - try: - self.l = ldap.initialize(self._url) - # Set LDAP protocol version used - self.l.protocol_version=ldap.VERSION3 - # user and pw only needed if modifying db - self.l.bind_s(self._user, self._pw, ldap.AUTH_SIMPLE); - except ldap.LDAPError, e: - raise Lustre.LconfError('Unable to connection to ldap server') - - try: - self._name, self._attrs = self.l.search_s(self._base, - ldap.SCOPE_BASE)[0] - except ldap.LDAPError, e: - raise Lustre.LconfError("no config found 
in ldap: %s" - % (self._base,)) - def close(self): - self.l.unbind_s() - - def ldap_search(self, filter): - """Return list of uuids matching the filter.""" - import ldap - dn = self._base - ret = [] - uuids = [] - try: - for name, attrs in self.l.search_s(dn, ldap.SCOPE_ONELEVEL, - filter, ["uuid"]): - for v in attrs['uuid']: - uuids.append(v) - except ldap.NO_SUCH_OBJECT, e: - pass - except ldap.LDAPError, e: - print e # FIXME: die here? - if len(uuids) > 0: - for uuid in uuids: - ret.append(self._lookup_by_uuid(uuid)) - return ret - - def _lookup_by_name(self, name, class_name): - list = self.ldap_search("lustreName=%s" %(name)) - if len(list) == 1: - return list[0] - return None - - def _lookup_by_class(self, class_name): - return self.ldap_search("objectclass=%s" %(string.upper(class_name))) - - def _lookup_by_uuid(self, uuid): - import ldap - dn = "uuid=%s,%s" % (uuid, self._base) - ret = None - try: - for name, attrs in self.l.search_s(dn, ldap.SCOPE_BASE, - "objectclass=*"): - ret = LustreDB_LDAP(name, attrs, parent = self) - - except ldap.NO_SUCH_OBJECT, e: - pass # just return empty list - except ldap.LDAPError, e: - print e # FIXME: die here? - return ret - - - def _get_val(self, k): - ret = None - if self._attrs.has_key(k): - v = self._attrs[k] - if type(v) == types.ListType: - ret = str(v[0]) - else: - ret = str(v) - return ret - - def _get_class(self): - return string.lower(self._attrs['objectClass'][0]) - - def get_ref_type(self, ref_tag): - return ref_tag[:-3] - - # - # [(ref_class, ref_uuid),] - def _get_all_refs(self): - list = [] - for k in self._attrs.keys(): - if re.search('.*Ref', k): - for uuid in self._attrs[k]: - ref_class = self.get_ref_type(k) - list.append((ref_class, uuid)) - return list - - def _get_refs(self, tag): - """ Get all the refs of type TAG. Returns list of uuids. 
""" - uuids = [] - refname = '%sRef' % tag - if self._attrs.has_key(refname): - return self._attrs[refname] - return [] - - def getName(self): - return self._get_val('lustreName') - - def getUUID(self): - return self._get_val('uuid') - - def get_route_tbl(self): - return [] - - def _update_active(self, tgtuuid, newuuid): - """Return list of uuids matching the filter.""" - import ldap - dn = "uuid=%s,%s" %(tgtuuid, self._base) - ret = [] - uuids = [] - try: - self.l.modify_s(dn, [(ldap.MOD_REPLACE, "activeRef", newuuid)]) - except ldap.NO_SUCH_OBJECT, e: - print e - except ldap.LDAPError, e: - print e # FIXME: die here? - return diff --git a/lustre/utils/Makefile.am b/lustre/utils/Makefile.am deleted file mode 100644 index 02da299..0000000 --- a/lustre/utils/Makefile.am +++ /dev/null @@ -1,22 +0,0 @@ -# Administration utilities Makefile -DEFS= -SUBDIRS = Lustre - -CFLAGS:=-g -O2 -I$(top_srcdir)/utils -I$(top_srcdir)/portals/include -I$(srcdir)/../include -Wall -L../portals/utils -KFLAGS:= -CPPFLAGS = $(HAVE_LIBREADLINE) -lctl_LDADD := $(LIBREADLINE) -lptlctl -lload_LDADD := -lptlctl -sbin_PROGRAMS = lctl lfind lstripe obdio obdbarrier lload wirecheck -sbin_SCRIPTS = lconf lmc llanalyze llstat.pl llobdstat.pl lactive load_ldap.sh -wirecheck_SOURCES = wirecheck.c -lctl_SOURCES = parser.c obd.c lctl.c parser.h obdctl.h -lload_SOURCES = lload.c -obdio_SOURCES = obdio.c obdiolib.c obdiolib.h -obdbarrier_SOURCES = obdbarrier.c obdiolib.c obdiolib.h -lfind_SOURCES = lfind.c -lstripe_SOURCES = lstripe.c -lfind_CPPFLAGS = -D_XOPEN_SOURCE=500 -EXTRA_DIST = $(sbin_SCRIPTS) - -include $(top_srcdir)/Rules diff --git a/lustre/utils/automatic-reconnect-sample b/lustre/utils/automatic-reconnect-sample deleted file mode 100755 index bf9ecc4..0000000 --- a/lustre/utils/automatic-reconnect-sample +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -if [ -z "$1" ]; then - echo "No UUID given to Lustre upcall!" | wall - exit 1 -fi - -# FIXME: OSTHOST can't be hard-coded! 
-OST=$1 -OSTHOST=dev7 -LUSTRE=/home/pschwan/lustre/lustre - -while ( ! ping -c 1 -w 3 $OSTHOST ) ; do - sleep 2 -done; - -echo -n "OST $OSTHOST UUID $OST responding to pings : " -date - -$LUSTRE/utils/lctl <<EOF -network tcp -close_uuid $OST -del_uuid $OST -connect $OSTHOST 988 -add_uuid $OST $OSTHOST -quit -EOF - -$LUSTRE/utils/lctl <<EOF -device \$RPCDEV -probe -newconn $OST -quit -EOF diff --git a/lustre/utils/ha_assist.sh b/lustre/utils/ha_assist.sh deleted file mode 100755 index 0f737f5..0000000 --- a/lustre/utils/ha_assist.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -echo primary `date` >> /tmp/halog - - diff --git a/lustre/utils/ha_assist2.sh b/lustre/utils/ha_assist2.sh deleted file mode 100755 index a07d8b5..0000000 --- a/lustre/utils/ha_assist2.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -set -vx -date -echo "ha assist checking for problems" -sleep 3 -if [ ! -e /tmp/halog ]; then - echo "no problems, exiting" - exit -fi - -echo "removing /tmp/halog" -rm /tmp/halog - -echo secondary start `date` -echo "- please supply a new mds" - -# invoke ldap client here - - -/usr/src/portals/linux/utils/ptlctl <<EOF3 -setup tcp -close_uuid mds -del_uuid mds -connect dev5 988 -add_uuid mds -quit -EOF3 - -echo "connected to new MDS!" - -/usr/src/obd/utils/obdctl <<EOF2 -name2dev RPCDEV -newconn -quit -EOF2 diff --git a/lustre/utils/lactive b/lustre/utils/lactive deleted file mode 100644 index a5e8580..0000000 --- a/lustre/utils/lactive +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (C) 2002 Cluster File Systems, Inc. -# Author: Robert Read <rread@clusterfs.com> -# This file is part of Lustre, http://www.lustre.org. -# -# Lustre is free software; you can redistribute it and/or -# modify it under the terms of version 2 of the GNU General Public -# License as published by the Free Software Foundation. 
-# -# Lustre is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Lustre; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -# - -# For all the OST/MDSs that are primary on the --primary node, set -# them to be active on --active if that OST is available on --active. -# -# Make the active node the active node for all devices it shares with the -# old. The bulk of this code is for figuring out which devices to -# change, and what to change them to. - -# XXX add error checking -# XXX make this code less ugly - -import sys, getopt, types -import string, os -import ldap -PYMOD_DIR = "/usr/lib/lustre/python" - -def development_mode(): - base = os.path.dirname(sys.argv[0]) - if os.access(base+"/Makefile.am", os.R_OK): - return 1 - return 0 - -if not development_mode(): - sys.path.append(PYMOD_DIR) - -import Lustre - -lactive_options = [ - ('ldapurl',"LDAP server URL", Lustre.Options.PARAM, - "ldap://localhost"), - ('config', "Cluster config name used for LDAP query", Lustre.Options.PARAM), - ('group', "The group of devices to update", Lustre.Options.PARAM), - ('active', "The active node name", Lustre.Options.PARAM), - ] - -def fatal(*args): - msg = string.join(map(str,args)) - print "! 
" + msg - sys.exit(1) - - -cl = Lustre.Options("lactive","", lactive_options) -config, args = cl.parse(sys.argv[1:]) - -if not (config.group or config.active): - fatal("Must specify both group and active node.") - -if not config.config: - fatal("Missing config") - -base = "config=%s,fs=lustre" % (config.config,) -db = Lustre.LustreDB_LDAP('', {}, base=base, pw = "secret", - url = config.ldapurl) - -active_node = db.lookup_name(config.active) -if not active_node: - fatal(config.active, "node not found in database.") - -devices = db.get_group(config.group) -if len(devices) < 0: - fatal("no devices found for group", config.group) - -# for all devices in group - # lookup device in active node - # update the active device -for tgtuuid in devices: - active_uuid = db.get_active_dev(tgtuuid) - new_active_uuid = active_node.get_tgt_dev(tgtuuid) - if active_uuid != new_active_uuid: - print ("%s: changing active %s to %s:%s" - % (tgtuuid, active_uuid, - config.active, new_active_uuid)) - db.update_active(tgtuuid, new_active_uuid) - - - - - diff --git a/lustre/utils/lconf b/lustre/utils/lconf deleted file mode 100755 index 15e5a2c..0000000 --- a/lustre/utils/lconf +++ /dev/null @@ -1,2285 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (C) 2002 Cluster File Systems, Inc. -# Author: Robert Read <rread@clusterfs.com> -# This file is part of Lustre, http://www.lustre.org. -# -# Lustre is free software; you can redistribute it and/or -# modify it under the terms of version 2 of the GNU General Public -# License as published by the Free Software Foundation. -# -# Lustre is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Lustre; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-# -# lconf - lustre configuration tool -# -# lconf is the main driver script for starting and stopping -# lustre filesystem services. -# -# Based in part on the XML obdctl modifications done by Brian Behlendorf - -import sys, getopt, types -import string, os, stat, popen2, socket, time, random, fcntl, select -import re, exceptions, signal -import xml.dom.minidom - -if sys.version[0] == '1': - from FCNTL import F_GETFL, F_SETFL -else: - from fcntl import F_GETFL, F_SETFL - -PYMOD_DIR = "/usr/lib/lustre/python" - -def development_mode(): - base = os.path.dirname(sys.argv[0]) - if os.access(base+"/Makefile.am", os.R_OK): - return 1 - return 0 - -if not development_mode(): - sys.path.append(PYMOD_DIR) - -import Lustre - -# Global parameters -MAXTCPBUF = 1048576 -DEFAULT_TCPBUF = 1048576 -# -# Maximum number of devices to search for. -# (the /dev/loop* nodes need to be created beforehand) -MAX_LOOP_DEVICES = 256 -PORTALS_DIR = 'portals' - - -# Please keep these uptodate with the values in portals/kp30.h -ptldebug_names = { - "trace" : (1 << 0), - "inode" : (1 << 1), - "super" : (1 << 2), - "ext2" : (1 << 3), - "malloc" : (1 << 4), - "cache" : (1 << 5), - "info" : (1 << 6), - "ioctl" : (1 << 7), - "blocks" : (1 << 8), - "net" : (1 << 9), - "warning" : (1 << 10), - "buffs" : (1 << 11), - "other" : (1 << 12), - "dentry" : (1 << 13), - "portals" : (1 << 14), - "page" : (1 << 15), - "dlmtrace" : (1 << 16), - "error" : (1 << 17), - "emerg" : (1 << 18), - "ha" : (1 << 19), - "rpctrace" : (1 << 20), - "vfstrace" : (1 << 21), - } - -subsystem_names = { - "undefined" : (0 << 24), - "mdc" : (1 << 24), - "mds" : (2 << 24), - "osc" : (3 << 24), - "ost" : (4 << 24), - "class" : (5 << 24), - "obdfs" : (6 << 24), - "llite" : (7 << 24), - "rpc" : (8 << 24), - "ext2obd" : (9 << 24), - "portals" : (10 << 24), - "socknal" : (11 << 24), - "qswnal" : (12 << 24), - "pinger" : (13 << 24), - "filter" : (14 << 24), - "trace" : (15 << 24), - "echo" : (16 << 24), - "ldlm" : (17 << 24), - "lov" : 
(18 << 24), - "gmnal" : (19 << 24), - "ptlrouter" : (20 << 24), - "cobd" : (21 << 24), - "ptlbd" : (22 << 24), - } - - -first_cleanup_error = 0 -def cleanup_error(rc): - global first_cleanup_error - if not first_cleanup_error: - first_cleanup_error = rc - -# ============================================================ -# debugging and error funcs - -def fixme(msg = "this feature"): - raise Lustre.LconfError, msg + ' not implmemented yet.' - -def panic(*args): - msg = string.join(map(str,args)) - if not config.noexec: - raise Lustre.LconfError(msg) - else: - print "! " + msg - -def log(*args): - msg = string.join(map(str,args)) - print msg - -def logall(msgs): - for s in msgs: - print string.strip(s) - -def debug(*args): - if config.verbose: - msg = string.join(map(str,args)) - print msg - - -# ack, python's builtin int() does not support '0x123' syntax. -# eval can do it, although what a hack! -def my_int(s): - try: - if s[0:2] == '0x': - return eval(s, {}, {}) - else: - return int(s) - except SyntaxError, e: - raise ValueError("not a number") - except NameError, e: - raise ValueError("not a number") - -# ============================================================ -# locally defined exceptions -class CommandError (exceptions.Exception): - def __init__(self, cmd_name, cmd_err, rc=None): - self.cmd_name = cmd_name - self.cmd_err = cmd_err - self.rc = rc - - def dump(self): - import types - if type(self.cmd_err) == types.StringType: - if self.rc: - print "! %s (%d): %s" % (self.cmd_name, self.rc, self.cmd_err) - else: - print "! %s: %s" % (self.cmd_name, self.cmd_err) - elif type(self.cmd_err) == types.ListType: - if self.rc: - print "! %s (error %d):" % (self.cmd_name, self.rc) - else: - print "! 
%s:" % (self.cmd_name) - for s in self.cmd_err: - print "> %s" %(string.strip(s)) - else: - print self.cmd_err - - -# ============================================================ -# handle daemons, like the acceptor -class DaemonHandler: - """ Manage starting and stopping a daemon. Assumes daemon manages - it's own pid file. """ - - def __init__(self, cmd): - self.command = cmd - self.path ="" - - def start(self): - if self.running(): - log(self.command, "already running.") - if not self.path: - self.path = find_prog(self.command) - if not self.path: - panic(self.command, "not found.") - ret, out = runcmd(self.path +' '+ self.command_line()) - if ret: - raise CommandError(self.path, out, ret) - - def stop(self): - if self.running(): - pid = self.read_pidfile() - try: - log ("killing process", pid) - os.kill(pid, 15) - #time.sleep(1) # let daemon die - except OSError, e: - log("unable to kill", self.command, e) - if self.running(): - log("unable to kill", self.command) - - def running(self): - pid = self.read_pidfile() - if pid: - try: - os.kill(pid, 0) - except OSError: - self.clean_pidfile() - else: - return 1 - return 0 - - def read_pidfile(self): - try: - fp = open(self.pidfile(), 'r') - pid = int(fp.read()) - fp.close() - return pid - except IOError: - return 0 - - def clean_pidfile(self): - """ Remove a stale pidfile """ - log("removing stale pidfile:", self.pidfile()) - try: - os.unlink(self.pidfile()) - except OSError, e: - log(self.pidfile(), e) - -class AcceptorHandler(DaemonHandler): - def __init__(self, port, net_type, send_mem, recv_mem, irq_aff, nid_xchg): - DaemonHandler.__init__(self, "acceptor") - self.port = port - self.flags = '' - self.send_mem = send_mem - self.recv_mem = recv_mem - - if net_type == 'toe': - self.flags = self.flags + ' -N 4' - if irq_aff: - self.flags = self.flags + ' -i' - if nid_xchg: - self.flags = self.flags + ' -x' - - def pidfile(self): - return "/var/run/%s-%d.pid" % (self.command, self.port) - - def command_line(self): - 
return string.join(map(str,('-s', self.send_mem, '-r', self.recv_mem, self.flags, self.port))) - -acceptors = {} - -# start the acceptors -def run_acceptors(): - for port in acceptors.keys(): - daemon = acceptors[port] - if not daemon.running(): - daemon.start() - -def run_one_acceptor(port): - if acceptors.has_key(port): - daemon = acceptors[port] - if not daemon.running(): - daemon.start() - else: - panic("run_one_acceptor: No acceptor defined for port:", port) - -def stop_acceptor(port): - if acceptors.has_key(port): - daemon = acceptors[port] - if daemon.running(): - daemon.stop() - - -# ============================================================ -# handle lctl interface -class LCTLInterface: - """ - Manage communication with lctl - """ - - def __init__(self, cmd): - """ - Initialize close by finding the lctl binary. - """ - self.lctl = find_prog(cmd) - self.save_file = '' - if not self.lctl: - if config.noexec: - debug('! lctl not found') - self.lctl = 'lctl' - else: - raise CommandError('lctl', "unable to find lctl binary.") - - def use_save_file(self, file): - self.save_file = file - - def set_nonblock(self, fd): - fl = fcntl.fcntl(fd, F_GETFL) - fcntl.fcntl(fd, F_SETFL, fl | os.O_NDELAY) - - def run(self, cmds): - """ - run lctl - the cmds are written to stdin of lctl - lctl doesn't return errors when run in script mode, so - stderr is checked - should modify command line to accept multiple commands, or - create complex command line options - """ - cmd_line = self.lctl - if self.save_file: - cmds = '\n dump ' + self.save_file + cmds - - debug("+", cmd_line, cmds) - if config.noexec: return (0, []) - - child = popen2.Popen3(cmd_line, 1) # Capture stdout and stderr from command - child.tochild.write(cmds + "\n") - child.tochild.close() - - # From "Python Cookbook" from O'Reilly - outfile = child.fromchild - outfd = outfile.fileno() - self.set_nonblock(outfd) - errfile = child.childerr - errfd = errfile.fileno() - self.set_nonblock(errfd) - - outdata = 
errdata = '' - outeof = erreof = 0 - while 1: - ready = select.select([outfd,errfd],[],[]) # Wait for input - if outfd in ready[0]: - outchunk = outfile.read() - if outchunk == '': outeof = 1 - outdata = outdata + outchunk - if errfd in ready[0]: - errchunk = errfile.read() - if errchunk == '': erreof = 1 - errdata = errdata + errchunk - if outeof and erreof: break - # end of "borrowed" code - - ret = child.wait() - if os.WIFEXITED(ret): - rc = os.WEXITSTATUS(ret) - else: - rc = 0 - if rc or len(errdata): - raise CommandError(self.lctl, errdata, rc) - return rc, outdata - - def runcmd(self, *args): - """ - run lctl using the command line - """ - cmd = string.join(map(str,args)) - debug("+", self.lctl, cmd) - rc, out = run(self.lctl, cmd) - if rc: - raise CommandError(self.lctl, out, rc) - return rc, out - - - def network(self, net, nid): - """ initialized network and add "self" """ - cmds = """ - network %s - mynid %s - quit """ % (net, nid) - self.run(cmds) - - # create a new connection - def connect(self, srv): - cmds = "\n add_uuid %s %s %s" % (srv.uuid, srv.nid, srv.net_type) - if srv.net_type in ('tcp', 'toe') and not config.lctl_dump: - flags = '' - if srv.irq_affinity: - flags = flags + 'i' - if srv.nid_exchange: - flags = flags + 'x' - cmds = """%s - network %s - send_mem %d - recv_mem %d - connect %s %d %s""" % (cmds, srv.net_type, - srv.send_mem, - srv.recv_mem, - srv.hostaddr, srv.port, flags ) - - cmds = cmds + "\n quit" - self.run(cmds) - - # Recover a device - def recover(self, dev_name, new_conn): - cmds = """ - device $%s - probe - recover %s""" %(dev_name, new_conn) - self.run(cmds) - - # add a route to a range - def add_route(self, net, gw, lo, hi): - cmds = """ - network %s - add_route %s %s %s - quit """ % (net, - gw, lo, hi) - self.run(cmds) - - - def del_route(self, net, gw, lo, hi): - cmds = """ - ignore_errors - network %s - del_route %s - quit """ % (net, lo) - self.run(cmds) - - # add a route to a host - def add_route_host(self, net, uuid, 
gw, tgt): - cmds = """ - network %s - add_uuid %s %s %s - add_route %s %s - quit """ % (net, - uuid, tgt, net, - gw, tgt) - self.run(cmds) - - # add a route to a range - def del_route_host(self, net, uuid, gw, tgt): - cmds = """ - ignore_errors - network %s - del_uuid %s - del_route %s - quit """ % (net, uuid, tgt) - self.run(cmds) - - # disconnect one connection - def disconnect(self, net, nid, port, servuuid): - cmds = """ - ignore_errors - network %s - disconnect %s - del_uuid %s - quit""" % (net, nid, servuuid) - self.run(cmds) - - def del_uuid(self, servuuid): - cmds = """ - ignore_errors - del_uuid %s - quit""" % (servuuid,) - self.run(cmds) - - # disconnect all - def disconnectAll(self, net): - cmds = """ - ignore_errors - network %s - disconnect - quit""" % (net) - self.run(cmds) - - # create a new device with lctl - def newdev(self, attach, setup = ""): - cmds = """ - newdev - attach %s - setup %s - quit""" % (attach, setup) - self.run(cmds) - - # cleanup a device - def cleanup(self, name, uuid, force, failover = 0): - if failover: force = 1 - cmds = """ - ignore_errors - device $%s - cleanup %s %s - detach - quit""" % (name, ('', 'force')[force], - ('', 'failover')[failover]) - self.run(cmds) - - # create an lov - def lov_setconfig(self, uuid, mdsuuid, stripe_cnt, stripe_sz, stripe_off, - pattern, devlist): - cmds = """ - device $%s - probe - lov_setconfig %s %d %d %d %s %s - quit""" % (mdsuuid, uuid, stripe_cnt, stripe_sz, stripe_off, pattern, devlist) - self.run(cmds) - - # dump the log file - def dump(self, dump_file): - cmds = """ - debug_kernel %s 1 - quit""" % (dump_file) - self.run(cmds) - - # get list of devices - def device_list(self): - try: - rc, out = self.runcmd('device_list') - except CommandError, e: - if config.cleanup: - out = [] - else: - raise e - return out - - # get lustre version - def lustre_version(self): - rc, out = self.runcmd('version') - return out - - # dump mount options - def mount_option(self, option): - cmds = """ - 
mount_option %s - quit""" % (option) - self.run(cmds) -# ============================================================ -# Various system-level functions -# (ideally moved to their own module) - -# Run a command and return the output and status. -# stderr is sent to /dev/null, could use popen3 to -# save it if necessary -def runcmd(cmd): - debug ("+", cmd) - if config.noexec: return (0, []) - f = os.popen(cmd + ' 2>&1') - out = f.readlines() - ret = f.close() - if ret: - ret = ret >> 8 - else: - ret = 0 - return (ret, out) - -def run(*args): - cmd = string.join(map(str,args)) - return runcmd(cmd) - -# Run a command in the background. -def run_daemon(*args): - cmd = string.join(map(str,args)) - debug ("+", cmd) - if config.noexec: return 0 - f = os.popen(cmd + ' 2>&1') - ret = f.close() - if ret: - ret = ret >> 8 - else: - ret = 0 - return ret - -# Determine full path to use for an external command -# searches dirname(argv[0]) first, then PATH -def find_prog(cmd): - syspath = string.split(os.environ['PATH'], ':') - cmdpath = os.path.dirname(sys.argv[0]) - syspath.insert(0, cmdpath); - if config.portals: - syspath.insert(0, os.path.join(config.portals, 'utils/')) - for d in syspath: - prog = os.path.join(d,cmd) - if os.access(prog, os.X_OK): - return prog - return '' - -# Recursively look for file starting at base dir -def do_find_file(base, mod): - fullname = os.path.join(base, mod) - if os.access(fullname, os.R_OK): - return fullname - for d in os.listdir(base): - dir = os.path.join(base,d) - if os.path.isdir(dir): - module = do_find_file(dir, mod) - if module: - return module - -def find_module(src_dir, dev_dir, modname): - mod = '%s.o' % (modname) - module = src_dir +'/'+ dev_dir +'/'+ mod - try: - if os.access(module, os.R_OK): - return module - except OSError: - pass - return None - -# is the path a block device? 
-def is_block(path): - s = () - try: - s = os.stat(path) - except OSError: - return 0 - return stat.S_ISBLK(s[stat.ST_MODE]) - -# build fs according to type -# fixme: dangerous -def mkfs(dev, devsize, fstype,jsize): - block_cnt = '' - jopt = '' - if devsize: - if devsize < 8000: - panic("size of filesystem on '%s' must be larger than 8MB, but is set to %s"% - (dev, devsize)) - # devsize is in 1k, and fs block count is in 4k - block_cnt = devsize/4 - - if fstype in ('ext3', 'extN'): - # ext3 journal size is in megabytes - if jsize: jopt = "-J size=%d" %(jsize,) - mkfs = 'mkfs.ext2 -j -b 4096 -F ' - elif fstype == 'reiserfs': - # reiserfs journal size is in blocks - if jsize: jopt = "--journal_size %d" %(jsize,) - mkfs = 'mkreiserfs -ff' - else: - print 'unsupported fs type: ', fstype - - (ret, out) = run (mkfs, jopt, dev, block_cnt) - if ret: - panic("Unable to build fs:", dev, string.join(out)) - # enable hash tree indexing on fsswe - if fstype in ('ext3', 'extN'): - htree = 'echo "feature FEATURE_C5" | debugfs -w' - (ret, out) = run (htree, dev) - if ret: - panic("Unable to enable htree:", dev) - -# some systems use /dev/loopN, some /dev/loop/N -def loop_base(): - import re - loop = '/dev/loop' - if not os.access(loop + str(0), os.R_OK): - loop = loop + '/' - if not os.access(loop + str(0), os.R_OK): - panic ("can't access loop devices") - return loop - -# find loop device assigned to thefile -def find_loop(file): - loop = loop_base() - for n in xrange(0, MAX_LOOP_DEVICES): - dev = loop + str(n) - if os.access(dev, os.R_OK): - (stat, out) = run('losetup', dev) - if out and stat == 0: - m = re.search(r'\((.*)\)', out[0]) - if m and file == m.group(1): - return dev - else: - break - return '' - -# create file if necessary and assign the first free loop device -def init_loop(file, size, fstype, journal_size): - dev = find_loop(file) - if dev: - print 'WARNING file:', file, 'already mapped to', dev - return dev - if config.reformat or not os.access(file, os.R_OK | 
os.W_OK): - if size < 8000: - panic("size of loopback file '%s' must be larger than 8MB, but is set to %s" % (file,size)) - (ret, out) = run("dd if=/dev/zero bs=1k count=0 seek=%d of=%s" %(size, - file)) - if ret: - panic("Unable to create backing store:", file) - mkfs(file, size, fstype, journal_size) - - loop = loop_base() - # find next free loop - for n in xrange(0, MAX_LOOP_DEVICES): - dev = loop + str(n) - if os.access(dev, os.R_OK): - (stat, out) = run('losetup', dev) - if stat: - run('losetup', dev, file) - return dev - else: - print "out of loop devices" - return '' - print "out of loop devices" - return '' - -# undo loop assignment -def clean_loop(file): - dev = find_loop(file) - if dev: - ret, out = run('losetup -d', dev) - if ret: - log('unable to clean loop device:', dev, 'for file:', file) - logall(out) - -# determine if dev is formatted as a <fstype> filesystem -def need_format(fstype, dev): - # FIXME don't know how to implement this - return 0 - -# initialize a block device if needed -def block_dev(dev, size, fstype, format, journal_size): - if config.noexec: return dev - if not is_block(dev): - dev = init_loop(dev, size, fstype, journal_size) - elif config.reformat or (need_format(fstype, dev) and format == 'yes'): - mkfs(dev, size, fstype, journal_size) - -# else: -# panic("device:", dev, -# "not prepared, and autoformat is not set.\n", -# "Rerun with --reformat option to format ALL filesystems") - - return dev - -def if2addr(iface): - """lookup IP address for an interface""" - rc, out = run("/sbin/ifconfig", iface) - if rc or not out: - return None - addr = string.split(out[1])[1] - ip = string.split(addr, ':')[1] - return ip - -def get_local_nid(net_type, wildcard): - """Return the local nid.""" - local = "" - if os.access('/proc/elan/device0/position', os.R_OK): - local = get_local_address('elan', '*') - else: - local = get_local_address(net_type, wildcard) - return local - -def get_local_address(net_type, wildcard): - """Return the local 
address for the network type.""" - local = "" - if net_type in ('tcp', 'toe'): - if ':' in wildcard: - iface, star = string.split(wildcard, ':') - local = if2addr(iface) - if not local: - panic ("unable to determine ip for:", wildcard) - else: - host = socket.gethostname() - local = socket.gethostbyname(host) - elif net_type == 'elan': - # awk '/NodeId/ { print $2 }' '/proc/elan/device0/position' - try: - fp = open('/proc/elan/device0/position', 'r') - lines = fp.readlines() - fp.close() - for l in lines: - a = string.split(l) - if a[0] == 'NodeId': - local = a[1] - break - except IOError, e: - log(e) - elif net_type == 'gm': - fixme("automatic local address for GM") - elif net_type == 'scimac': - scinode="/opt/scali/sbin/scinode" - if os.path.exists(scinode): - (rc,local) = run(scinode) - else: - panic (scinode, " not found on node with scimac networking") - if rc: - panic (scinode, " failed") - local=string.rstrip(local[0]) - - return local - - -# XXX: instead of device_list, ask for $name and see what we get -def is_prepared(name): - """Return true if a device exists for the name""" - if config.lctl_dump: - return 0 - if config.noexec and config.cleanup: - return 1 - try: - # expect this format: - # 1 UP ldlm ldlm ldlm_UUID 2 - out = lctl.device_list() - for s in out: - if name == string.split(s)[3]: - return 1 - except CommandError, e: - e.dump() - return 0 - -def is_network_prepared(): - """If the LDLM device exists, then assume that all networking - has been configured""" - return is_prepared('ldlm') - -def fs_is_mounted(path): - """Return true if path is a mounted lustre filesystem""" - try: - fp = open('/proc/mounts') - lines = fp.readlines() - fp.close() - for l in lines: - a = string.split(l) - if a[1] == path and a[2] == 'lustre_lite': - return 1 - except IOError, e: - log(e) - return 0 - - -# ============================================================ -# Classes to prepare and cleanup the various objects -# -class Module: - """ Base class for the rest 
of the modules. The default cleanup method is - defined here, as well as some utilitiy funcs. - """ - def __init__(self, module_name, db): - self.db = db - self.module_name = module_name - self.name = self.db.getName() - self.uuid = self.db.getUUID() - self.kmodule_list = [] - self._server = None - self._connected = 0 - - def info(self, *args): - msg = string.join(map(str,args)) - print self.module_name + ":", self.name, self.uuid, msg - - def cleanup(self): - """ default cleanup, used for most modules """ - self.info() - try: - lctl.cleanup(self.name, self.uuid, config.force) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - - def add_portals_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmodule_list.append((config.portals, dev_dir, modname)) - - def add_lustre_module(self, dev_dir, modname): - """Append a module to list of modules to load.""" - self.kmodule_list.append((config.lustre, dev_dir, modname)) - - def mod_loaded(self, modname): - """Check if a module is already loaded. 
Look in /proc/modules for it.""" - fp = open('/proc/modules') - lines = fp.readlines() - fp.close() - # please forgive my tired fingers for this one - ret = filter(lambda word, mod=modname: word == mod, - map(lambda line: string.split(line)[0], lines)) - return ret - - def load_module(self): - """Load all the modules in the list in the order they appear.""" - for src_dir, dev_dir, mod in self.kmodule_list: - # (rc, out) = run ('/sbin/lsmod | grep -s', mod) - if self.mod_loaded(mod) and not config.noexec: - continue - log ('loading module:', mod, 'srcdir', src_dir, 'devdir', dev_dir) - if src_dir: - module = find_module(src_dir, dev_dir, mod) - if not module: - panic('module not found:', mod) - (rc, out) = run('/sbin/insmod', module) - if rc: - raise CommandError('insmod', out, rc) - else: - (rc, out) = run('/sbin/modprobe', mod) - if rc: - raise CommandError('modprobe', out, rc) - - def cleanup_module(self): - """Unload the modules in the list in reverse order.""" - if not self.safe_to_clean(): - return - rev = self.kmodule_list - rev.reverse() - for src_dir, dev_dir, mod in rev: - if not self.mod_loaded(mod) and not config.noexec: - continue - # debug hack - if mod == 'portals' and config.dump: - lctl.dump(config.dump) - log('unloading module:', mod) - (rc, out) = run('/sbin/rmmod', mod) - if rc: - log('! 
unable to unload module:', mod) - logall(out) - - def safe_to_clean(self): - return 1 - - def safe_to_clean_modules(self): - return self.safe_to_clean() - -class Network(Module): - def __init__(self,db): - Module.__init__(self, 'NETWORK', db) - self.net_type = self.db.get_val('nettype') - self.nid = self.db.get_val('nid', '*') - self.cluster_id = self.db.get_val('clusterid', "0") - self.port = self.db.get_val_int('port', 0) - self.send_mem = self.db.get_val_int('sendmem', DEFAULT_TCPBUF) - self.recv_mem = self.db.get_val_int('recvmem', DEFAULT_TCPBUF) - self.irq_affinity = self.db.get_val_int('irqaffinity', 0) - self.nid_exchange = self.db.get_val_int('nidexchange', 0) - - if '*' in self.nid: - if self.nid_exchange: - self.nid = get_local_nid(self.net_type, self.nid) - else: - self.nid = get_local_address(self.net_type, self.nid) - if not self.nid: - panic("unable to set nid for", self.net_type, self.nid) - debug("nid:", self.nid) - - self.hostaddr = self.db.get_val('hostaddr', self.nid) - if '*' in self.hostaddr: - self.hostaddr = get_local_address(self.net_type, self.hostaddr) - if not self.nid: - panic("unable to set nid for", self.net_type, self.hostaddr) - debug("hostaddr:", self.hostaddr) - - self.add_portals_module("libcfs", 'portals') - if node_needs_router(): - self.add_portals_module("router", 'kptlrouter') - if self.net_type == 'tcp': - self.add_portals_module("knals/socknal", 'ksocknal') - if self.net_type == 'toe': - self.add_portals_module("knals/toenal", 'ktoenal') - if self.net_type == 'elan': - self.add_portals_module("knals/qswnal", 'kqswnal') - if self.net_type == 'gm': - self.add_portals_module("knals/gmnal", 'kgmnal') - if self.net_type == 'scimac': - self.add_portals_module("knals/scimacnal", 'kscimacnal') - - def prepare(self): - if is_network_prepared(): - return - self.info(self.net_type, self.nid, self.port) - lctl.network(self.net_type, self.nid) - if self.port and node_is_router(): - run_one_acceptor(self.port) - 
self.connect_peer_gateways() - - def connect_peer_gateways(self): - for router in self.db.lookup_class('node'): - if router.get_val_int('router', 0): - # if this is a peer with a nid less than mine, - # then connect. - for netuuid in router.get_networks(): - net = self.db.lookup(netuuid) - gw = Network(net) - if (gw.cluster_id == self.cluster_id and - gw.net_type == self.net_type): - # hack: compare as numbers if possible, this should all - # go away once autoconnect is done. - # This also conveniently prevents us from connecting to ourself. - try: - gw_nid = my_int(gw.nid) - self_nid = my_int(self.nid) - except ValueError, e: - gw_nid = gw.nid - self_nid = self.nid - if gw_nid < self_nid: - lctl.connect(gw) - - def disconnect_peer_gateways(self): - for router in self.db.lookup_class('node'): - if router.get_val_int('router', 0): - # if this is a peer with a nid less than mine, - # then connect. - for netuuid in router.get_networks(): - net = self.db.lookup(netuuid) - gw = Network(net) - if (gw.cluster_id == self.cluster_id and - gw.net_type == self.net_type): - # hack: compare as numbers if possible, this should all - # go away once autoconnect is done. - # This also conveniently prevents us from connecting to ourself. 
- try: - gw_nid = my_int(gw.nid) - self_nid = my_int(self.nid) - except ValueError, e: - gw_nid = gw.nid - self_nid = self.nid - if gw_nid < self_nid: - try: - lctl.disconnect(router.net_type, router.nid, router.port, - router.uuid) - except CommandError, e: - print "disconnectAll failed: ", self.name - e.dump() - cleanup_error(e.rc) - - def safe_to_clean(self): - return not is_network_prepared() - - def cleanup(self): - self.info(self.net_type, self.nid, self.port) - if self.port: - stop_acceptor(self.port) - if node_is_router(): - self.disconnect_peer_gateways() - try: - lctl.disconnectAll(self.net_type) - except CommandError, e: - print "disconnectAll failed: ", self.name - e.dump() - cleanup_error(e.rc) - -class RouteTable(Module): - def __init__(self,db): - Module.__init__(self, 'ROUTES', db) - def prepare(self): - if is_network_prepared(): - return - self.info() - for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): - lctl.add_route(net_type, gw, lo, hi) - if net_type in ('tcp', 'toe') and local_net_type(net_type) and lo == hi: - srvdb = self.db.nid2server(lo, net_type) - if not srvdb: - panic("no server for nid", lo) - else: - srv = Network(srvdb) - lctl.connect(srv) - - def safe_to_clean(self): - return not is_network_prepared() - - def cleanup(self): - if is_network_prepared(): - # the network is still being used, don't clean it up - return - for net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi in self.db.get_route_tbl(): - if net_type in ('tcp', 'toe') and local_net_type(net_type) and hi == '': - srvdb = self.db.nid2server(lo, net_type) - if not srvdb: - panic("no server for nid", lo) - else: - srv = Network(srvdb) - try: - lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid) - except CommandError, e: - print "disconnect failed: ", self.name - e.dump() - cleanup_error(e.rc) - try: - lctl.del_route(net_type, gw, lo, hi) - except CommandError, e: - print "del_route failed: ", self.name - e.dump() - cleanup_error(e.rc) 
- -class LDLM(Module): - def __init__(self,db): - Module.__init__(self, 'LDLM', db) - self.add_lustre_module('obdclass', 'obdclass') - self.add_lustre_module('ptlrpc', 'ptlrpc') - self.add_lustre_module('ldlm', 'ldlm') - - def prepare(self): - if is_prepared(self.name): - return - self.info() - lctl.newdev(attach="ldlm %s %s" % ('ldlm', 'ldlm_UUID')) - - def safe_to_clean(self): - out = lctl.device_list() - return len(out) <= 1 - - def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) - -class LOV(Module): - def __init__(self, db, uuid): - Module.__init__(self, 'LOV', db) - self.add_lustre_module('mdc', 'mdc') - self.add_lustre_module('lov', 'lov') - self.mds_uuid = self.db.get_first_ref('mds') - mds= self.db.lookup(self.mds_uuid) - self.mds_name = mds.getName() - self.stripe_sz = self.db.get_val_int('stripesize', 65536) - self.stripe_off = self.db.get_val_int('stripeoffset', 0) - self.pattern = self.db.get_val_int('stripepattern', 0) - self.devlist = self.db.get_refs('obd') - self.stripe_cnt = self.db.get_val_int('stripecount', len(self.devlist)) - self.osclist = [] - self.client_uuid = generate_client_uuid(self.name) - self.mdc_name = '' - self.mdc = get_mdc(db, self.client_uuid, self.name, self.mds_uuid) - for obd_uuid in self.devlist: - obd = self.db.lookup(obd_uuid) - osc = get_osc(obd, self.client_uuid, self.name) - if osc: - self.osclist.append(osc) - else: - panic('osc not found:', obd_uuid) - - def prepare(self): - if is_prepared(self.name): - return - for osc in self.osclist: - try: - # Only ignore connect failures with --force, which - # isn't implemented here yet. 
- osc.prepare(ignore_connect_failure=0) - except CommandError, e: - print "Error preparing OSC %s (inactive)\n" % osc.uuid - raise e - self.mdc.prepare() - self.mdc_name = self.mdc.name - self.info(self.mds_uuid, self.stripe_cnt, self.stripe_sz, - self.stripe_off, self.pattern, self.devlist, self.mds_name) - lctl.newdev(attach="lov %s %s" % (self.name, self.uuid), - setup ="%s" % (self.mdc_name)) - - def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) - for osc in self.osclist: - osc.cleanup() - mdc = get_mdc(self.db, self.client_uuid, self.name, self.mds_uuid) - mdc.cleanup() - - def load_module(self): - for osc in self.osclist: - osc.load_module() - break - Module.load_module(self) - - def cleanup_module(self): - Module.cleanup_module(self) - for osc in self.osclist: - osc.cleanup_module() - break - -class LOVConfig(Module): - def __init__(self,db): - Module.__init__(self, 'LOVConfig', db) - - self.lov_uuid = self.db.get_first_ref('lov') - l = self.db.lookup(self.lov_uuid) - self.lov = LOV(l, "YOU_SHOULD_NEVER_SEE_THIS_UUID") - - def prepare(self): - lov = self.lov - self.info(lov.mds_uuid, lov.stripe_cnt, lov.stripe_sz, lov.stripe_off, - lov.pattern, lov.devlist, lov.mds_name) - lctl.lov_setconfig(lov.uuid, lov.mds_name, lov.stripe_cnt, - lov.stripe_sz, lov.stripe_off, lov.pattern, - string.join(lov.devlist)) - - def cleanup(self): - #nothing to do here - pass - -class MDSDEV(Module): - def __init__(self,db): - Module.__init__(self, 'MDSDEV', db) - self.devpath = self.db.get_val('devpath','') - self.size = self.db.get_val_int('devsize', 0) - self.journal_size = self.db.get_val_int('journalsize', 0) - self.fstype = self.db.get_val('fstype', '') - self.nspath = self.db.get_val('nspath', '') - # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid - target_uuid = self.db.get_first_ref('target') - mds = self.db.lookup(target_uuid) - self.name = mds.getName() - self.lovconfig_uuids = mds.get_refs('lovconfig') - 
self.filesystem_uuids = mds.get_refs('filesystem') - # FIXME: if fstype not set, then determine based on kernel version - self.format = self.db.get_val('autoformat', "no") - if mds.get_val('failover', 0): - self.failover_mds = 'f' - else: - self.failover_mds = 'n' - active_uuid = get_active_target(mds) - if not active_uuid: - panic("No target device found:", target_uuid) - if active_uuid == self.uuid: - self.active = 1 - else: - self.active = 0 - if self.active and config.group and config.group != ost.get_val('group'): - self.active = 0 - - self.target_dev_uuid = self.uuid - self.uuid = target_uuid - # modules - self.add_lustre_module('mds', 'mds') - if self.fstype: - self.add_lustre_module('obdclass', 'fsfilt_%s' % (self.fstype)) - - def load_module(self): - if self.active: - Module.load_module(self) - - def prepare(self): - if is_prepared(self.name): - return - if not self.active: - debug(self.uuid, "not active") - return - self.info(self.devpath, self.fstype, self.format) - run_acceptors() - blkdev = block_dev(self.devpath, self.size, self.fstype, self.format, - self.journal_size) - if not is_prepared('MDT'): - lctl.newdev(attach="mdt %s %s" % ('MDT', 'MDT_UUID'), - setup ="") - if self.nspath: - run ("mkdir", self.nspath) - lctl.newdev(attach="mds %s %s" % (self.name, self.uuid), - setup ="%s %s %s" %(blkdev, self.fstype, self.nspath)) - for uuid in self.lovconfig_uuids: - db = self.db.lookup(uuid) - lovconfig = LOVConfig(db) - lovconfig.prepare() - if config.mds_ost_conn: - for uuid in self.filesystem_uuids: - log("open clients for filesystem:", uuid) - fs = self.db.lookup(uuid) - obd_uuid = fs.get_first_ref('obd') - client_uuid = generate_client_uuid(self.name) - client = VOSC(client_uuid, self.db.lookup(obd_uuid), self.name) - client.prepare() - - - def msd_remaining(self): - out = lctl.device_list() - for s in out: - if string.split(s)[2] in ('mds',): - return 1 - - def safe_to_clean(self): - return self.active - - def safe_to_clean_modules(self): - return 
not self.msd_remaining() - - def cleanup(self): - if not self.active: - debug(self.uuid, "not active") - return - if is_prepared(self.name): - self.info() - try: - lctl.cleanup(self.name, self.uuid, config.force, - config.failover) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - Module.cleanup(self) - if config.mds_ost_conn: - for uuid in self.filesystem_uuids: - log("clean clients for filesystem:", uuid) - log("open clients for filesystem:", uuid) - fs = self.db.lookup(uuid) - obd_uuid = fs.get_first_ref('obd') - client = VOSC(self.db.lookup(obd_uuid), self.name) - client.cleanup() - if not self.msd_remaining() and is_prepared('MDT'): - try: - lctl.cleanup("MDT", "MDT_UUID", config.force, - config.failover) - except CommandError, e: - print "cleanup failed: ", self.name - e.dump() - cleanup_error(e.rc) - clean_loop(self.devpath) - -class OSD(Module): - def __init__(self, db): - Module.__init__(self, 'OSD', db) - self.osdtype = self.db.get_val('osdtype') - self.devpath = self.db.get_val('devpath', '') - self.size = self.db.get_val_int('devsize', 0) - self.journal_size = self.db.get_val_int('journalsize', 0) - self.fstype = self.db.get_val('fstype', '') - self.nspath = self.db.get_val('nspath', '') - target_uuid = self.db.get_first_ref('target') - ost = self.db.lookup(target_uuid) - self.name = ost.getName() - self.format = self.db.get_val('autoformat', 'yes') - if ost.get_val('failover', 0): - self.failover_ost = 'f' - else: - self.failover_ost = 'n' - - active_uuid = get_active_target(ost) - if not active_uuid: - panic("No target device found:", target_uuid) - if active_uuid == self.uuid: - self.active = 1 - else: - self.active = 0 - if self.active and config.group and config.group != ost.get_val('group'): - self.active = 0 - - self.target_dev_uuid = self.uuid - self.uuid = target_uuid - # modules - self.add_lustre_module('ost', 'ost') - # FIXME: should we default to ext3 here? 
- if self.fstype: - self.add_lustre_module('obdclass' , 'fsfilt_%s' % (self.fstype)) - self.add_lustre_module(self.osdtype, self.osdtype) - - def load_module(self): - if self.active: - Module.load_module(self) - - # need to check /proc/mounts and /etc/mtab before - # formatting anything. - # FIXME: check if device is already formatted. - def prepare(self): - if is_prepared(self.name): - return - if not self.active: - debug(self.uuid, "not active") - return - self.info(self.osdtype, self.devpath, self.size, self.fstype, - self.format, self.journal_size) - run_acceptors() - if self.osdtype == 'obdecho': - blkdev = '' - else: - blkdev = block_dev(self.devpath, self.size, self.fstype, - self.format, self.journal_size) - if self.nspath: - run ("mkdir", self.nspath) - lctl.newdev(attach="%s %s %s" % (self.osdtype, self.name, self.uuid), - setup ="%s %s %s %s" %(blkdev, self.fstype, - self.failover_ost, self.nspath)) - if not is_prepared('OSS'): - lctl.newdev(attach="ost %s %s" % ('OSS', 'OSS_UUID'), - setup ="") - - def osd_remaining(self): - out = lctl.device_list() - for s in out: - if string.split(s)[2] in ('obdfilter', 'obdecho'): - return 1 - - def safe_to_clean(self): - return self.active - - def safe_to_clean_modules(self): - return not self.osd_remaining() - - def cleanup(self): - if not self.active: - debug(self.uuid, "not active") - return - if is_prepared(self.name): - self.info() - try: - lctl.cleanup(self.name, self.uuid, config.force, - config.failover) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - if not self.osd_remaining() and is_prepared('OSS'): - try: - lctl.cleanup("OSS", "OSS_UUID", config.force, - config.failover) - except CommandError, e: - print "cleanup failed: ", self.name - e.dump() - cleanup_error(e.rc) - if not self.osdtype == 'obdecho': - clean_loop(self.devpath) - -# Generic client module, used by OSC and MDC -class Client(Module): - def __init__(self, tgtdb, uuid, 
module, owner): - self.target_name = tgtdb.getName() - self.target_uuid = tgtdb.getUUID() - self.db = tgtdb - - self.tgt_dev_uuid = get_active_target(tgtdb) - if not self.tgt_dev_uuid: - panic("No target device found for target:", self.target_name) - - self.kmodule_list = [] - self._server = None - self._connected = 0 - - self.module = module - self.module_name = string.upper(module) - self.name = '%s_%s_%s_%s' % (self.module_name, socket.gethostname(), - self.target_name, owner) - self.uuid = uuid - self.lookup_server(self.tgt_dev_uuid) - self.add_lustre_module(module, module) - - def lookup_server(self, srv_uuid): - """ Lookup a server's network information """ - self._server_nets = get_ost_net(self.db, srv_uuid) - if len(self._server_nets) == 0: - panic ("Unable to find a server for:", srv_uuid) - - def get_servers(self): - return self._server_nets - - def prepare(self, ignore_connect_failure = 0): - self.info(self.target_uuid) - if is_prepared(self.name): - self.cleanup() - try: - srv = choose_local_server(self.get_servers()) - if srv: - lctl.connect(srv) - else: - srv, r = find_route(self.get_servers()) - if srv: - lctl.add_route_host(r[0], srv.uuid, r[1], r[3]) - else: - panic ("no route to", self.target_uuid) - except CommandError, e: - if not ignore_connect_failure: - raise e - if srv: - lctl.newdev(attach="%s %s %s" % (self.module, self.name, self.uuid), - setup ="%s %s" %(self.target_uuid, srv.uuid)) - - def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) - try: - srv = choose_local_server(self.get_servers()) - if srv: - lctl.disconnect(srv.net_type, srv.nid, srv.port, srv.uuid) - else: - srv, r = find_route(self.get_servers()) - if srv: - lctl.del_route_host(r[0], srv.uuid, r[1], r[2]) - except CommandError, e: - log(self.module_name, "cleanup failed: ", self.name) - e.dump() - cleanup_error(e.rc) - - -class MDC(Client): - def __init__(self, db, uuid, owner): - Client.__init__(self, db, uuid, 'mdc', owner) - -class OSC(Client): - def 
__init__(self, db, uuid, owner): - Client.__init__(self, db, uuid, 'osc', owner) - - -class COBD(Module): - def __init__(self, db): - Module.__init__(self, 'COBD', db) - self.real_uuid = self.db.get_first_ref('realobd') - self.cache_uuid = self.db.get_first_ref('cacheobd') - self.add_lustre_module('cobd' , 'cobd') - - # need to check /proc/mounts and /etc/mtab before - # formatting anything. - # FIXME: check if device is already formatted. - def prepare(self): - if is_prepared(self.name): - return - self.info(self.real_uuid, self.cache_uuid) - lctl.newdev(attach="cobd %s %s" % (self.name, self.uuid), - setup ="%s %s" %(self.real_uuid, self.cache_uuid)) - - -# virtual interface for OSC and LOV -class VOSC(Module): - def __init__(self, db, uuid, owner): - Module.__init__(self, 'VOSC', db) - if db.get_class() == 'lov': - self.osc = LOV(db, uuid) - else: - self.osc = get_osc(db, uuid, owner) - def get_uuid(self): - return self.osc.uuid - def get_name(self): - return self.osc.name - def prepare(self): - self.osc.prepare() - def cleanup(self): - self.osc.cleanup() - def load_module(self): - self.osc.load_module() - def cleanup_module(self): - self.osc.cleanup_module() - def need_mdc(self): - return self.db.get_class() != 'lov' - def get_mdc_name(self): - if self.db.get_class() == 'lov': - return self.osc.mdc_name - return '' - - -class ECHO_CLIENT(Module): - def __init__(self,db): - Module.__init__(self, 'ECHO_CLIENT', db) - self.add_lustre_module('obdecho', 'obdecho') - self.obd_uuid = self.db.get_first_ref('obd') - obd = self.db.lookup(self.obd_uuid) - self.osc = VOSC(obd, self.uuid, self.name) - - def prepare(self): - if is_prepared(self.name): - return - self.osc.prepare() # XXX This is so cheating. 
-p - self.info(self.obd_uuid) - - lctl.newdev(attach="echo_client %s %s" % (self.name, self.uuid), - setup = self.osc.get_name()) - - def cleanup(self): - if is_prepared(self.name): - Module.cleanup(self) - self.osc.cleanup() - - def load_module(self): - self.osc.load_module() - Module.load_module(self) - def cleanup_module(self): - Module.cleanup_module(self) - self.osc.cleanup_module() - -def generate_client_uuid(name): - client_uuid = '%05x_%.19s_%05x%05x' % (int(random.random() * 1048576), - name, - int(random.random() * 1048576), - int(random.random() * 1048576)) - return client_uuid[:36] - -class Mountpoint(Module): - def __init__(self,db): - Module.__init__(self, 'MTPT', db) - self.path = self.db.get_val('path') - self.fs_uuid = self.db.get_first_ref('filesystem') - fs = self.db.lookup(self.fs_uuid) - self.mds_uuid = fs.get_first_ref('mds') - self.obd_uuid = fs.get_first_ref('obd') - obd = self.db.lookup(self.obd_uuid) - client_uuid = generate_client_uuid(self.name) - self.vosc = VOSC(obd, client_uuid, self.name) - if self.vosc.need_mdc(): - self.add_lustre_module('mdc', 'mdc') - self.mdc = get_mdc(db, client_uuid, self.name, self.mds_uuid) - self.add_lustre_module('llite', 'llite') - - - def prepare(self): - if fs_is_mounted(self.path): - log(self.path, "already mounted.") - return - self.vosc.prepare() - if self.vosc.need_mdc(): - self.mdc.prepare() - mdc_name = self.mdc.name - else: - mdc_name = self.vosc.get_mdc_name() - if not mdc_name: - self.vosc.cleanup() - panic("Unable to determine MDC name. 
Probably need to cleanup before re-mounting.") - self.info(self.path, self.mds_uuid, self.obd_uuid) - if config.lctl_dump: - cmd = "osc=%s,mdc=%s" % (self.vosc.get_name(), mdc_name) - lctl.mount_option(cmd) - return - cmd = "mount -t lustre_lite -o osc=%s,mdc=%s %s %s" % \ - (self.vosc.get_name(), mdc_name, config.config, self.path) - run("mkdir", self.path) - ret, val = run(cmd) - if ret: - self.vosc.cleanup() - if self.vosc.need_mdc(): - self.mdc.cleanup() - panic("mount failed:", self.path, ":", string.join(val)) - - def cleanup(self): - self.info(self.path, self.mds_uuid,self.obd_uuid) - if fs_is_mounted(self.path): - if config.force: - (rc, out) = run("umount", "-f", self.path) - else: - (rc, out) = run("umount", self.path) - if rc: - raise CommandError('umount', out, rc) - - if fs_is_mounted(self.path): - panic("fs is still mounted:", self.path) - - self.vosc.cleanup() - if self.vosc.need_mdc(): - self.mdc.cleanup() - - def load_module(self): - self.vosc.load_module() - Module.load_module(self) - def cleanup_module(self): - Module.cleanup_module(self) - self.vosc.cleanup_module() - - -# ============================================================ -# misc query functions - -def get_ost_net(self, osd_uuid): - srv_list = [] - if not osd_uuid: - return srv_list - osd = self.lookup(osd_uuid) - node_uuid = osd.get_first_ref('node') - node = self.lookup(node_uuid) - if not node: - panic("unable to find node for osd_uuid:", osd_uuid, - " node_ref:", node_uuid) - for net_uuid in node.get_networks(): - db = node.lookup(net_uuid) - srv_list.append(Network(db)) - return srv_list - - -# the order of iniitailization is based on level. 
-def getServiceLevel(self): - type = self.get_class() - ret=0; - if type in ('network',): - ret = 5 - elif type in ('routetbl',): - ret = 6 - elif type in ('ldlm',): - ret = 20 - elif type in ('osd', 'cobd'): - ret = 30 - elif type in ('mdsdev',): - ret = 40 - elif type in ('mountpoint', 'echoclient'): - ret = 70 - else: - panic("Unknown type: ", type) - - if ret < config.minlevel or ret > config.maxlevel: - ret = 0 - return ret - -# -# return list of services in a profile. list is a list of tuples -# [(level, db_object),] -def getServices(self): - list = [] - for ref_class, ref_uuid in self.get_all_refs(): - servdb = self.lookup(ref_uuid) - if servdb: - level = getServiceLevel(servdb) - if level > 0: - list.append((level, servdb)) - else: - panic('service not found: ' + ref_uuid) - - list.sort() - return list - - -############################################################ -# MDC UUID hack - -# FIXME: clean this mess up! -# -# OSC is no longer in the xml, so we have to fake it. -# this is getting ugly and begging for another refactoring -def get_osc(ost_db, uuid, owner): - osc = OSC(ost_db, uuid, owner) - return osc - -def get_mdc(db, uuid, owner, mds_uuid): - mds_db = db.lookup(mds_uuid); - if not mds_db: - panic("no mds:", mds_uuid) - mdc = MDC(mds_db, uuid, owner) - return mdc - -############################################################ -# routing ("rooting") - -# list of (nettype, cluster_id) -local_clusters = [] - -def find_local_clusters(node_db): - global local_clusters - for netuuid in node_db.get_networks(): - net = node_db.lookup(netuuid) - srv = Network(net) - debug("add_local", netuuid) - local_clusters.append((srv.net_type, srv.cluster_id)) - if srv.port > 0: - if acceptors.has_key(srv.port): - panic("duplicate port:", srv.port) - acceptors[srv.port] = AcceptorHandler(srv.port, srv.net_type, - srv.send_mem, srv.recv_mem, - srv.irq_affinity, - srv.nid_exchange) - -# This node is a gateway. 
-is_router = 0 -def node_is_router(): - return is_router - -# If there are any routers found in the config, then this will be true -# and all nodes will load kptlrouter. -needs_router = 0 -def node_needs_router(): - return needs_router or is_router - -# list of (nettype, gw, tgt_cluster_id, lo, hi) -# Currently, these local routes are only added to kptlrouter route -# table if they are needed to connect to a specific server. This -# should be changed so all available routes are loaded, and the -# ptlrouter can make all the decisions. -local_routes = [] - -def find_local_routes(lustre): - """ Scan the lustre config looking for routers . Build list of - routes. """ - global local_routes, needs_router - local_routes = [] - list = lustre.lookup_class('node') - for router in list: - if router.get_val_int('router', 0): - needs_router = 1 - for (local_type, local_cluster_id) in local_clusters: - gw = None - for netuuid in router.get_networks(): - db = router.lookup(netuuid) - if (local_type == db.get_val('nettype') and - local_cluster_id == db.get_val('clusterid')): - gw = db.get_val('nid') - break - if gw: - debug("find_local_routes: gw is", gw) - for route in router.get_local_routes(local_type, gw): - local_routes.append(route) - debug("find_local_routes:", local_routes) - - -def choose_local_server(srv_list): - for srv in srv_list: - if local_net_type(srv.net_type): - return srv - -def local_net_type(net_type): - for cluster in local_clusters: - if net_type == cluster[0]: - return 1 - return 0 - -def find_route(srv_list): - frm_type = local_clusters[0][0] - for srv in srv_list: - debug("find_route: srv:", srv.hostaddr, "type: ", srv.net_type) - to_type = srv.net_type - to = srv.hostaddr # XXX should this be hostaddr, or nid? 
- cluster_id = srv.cluster_id - debug ('looking for route to', to_type, to) - for r in local_routes: - debug("find_route: ", r) - if (r[3] <= to and to <= r[4]) and cluster_id == r[2]: - return srv, r - return None,None - -def get_active_target(db): - target_uuid = db.getUUID() - target_name = db.getName() - node_name = get_select(target_name) - if node_name: - tgt_dev_uuid = db.get_node_tgt_dev(node_name, target_uuid) - else: - tgt_dev_uuid = db.get_first_ref('active') - return tgt_dev_uuid - - -############################################################ -# lconf level logic -# Start a service. -def newService(db): - type = db.get_class() - debug('Service:', type, db.getName(), db.getUUID()) - n = None - if type == 'ldlm': - n = LDLM(db) - elif type == 'lov': - n = LOV(db, "YOU_SHOULD_NEVER_SEE_THIS_UUID") - elif type == 'network': - n = Network(db) - elif type == 'routetbl': - n = RouteTable(db) - elif type == 'osd': - n = OSD(db) - elif type == 'cobd': - n = COBD(db) - elif type == 'mdsdev': - n = MDSDEV(db) - elif type == 'mountpoint': - n = Mountpoint(db) - elif type == 'echoclient': - n = ECHO_CLIENT(db) - else: - panic ("unknown service type:", type) - return n - -# -# Prepare the system to run lustre using a particular profile -# in a the configuration. -# * load & the modules -# * setup networking for the current node -# * make sure partitions are in place and prepared -# * initialize devices with lctl -# Levels is important, and needs to be enforced. 
-def for_each_profile(db, prof_list, operation): - for prof_uuid in prof_list: - prof_db = db.lookup(prof_uuid) - if not prof_db: - panic("profile:", profile, "not found.") - services = getServices(prof_db) - operation(services) - -def doSetup(services): - if config.nosetup: - return - for s in services: - n = newService(s[1]) - n.prepare() - -def doModules(services): - if config.nomod: - return - for s in services: - n = newService(s[1]) - n.load_module() - -def doCleanup(services): - if config.nosetup: - return - services.reverse() - for s in services: - n = newService(s[1]) - if n.safe_to_clean(): - n.cleanup() - -def doUnloadModules(services): - if config.nomod: - return - services.reverse() - for s in services: - n = newService(s[1]) - if n.safe_to_clean_modules(): - n.cleanup_module() - -# -# Load profile for -def doHost(lustreDB, hosts): - global is_router - node_db = None - for h in hosts: - node_db = lustreDB.lookup_name(h, 'node') - if node_db: - break - if not node_db: - print 'No host entry found.' - return - - is_router = node_db.get_val_int('router', 0) - lustre_upcall = node_db.get_val('lustreUpcall', '') - portals_upcall = node_db.get_val('portalsUpcall', '') - timeout = node_db.get_val_int('timeout', 0) - - find_local_clusters(node_db) - if not is_router: - find_local_routes(lustreDB) - - # Two step process: (1) load modules, (2) setup lustre - # if not cleaning, load modules first. 
- prof_list = node_db.get_refs('profile') - - if config.recover: - if not (config.tgt_uuid and config.client_uuid and config.conn_uuid): - raise Lustre.LconfError( "--recovery requires --tgt_uuid <UUID> " + - "--client_uuid <UUID> --conn_uuid <UUID>") - doRecovery(lustreDB, lctl, config.tgt_uuid, config.client_uuid, - config.conn_uuid) - elif config.cleanup: - if config.force: - # the command line can override this value - timeout = 5 - # ugly hack, only need to run lctl commands for --dump - if config.lctl_dump: - for_each_profile(node_db, prof_list, doCleanup) - return - - sys_set_timeout(timeout) - sys_set_ptldebug() - sys_set_subsystem() - sys_set_lustre_upcall(lustre_upcall) - sys_set_portals_upcall(portals_upcall) - - for_each_profile(node_db, prof_list, doCleanup) - for_each_profile(node_db, prof_list, doUnloadModules) - - else: - # ugly hack, only need to run lctl commands for --dump - if config.lctl_dump: - for_each_profile(node_db, prof_list, doSetup) - return - - sys_make_devices() - sys_set_netmem_max('/proc/sys/net/core/rmem_max', MAXTCPBUF) - sys_set_netmem_max('/proc/sys/net/core/wmem_max', MAXTCPBUF) - - for_each_profile(node_db, prof_list, doModules) - - sys_set_debug_path() - sys_set_ptldebug() - sys_set_subsystem() - script = config.gdb_script - run(lctl.lctl, ' modules >', script) - if config.gdb: - log ("The GDB module script is in", script) - # pause, so user has time to break and - # load the script - time.sleep(5) - sys_set_timeout(timeout) - sys_set_lustre_upcall(lustre_upcall) - sys_set_portals_upcall(portals_upcall) - - for_each_profile(node_db, prof_list, doSetup) - -def doRecovery(db, lctl, tgt_uuid, client_uuid, conn_uuid): - tgt = db.lookup(tgt_uuid) - if not tgt: - raise Lustre.LconfError("doRecovery: "+ tgt_uuid +" not found.") - new_uuid = get_active_target(tgt) - if not new_uuid: - raise Lustre.LconfError("doRecovery: no active target found for: " + - tgt_uuid) - net = choose_local_server(get_ost_net(db, new_uuid)) - if not net: - 
raise Lustre.LconfError("Unable to find a connection to:" + new_uuid) - # XXX, better to do a full disconnect here - log("Reconnecting", tgt_uuid, " to ", net.uuid); - lctl.del_uuid(conn_uuid) - lctl.connect(net) - lctl.recover(client_uuid, net.uuid) - - -def setupModulePath(cmd, portals_dir = PORTALS_DIR): - base = os.path.dirname(cmd) - if development_mode(): - if not config.lustre: - config.lustre = (os.path.join(base, "..")) - # normalize the portals dir, using command line arg if set - if config.portals: - portals_dir = config.portals - dir = os.path.join(config.lustre, portals_dir) - config.portals = dir - debug('config.portals', config.portals) - elif config.lustre and config.portals: - # production mode - # if --lustre and --portals, normalize portals - # can ignore POTRALS_DIR here, since it is probly useless here - config.portals = os.path.join(config.lustre, config.portals) - debug('config.portals B', config.portals) - -def sysctl(path, val): - debug("+ sysctl", path, val) - if config.noexec: - return - try: - fp = open(os.path.join('/proc/sys', path), 'w') - fp.write(str(val)) - fp.close() - except IOError, e: - panic(str(e)) - - -def sys_set_debug_path(): - sysctl('portals/debug_path', config.debug_path) - -def sys_set_lustre_upcall(upcall): - # the command overrides the value in the node config - if config.lustre_upcall: - upcall = config.lustre_upcall - elif config.upcall: - upcall = config.upcall - if upcall: - sysctl('lustre/upcall', upcall) - -def sys_set_portals_upcall(upcall): - # the command overrides the value in the node config - if config.portals_upcall: - upcall = config.portals_upcall - elif config.upcall: - upcall = config.upcall - if upcall: - sysctl('portals/upcall', upcall) - -def sys_set_timeout(timeout): - # the command overrides the value in the node config - if config.timeout > 0: - timeout = config.timeout - if timeout != None and timeout > 0: - sysctl('lustre/timeout', timeout) - -def sys_set_ptldebug(): - if config.ptldebug != 
None: - try: - val = eval(config.ptldebug, ptldebug_names) - val = "0x%x" % (val,) - sysctl('portals/debug', val) - except NameError, e: - panic(str(e)) - -def sys_set_subsystem(): - if config.subsystem != None: - try: - val = eval(config.ptldebug, ptldebug_names) - val = "0x%x" % (val,) - sysctl('portals/subsystem_debug', val) - except NameError, e: - panic(str(e)) - -def sys_set_netmem_max(path, max): - debug("setting", path, "to at least", max) - if config.noexec: - return - fp = open(path) - str = fp.readline() - fp.close - cur = int(str) - if max > cur: - fp = open(path, 'w') - fp.write('%d\n' %(max)) - fp.close() - - -def sys_make_devices(): - if not os.access('/dev/portals', os.R_OK): - run('mknod /dev/portals c 10 240') - if not os.access('/dev/obd', os.R_OK): - run('mknod /dev/obd c 10 241') - - -# Add dir to the global PATH, if not already there. -def add_to_path(new_dir): - syspath = string.split(os.environ['PATH'], ':') - if new_dir in syspath: - return - os.environ['PATH'] = os.environ['PATH'] + ':' + new_dir - -def default_debug_path(): - path = '/tmp/lustre-log' - if os.path.isdir('/r'): - return '/r' + path - else: - return path - -def default_gdb_script(): - script = '/tmp/ogdb' - if os.path.isdir('/r'): - return '/r' + script - else: - return script - - -DEFAULT_PATH = ('/sbin', '/usr/sbin', '/bin', '/usr/bin') -# ensure basic elements are in the system path -def sanitise_path(): - for dir in DEFAULT_PATH: - add_to_path(dir) - -# global hack for the --select handling -tgt_select = {} -def init_select(arg): - # arg = "service=nodeA,service2=nodeB" - global tgt_select - if arg: - list = string.split(arg, ',') - for entry in list: - srv, node = string.split(entry, '=') - tgt_select[srv] = node - -def get_select(srv): - if tgt_select.has_key(srv): - return tgt_select[srv] - return None - - -PARAM = Lustre.Options.PARAM -INTPARAM = Lustre.Options.INTPARAM -lconf_options = [ - ('verbose,v', "Print system commands as they are run"), - ('ldapurl',"LDAP 
server URL, eg. ldap://localhost", PARAM), - ('config', "Cluster config name used for LDAP query", PARAM), - ('select', "service=nodeA,service2=nodeB ", PARAM), - ('node', "Load config for <nodename>", PARAM), - ('cleanup,d', "Cleans up config. (Shutdown)"), - ('force,f', "Forced unmounting and/or obd detach during cleanup", - Lustre.Options.FLAG, 0), - ('mds_ost_conn', "Open connections to OSTs on the MDS"), - ('failover',"""Used to shut down without saving state. - This will allow this node to "give up" a service to a - another node for failover purposes. This will not - be a clean shutdown.""", - Lustre.Options.FLAG, 0), - ('gdb', """Prints message after creating gdb module script - and sleeps for 5 seconds."""), - ('noexec,n', """Prints the commands and steps that will be run for a - config without executing them. This can used to check if a - config file is doing what it should be doing"""), - ('nomod', "Skip load/unload module step."), - ('nosetup', "Skip device setup/cleanup step."), - ('reformat', "Reformat all devices (without question)"), - ('dump', "Dump the kernel debug log to file before portals is unloaded", - PARAM), - ('minlevel', "Minimum level of services to configure/cleanup", - INTPARAM, 0), - ('maxlevel', """Maximum level of services to configure/cleanup - Levels are aproximatly like: - 10 - network - 20 - device, ldlm - 30 - osd, mdd - 40 - mds, ost - 70 - mountpoint, echo_client, osc, mdc, lov""", - INTPARAM, 100), - ('lustre', """Base directory of lustre sources. This parameter will - cause lconf to load modules from a source tree.""", PARAM), - ('portals', """Portals source directory. If this is a relative path, - then it is assumed to be relative to lustre. 
""", PARAM), - ('timeout', "Set recovery timeout", PARAM), - ('upcall', "Set both portals and lustre upcall script", PARAM), - ('lustre_upcall', "Set lustre upcall script", PARAM), - ('portals_upcall', "Set portals upcall script", PARAM), - ('lctl_dump', "Save lctl ioctls to the dumpfile argument", PARAM), - ('ptldebug', "Set the portals debug level", PARAM), - ('subsystem', "Set the portals debug subsystem", PARAM), - ('gdb_script', "Fullname of gdb debug script", PARAM, default_gdb_script()), - ('debug_path', "Path to save debug dumps", PARAM, default_debug_path()), -# Client recovery options - ('recover', "Recover a device"), - ('group', "The group of devices to configure or cleanup", PARAM), - ('tgt_uuid', "The failed target (required for recovery)", PARAM), - ('client_uuid', "The failed client (required for recovery)", PARAM), - ('conn_uuid', "The failed connection (required for recovery)", PARAM), - ] - -def main(): - global lctl, config - - # in the upcall this is set to SIG_IGN - signal.signal(signal.SIGCHLD, signal.SIG_DFL) - - cl = Lustre.Options("lconf", "config.xml", lconf_options) - try: - config, args = cl.parse(sys.argv[1:]) - except Lustre.OptionError, e: - print e - sys.exit(1) - - setupModulePath(sys.argv[0]) - - host = socket.gethostname() - - # the PRNG is normally seeded with time(), which is not so good for starting - # time-synchronized clusters - input = open('/dev/urandom', 'r') - if not input: - print 'Unable to open /dev/urandom!' - sys.exit(1) - seed = input.read(32) - input.close() - random.seed(seed) - - sanitise_path() - - init_select(config.select) - - if len(args) > 0: - if not os.access(args[0], os.R_OK): - print 'File not found or readable:', args[0] - sys.exit(1) - try: - dom = xml.dom.minidom.parse(args[0]) - except Exception: - panic("%s does not appear to be a config file." % (args[0])) - sys.exit(1) # make sure to die here, even in debug mode. 
- db = Lustre.LustreDB_XML(dom.documentElement, dom.documentElement) - if not config.config: - config.config = os.path.basename(args[0])# use full path? - if config.config[-4:] == '.xml': - config.config = config.config[:-4] - elif config.ldapurl: - if not config.config: - panic("--ldapurl requires --config name") - dn = "config=%s,fs=lustre" % (config.config) - db = Lustre.LustreDB_LDAP('', {}, base=dn, url = config.ldapurl) - else: - cl.usage() - sys.exit(1) - - ver = db.get_version() - if not ver: - panic("No version found in config data, please recreate.") - if ver != Lustre.CONFIG_VERSION: - panic("Config version", ver, "does not match lconf version", - Lustre.CONFIG_VERSION) - - node_list = [] - if config.node: - node_list.append(config.node) - else: - if len(host) > 0: - node_list.append(host) - node_list.append('localhost') - - debug("configuring for host: ", node_list) - - if len(host) > 0: - config.debug_path = config.debug_path + '-' + host - config.gdb_script = config.gdb_script + '-' + host - - lctl = LCTLInterface('lctl') - - if config.lctl_dump: - lctl.use_save_file(config.lctl_dump) - - doHost(db, node_list) - -if __name__ == "__main__": - try: - main() - except Lustre.LconfError, e: - print e - except CommandError, e: - e.dump() - sys.exit(e.rc) - - if first_cleanup_error: - sys.exit(first_cleanup_error) diff --git a/lustre/utils/lctl.c b/lustre/utils/lctl.c deleted file mode 100644 index 80cdcf2..0000000 --- a/lustre/utils/lctl.c +++ /dev/null @@ -1,260 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Robert Read <rread@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - - -#include <stdlib.h> -#include <stdio.h> -#include <portals/api-support.h> -#include <portals/ptlctl.h> -#include "obdctl.h" -#include "parser.h" - -static int jt_quit(int argc, char **argv) { - Parser_quit(argc, argv); - return 0; -} - -static int jt_noop(int argc, char **argv) { - return 0; -} - -static int jt_opt_ignore_errors(int argc, char **argv) { - Parser_ignore_errors(1); - return 0; -} - -command_t cmdlist[] = { - /* Metacommands */ - {"--device", jt_opt_device, 0, - "run <command> after connecting to device <devno>\n" - "--device <devno> <command [args ...]>"}, - {"--threads", jt_opt_threads, 0, - "run <threads> separate instances of <command> on device <devno>\n" - "--threads <threads> <verbose> <devno> <command [args ...]>"}, - {"--ignore_errors", jt_opt_ignore_errors, 0, - "ignore errors that occur during script processing\n" - "--ignore_errors"}, - {"ignore_errors", jt_opt_ignore_errors, 0, - "ignore errors that occur during script processing\n" - "ignore_errors"}, - {"dump", jt_ioc_dump, 0, "usage: dump file, save ioctl buffer to file"}, - - /* Network configuration commands */ - {"==== network config ====", jt_noop, 0, "network config"}, - {"network", jt_ptl_network, 0, "commands that follow apply to net\n" - "usage: network <tcp/elan/myrinet/scimac>"}, - {"connect", jt_ptl_connect, 0, "connect to a remote nid\n" - 
"usage: connect [[<hostname> <port>] | <elan id>]"}, - {"disconnect", jt_ptl_disconnect, 0, "disconnect from a remote nid\n" - "usage: disconnect <nid>"}, - {"mynid", jt_ptl_mynid, 0, "inform the socknal of the local nid. " - "The nid defaults to hostname for tcp networks and is automatically " - "setup for elan/myrinet/scimac networks.\n" - "usage: mynid [nid]"}, - {"shownid", jt_ptl_shownid, 0, "print the local NID\n" - "usage: shownid"}, - {"add_uuid", jt_obd_add_uuid, 0, "associate a UUID with a nid\n" - "usage: add_uuid <uuid> <nid> <net_type>"}, - {"close_uuid", jt_obd_close_uuid, 0, "disconnect a UUID\n" - "usage: close_uuid <uuid> <net-type>)"}, - {"del_uuid", jt_obd_del_uuid, 0, "delete a UUID association\n" - "usage: del_uuid <uuid>"}, - {"add_route", jt_ptl_add_route, 0, - "add an entry to the routing table\n" - "usage: add_route <gateway> <target> [target]"}, - {"del_route", jt_ptl_del_route, 0, - "delete an entry from the routing table\n" - "usage: del_route <target>"}, - {"route_list", jt_ptl_print_routes, 0, "print the routing table\n" - "usage: route_list"}, - {"recv_mem", jt_ptl_rxmem, 0, "set socket receive buffer size, " - "if size is omited the current size is reported.\n" - "usage: recv_mem [size]"}, - {"send_mem", jt_ptl_txmem, 0, "set socket send buffer size, " - "if size is omited the current size is reported.\n" - "usage: send_mem [size]"}, - {"nagle", jt_ptl_nagle, 0, "enable/disable nagle, omitting the " - "argument will cause the current nagle setting to be reported.\n" - "usage: nagle [on/off]"}, - {"fail", jt_ptl_fail_nid, 0, "fail/restore communications.\n" - "Omitting the count means indefinitely, 0 means restore, " - "otherwise fail 'count' messages.\n" - "usage: fail nid|_all_ [count]"}, - - /* Device selection commands */ - {"=== device selection ===", jt_noop, 0, "device selection"}, - {"newdev", jt_obd_newdev, 0, "create a new device\n" - "usage: newdev"}, - {"device", jt_obd_device, 0, - "set current device to 
<%name|$name|devno>\n" - "usage: device <%name|$name|devno>"}, - {"device_list", jt_obd_list, 0, "show all devices\n" - "usage: device_list"}, - {"lustre_build_version", jt_get_version, 0, - "print the build version of lustre\n" - "usage: lustre_build_version"}, - - /* Device configuration commands */ - {"==== device config =====", jt_noop, 0, "device config"}, - {"attach", jt_obd_attach, 0, - "set the type of the current device (with <name> and <uuid>)\n" - "usage: attach type [name [uuid]]"}, - {"setup", jt_obd_setup, 0, - "type specific device configuration information\n" - "usage: setup <args...>"}, - {"cleanup", jt_obd_cleanup, 0, "cleanup previously setup device\n" - "usage: cleanup [force | failover]"}, - {"detach", jt_obd_detach, 0, - "remove driver (and name and uuid) from current device\n" - "usage: detach"}, - {"lov_setconfig", jt_obd_lov_setconfig, 0, - "write lov configuration to an mds device\n" - "usage: lov_setconfig lov-uuid stripe-count stripe-size offset pattern UUID1 [UUID2 ...]"}, - {"lov_getconfig", jt_obd_lov_getconfig, 0, - "read lov configuration from an mds device\n" - "usage: lov_getconfig lov-uuid"}, - - /* Device operations */ - {"=== device operations ==", jt_noop, 0, "device operations"}, - {"probe", jt_obd_connect, 0, - "build a connection handle to a device. This command is used to " - "suspend configuration until lctl has ensured that the mds and osc " - "services are available. 
This is to avoid mount failures in a " - "rebooting cluster.\n" - "usage: probe [timeout]"}, - {"close", jt_obd_disconnect, 0, - "close the connection handle\n" - "usage: close"}, - {"getattr", jt_obd_getattr, 0, - "get attribute for OST object <objid>\n" - "usage: getattr <objid>"}, - {"setattr", jt_obd_setattr, 0, - "set mode attribute for OST object <objid>\n" - "usage: setattr <objid> <mode>"}, - {"create", jt_obd_create, 0, - "create <num> OST objects (with <mode>)\n" - "usage: create [num [mode [verbose [lsm data]]]]"}, - {"destroy", jt_obd_destroy, 0, - "destroy OST object <objid> [num [verbose]]\n" - "usage: destroy <num> objects, starting at objid <objid>"}, - {"test_getattr", jt_obd_test_getattr, 0, - "do <num> getattrs (on OST object <objid> (objid+1 on each thread))\n" - "usage: test_getattr <num> [verbose [[t]objid]]"}, - {"test_brw", jt_obd_test_brw, 0, - "do <num> bulk read/writes (<npages> per I/O, on OST object <objid>)\n" - "usage: test_brw [t]<num> [write [verbose [npages [[t]objid]]]]"}, - {"get_stripe", jt_obd_get_stripe, 0, - "show stripe info for an echo client object\n" - "usage: get_stripe objid\n"}, - {"set_stripe", jt_obd_set_stripe, 0, - "set stripe info for an echo client object\n" - "usage: set_stripe objid[=width!count[@offset][:id:id...]\n"}, - {"unset_stripe", jt_obd_unset_stripe, 0, - "unset stripe info for an echo client object\n" - "usage: unset_stripe objid\n"}, - {"test_ldlm", jt_obd_test_ldlm, 0, - "perform lock manager test\n" - "usage: test_ldlm"}, - {"ldlm_regress_start", jt_obd_ldlm_regress_start, 0, - "start lock manager stress test\n" - "usage: ldlm_regress_start [numthreads [refheld [numres [numext]]]]"}, - {"ldlm_regress_stop", jt_obd_ldlm_regress_stop, 0, - "stop lock manager stress test (no args)\n"}, - {"dump_ldlm", jt_obd_dump_ldlm, 0, - "dump all lock manager state (no args)"}, - {"activate", jt_obd_activate, 0, "activate an import\n"}, - {"deactivate", jt_obd_deactivate, 0, "deactivate an import\n"}, - 
{"recover", jt_obd_recover, 0, "usage: recover [<connection UUID>]"}, - {"lookup", jt_obd_mdc_lookup, 0, "usage: lookup <directory> <file>"}, - {"notransno", jt_obd_no_transno, 0, - "disable sending of committed-transno updates\n"}, - {"readonly", jt_obd_set_readonly, 0, - "disable writes to the underlying device\n"}, - {"abort_recovery", jt_obd_abort_recovery, 0, - "abort recovery on MDS device\n"}, - {"mount_option", jt_obd_mount_option, 0, - "dump mount options to file\n"}, - - /* Debug commands */ - {"======== debug =========", jt_noop, 0, "debug"}, - {"debug_daemon", jt_dbg_debug_daemon, 0, - "debug daemon control and dump to a file" - "usage: debug_daemon [start file <#MB>|stop|pause|continue]"}, - {"debug_kernel", jt_dbg_debug_kernel, 0, - "get debug buffer and dump to a file" - "usage: debug_kernel [file] [raw]"}, - {"debug_file", jt_dbg_debug_file, 0, - "read debug buffer from input and dump to output" - "usage: debug_file <input> [output] [raw]"}, - {"clear", jt_dbg_clear_debug_buf, 0, "clear kernel debug buffer\n" - "usage: clear"}, - {"mark", jt_dbg_mark_debug_buf, 0,"insert marker text in kernel debug buffer\n" - "usage: mark <text>"}, - {"filter", jt_dbg_filter, 0, "filter message type\n" - "usage: filter <subsystem id/debug mask>"}, - {"show", jt_dbg_show, 0, "show message type\n" - "usage: show <subsystem id/debug mask>"}, - {"debug_list", jt_dbg_list, 0, "list subsystem and debug types\n" - "usage: debug_list <subs/types>"}, - {"modules", jt_dbg_modules, 0, - "provide gdb-friendly module information\n" - "usage: modules <path>"}, - {"panic", jt_dbg_panic, 0, "force the kernel to panic\n" - "usage: panic"}, - - /* User interface commands */ - {"======= control ========", jt_noop, 0, "control commands"}, - {"help", Parser_help, 0, "help"}, - {"exit", jt_quit, 0, "quit"}, - {"quit", jt_quit, 0, "quit"}, - { 0, 0, 0, NULL } -}; - - - -int main(int argc, char **argv) -{ - int rc; - - setlinebuf(stdout); - - ptl_initialize(argc, argv); - if 
(obd_initialize(argc, argv) < 0) - exit(2); - if (dbg_initialize(argc, argv) < 0) - exit(3); - - Parser_init("lctl > ", cmdlist); - - if (argc > 1) { - rc = Parser_execarg(argc - 1, argv + 1, cmdlist); - } else { - rc = Parser_commands(); - } - - obd_cleanup(argc, argv); - return rc; -} - diff --git a/lustre/utils/lfind.c b/lustre/utils/lfind.c deleted file mode 100644 index 847dd4f..0000000 --- a/lustre/utils/lfind.c +++ /dev/null @@ -1,296 +0,0 @@ -#include <stdio.h> -#include <getopt.h> -#include <stdlib.h> -#include <stdarg.h> -#include <libgen.h> -#include <ftw.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include <sys/ioctl.h> -#include <sys/types.h> - - -#include <liblustre.h> -#include <linux/obd.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_lite.h> -#include <linux/obd_lov.h> - -/* XXX Max obds per lov currently hardcoded to 1000 in lov/lov_obd.c */ -#define MAX_LOV_UUID_COUNT 1000 -#define OBD_NOT_FOUND (-1) - -char * cmd; -struct option longOpts[] = { - {"help", 0, 0, 'h'}, - {"obd", 1, 0, 'o'}, - {"query", 0, 0, 'q'}, - {"verbose", 0, 0, 'v'}, - {0, 0, 0, 0} - }; -int query; -int verbose; -char * shortOpts = "ho:qv"; -char * usageMsg = "[ --obd <obd uuid> | --query ] <dir|file> ..."; - -int max_ost_count = MAX_LOV_UUID_COUNT; -struct obd_uuid * obduuid; -char * buf; -int buflen; -struct obd_uuid * uuids; -struct obd_ioctl_data data; -struct lov_desc desc; -int uuidslen; -int cfglen; -struct lov_mds_md *lmm; -int lmmlen; - -void init(); -void usage(FILE *stream); -void errMsg(char *fmt, ...); -void processPath(const char *path); - -int -main (int argc, char **argv) { - int c; - - cmd = basename(argv[0]); - - while ((c = getopt_long(argc, argv, shortOpts, longOpts, NULL)) != -1) { - switch (c) { - case 'o': - if (obduuid) { - errMsg("obd '%s' already specified: '%s'.", - obduuid, optarg); - exit(1); - } - - obduuid = (struct obd_uuid *)optarg; - break; - case 'h': - usage(stdout); - exit(0); - case 'q': - query++; - break; 
- case 'v': - verbose++; - break; - case '?': - usage(stderr); - exit(1); - default: - errMsg("Internal error. Valid '%s' unrecognized.", - argv[optind - 1]); - usage(stderr); - exit(1); - } - } - - if (optind >= argc) { - usage(stderr); - exit(1); - } - - if (obduuid == NULL) - query++; - - init(); - - do { - processPath(argv[optind]); - } while (++optind < argc); - - exit (0); -} - -void -init() -{ - int datalen, desclen; - - datalen = size_round(sizeof(data)); - desclen = size_round(sizeof(desc)); - uuidslen = size_round(max_ost_count * sizeof(*uuids)); - cfglen = datalen + desclen + uuidslen; - lmmlen = lov_mds_md_size(max_ost_count); - if (cfglen > lmmlen) - buflen = cfglen; - else - buflen = lmmlen; - - /* XXX max ioctl buffer size currently hardcoded to 8192 */ - if (buflen > 8192) { - int nuuids, remaining, nluoinfos; - - buflen = 8192; - nuuids = (buflen - datalen - desclen) / sizeof(*uuids); - uuidslen = size_round(nuuids * sizeof(*uuids)); - remaining = nuuids * sizeof(*uuids); - if (uuidslen > remaining) - nuuids--; - nluoinfos = (buflen - sizeof(*lmm)) / sizeof(*lmm->lmm_objects); - if (nuuids > nluoinfos) - max_ost_count = nluoinfos; - else - max_ost_count = nuuids; - - cfglen = datalen + desclen + uuidslen; - lmmlen = lov_mds_md_size(max_ost_count); - } - - if ((buf = malloc(buflen)) == NULL) { - errMsg("Unable to allocate %d bytes of memory for ioctl's.", - buflen); - exit(1); - } - - lmm = (struct lov_mds_md *)buf; - uuids = (struct obd_uuid *)buf; -} - -void -usage(FILE *stream) -{ - fprintf(stream, "usage: %s %s\n", cmd, usageMsg); -} - -void -errMsg(char *fmt, ...) 
-{ - va_list args; - - fprintf(stderr, "%s: ", cmd); - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); - fprintf(stderr, "\n"); -} - -void -processPath(const char *path) -{ - int fd; - int rc; - int i; - int obdindex; - int obdcount; - struct obd_uuid *uuidp; - - if (query || verbose && !obduuid) { - printf("%s\n", path); - } - - if ((fd = open(path, O_RDONLY | O_LOV_DELAY_CREATE)) < 0) { - errMsg("open \"%.20s\" failed.", path); - perror("open"); - return; - } - - memset(&data, 0, sizeof(data)); - data.ioc_inllen1 = sizeof(desc); - data.ioc_inlbuf1 = (char *)&desc; - data.ioc_inllen2 = uuidslen; - data.ioc_inlbuf2 = (char *)uuids; - - memset(&desc, 0, sizeof(desc)); - desc.ld_tgt_count = max_ost_count; - - if (obd_ioctl_pack(&data, &buf, buflen)) { - errMsg("internal buffering error."); - exit(1); - } - - rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf); - if (rc) { - if (errno == ENOTTY) { - if (!obduuid) { - printf("Not a regular file or not Lustre file.\n\n"); - } - return; - } - errMsg("OBD_IOC_LOV_GET_CONFIG ioctl failed: %d.", errno); - perror("ioctl"); - exit(1); - } - - if (obd_ioctl_unpack(&data, buf, buflen)) { - errMsg("Invalid reply from ioctl."); - exit(1); - } - - obdcount = desc.ld_tgt_count; - if (obdcount == 0) - return; - - obdindex = OBD_NOT_FOUND; - - if (obduuid) { - for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) { - if (strncmp((const char *)obduuid, (const char *)uuidp, - sizeof(*uuidp)) == 0) { - obdindex = i; - } - } - - if (obdindex == OBD_NOT_FOUND) - return; - } else if (query || verbose) { - printf("OBDS:\n"); - for (i = 0, uuidp = uuids; i < obdcount; i++, uuidp++) - printf("%4d: %s\n", i, (char *)uuidp); - } - - memset((void *)buf, 0, buflen); - lmm->lmm_magic = LOV_MAGIC; - lmm->lmm_ost_count = max_ost_count; - - rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE, (void *)lmm); - if (rc) { - if (errno == ENODATA) { - if(!obduuid) { - printf("Has no stripe information.\n\n"); - } - } - else { - errMsg("LL_IOC_LOV_GETSTRIPE 
ioctl failed. %d", errno); - perror("ioctl"); - } - return; - } - - close(fd); - - if (obduuid && lmm->lmm_objects[obdindex].l_object_id) - printf("%s\n", path); - - if (verbose) { - printf("lmm_magic: 0x%x\n", lmm->lmm_magic); - printf("lmm_object_id: "LPX64"\n", lmm->lmm_object_id); - printf("lmm_stripe_offset: %u\n", (int)lmm->lmm_stripe_offset); - printf("lmm_stripe_count: %u\n", (int)lmm->lmm_stripe_count); - printf("lmm_stripe_size: %u\n", (int)lmm->lmm_stripe_size); - printf("lmm_ost_count: %u\n", lmm->lmm_ost_count); - printf("lmm_stripe_pattern: %d\n", lmm->lmm_magic & 0xf); - } - - if (query || verbose) { - long long oid; - int ost = lmm->lmm_stripe_offset; - int header = 1; - - for (i = 0; i < lmm->lmm_ost_count; i++, ost++) { - ost %= lmm->lmm_ost_count; - if ((oid = lmm->lmm_objects[ost].l_object_id)) { - if (header) { - printf("\tobdidx\t objid\n"); - header = 0; - } - printf("\t%6u\t%8llu%s\n", - ost, oid, obdindex == ost ? " *" : ""); - } - } - printf("\n"); - } -} diff --git a/lustre/utils/llanalyze b/lustre/utils/llanalyze deleted file mode 100644 index 56e58c8..0000000 --- a/lustre/utils/llanalyze +++ /dev/null @@ -1,278 +0,0 @@ -#!/usr/bin/perl - -use Getopt::Long; -use Term::ANSIColor; - -GetOptions("pid=i" => \$pid, - "trace!" => \$trace, - "silent!" => \$silent, - "rpctrace!" => \$rpctrace, - "nodlm!" => \$nodlm, - "noclass!" => \$noclass, - "nonet!" 
=> \$nonet); - -print "pid: $pid, nodlm $nodlm nonet $nonet trace $trace\n"; - - -$subsys->{UNDEFINED} = 0; -$subsys->{MDC} = 1; -$subsys->{MDS} = 2; -$subsys->{OSC} = 3; -$subsys->{OST} = 4; -$subsys->{CLASS} = 5; -$subsys->{OBDFS} = 6; -$subsys->{LLITE} = 7; -$subsys->{RPC} = 8; -$subsys->{EXT2OBD} = 9; -$subsys->{PORTALS} = 10; -$subsys->{SOCKNAL} = 11; -$subsys->{QSWNAL} = 12; -$subsys->{PINGER} = 13; -$subsys->{FILTER} = 14; -$subsys->{TRACE} = 15; # obdtrace, not to be confused with D_TRACE */ -$subsys->{ECHO} = 16; -$subsys->{LDLM} = 17; -$subsys->{LOV} = 18; -$subsys->{GMNAL} = 19; -$subsys->{PTLROUTER} = 20; - - -$masks->{TRACE} = 1 << 0; # /* ENTRY/EXIT markers */ -$masks->{INODE} = 1 << 1; # -$masks->{SUPER} = 1 << 2; # -$masks->{EXT2} = 1 << 3; # /* anything from ext2_debug */ -$masks->{MALLOC} = 1 << 4; # /* print malloc, free information */ -$masks->{CACHE} = 1 << 5; # /* cache-related items */ -$masks->{INFO} = 1 << 6; # /* general information */ -$masks->{IOCTL} = 1 << 7; # /* ioctl related information */ -$masks->{BLOCKS} = 1 << 8; # /* ext2 block allocation */ -$masks->{NET} = 1 << 9; # /* network communications */ -$masks->{WARNING} = 1 << 10; # -$masks->{BUFFS} = 1 << 11; # -$masks->{OTHER} = 1 << 12; # -$masks->{DENTRY} = 1 << 13; # -$masks->{PORTALS} = 1 << 14; # /* ENTRY/EXIT markers */ -$masks->{PAGE} = 1 << 15; # /* bulk page handling */ -$masks->{DLMTRACE} = 1 << 16; # -$masks->{ERROR} = 1 << 17; # /* CERROR} = ...) == CDEBUG} = D_ERROR, ...) */ -$masks->{EMERG} = 1 << 18; # /* CEMERG} = ...) == CDEBUG} = D_EMERG, ...) 
*/ -$masks->{HA} = 1 << 19; # /* recovery and failover */ -$masks->{RPCTRACE} = 1 << 19; # /* recovery and failover */ - -sub extractpid -{ - $line = shift; -# print "$_\n"; - if ($line =~ m/\(\) ([0-9]*)\+[0-9]*\):/) { - return $1; - } - if ($line =~ m/\(\) ([0-9]*) \| [0-9]*\+[0-9]*\):/) { - return $1; - } -} - -sub extracthostpid -{ - $line = shift; -# print "$_\n"; - if ($line =~ m/\(\) [0-9]* \| ([0-9]*)\+[0-9]*\):/) { - return $1; - } -} - -sub entering -{ - $_ = shift; - $entering = /Process entered/; -} - -sub leaving -{ - $_ = shift; - $entering = /Process leaving/; -} - -sub getsubsys -{ - my ($subsys, $mask) = split ":"; - return hex($subsys); -} - -sub getmask -{ - my ($subsys, $mask) = split ":"; - return hex($mask); -} - -sub setcolor -{ - my $linemask = shift; - if ($linemask == $masks->{TRACE}) { - print color("yellow on_black"); - } - if ($linemask == $masks->{DLMTRACE}) { - print color("magenta on_black"); - } - if ($linemask == $masks->{DLM}) { - print color("magenta on_black"); - } - if ($linemask == $masks->{DENTRY}) { - print color("red on_black"); - } -} - -sub study_lock -{ - $_ = shift; - my $rc; - - $rc = /completion callback handler START ns: (.*) lock: (.*) lrc: (.*) mode/; - if ($rc) { - $completion_callbacks{$1}->{$2} = $3; -# print color("white"); -# print "---CP CB START: $1 $2 $3\n"; -# print color("reset"); - } - $rc = /callback handler finished.* ns: (.*) lock: (.*) lrc: (.*) mode/; - if ($rc) { -# print color("white"); -# print "---CP CB END: $1 $2 $3 deleting $completion_callbacks{$1}->{$2}\n"; -# print color("reset"); - delete $completion_callbacks{$1}->{$2}; - } - - if ($rc) { - $rc = /client blocking AST callback handler START ns: (.*) lock: (.*) lrc: (.*) mode/; - $blocking_callbacks{$1}->{$2} = $3; -# print color("white"); -# print "---BL CB START: $1 $2\n"; -# print color("reset"); - } - $rc = /client blocking callback handler END ns: (.*) lock: (.*) lrc: (.*) mode/; - if ($rc) { -# print color("white"); -# print "---BL CB 
END: $1 $2 $3 deleting $blocking_callbacks{$1}->{$2}\n"; -# print color("reset"); - delete $blocking_callbacks{$1}->{$2}; - } - - $rc = /ldlm_lock_addref.*ns: (.*) lock: (.*) lrc: (.*) mode/; -# print color("white"); -# print "------>addref ns: $1 lock: $2 lrc: $3\n" if ($rc); -# print color("reset"); - $locks{$1}->{$2} = {$3} if ($rc); - $rc = /ldlm_lock_decref.*ns: (.*) lock: (.*) lrc: (.*) mode/; -# print color("white"); -# print "------>decref ns: $1 lock: $2 lrc: $3\n" if ($rc); -# print color("reset"); - $locks{$1}->{$2} = {$3} if ($rc); -} - -sub hanging_locks -{ - my $found; - my $ns; - - foreach (keys %completion_callbacks) { - $ns = $_; - $found = 0; - foreach (keys %{$completion_callbacks{$ns}}) { - if (!$found) { - print "Unfinished completions in ns $ns: \n"; - $found =1; - } - print " lock: $_ lrc: $completion_callbacks{$ns}->{$_}\n"; - } - } - foreach (keys %blocking_callbacks) { - $ns = $_; - $found = 0; - foreach (keys %{$blocking_callbacks{$ns}}) { - if (!$found) { - print "Unfinished blocking in ns $ns: \n"; - $found =1; - } - printf(" lock: $_ lrc: %s\n", $blocking_callbacks{$ns}->{$_}); - } - } - -} - -sub study_intent -{ - $_ = shift; - my $rc; - - $rc = /D_IT UP dentry (.*) fsdata/; - delete $it{$1} if ($rc); - $rc = /D_IT DOWN dentry (.*) fsdata/; - $it{$1} = "yes" if ($rc); -} - -sub unmatched_intents { - my $found; - foreach (keys %it) { - if (!$found) { - print "Unmatched intents: \n"; - $found =1; - } - print " $_\n"; - } -} - -while (<STDIN>) { - $linepid = extractpid($_); - $linehpid = extracthostpid($_); - $linemask = getmask($_); - $linesubsys = getsubsys($_); - -# printf "---> mask %x subsys %x\n", $linemask, $linesubsys; - - if (leaving($_)) { - chop $prefix->{$linepid}; - chop $prefix->{$linepid}; - } - - if ($linemask == $masks->{DENTRY}) { - study_intent($_); - } - if ($linemask == $masks->{DLMTRACE}) { - study_lock($_); - } - - if ( !$pid || $linepid == $pid || $linehpid == $pid) { - next if ($rpctrace && $linemask != 
$masks->{RPCTRACE}); - next if ($trace && $linemask != $masks->{TRACE}); - - - next if ($nodlm && - ( $linesubsys == $subsys->{LDLM})); - next if ($noclass && - ( $linesubsys == $subsys->{CLASS})); - - next if ($nonet && - ( $linesubsys == $subsys->{RPC} || - $linesubsys == $subsys->{NET} || - $linesubsys == $subsys->{PORTALS} || - $linesubsys == $subsys->{SOCKNAL} || - $linesubsys == $subsys->{QSWNAL} || - $linesubsys == $subsys->{GMNAL})); - - -# printf "sub/mask: %s - %s\n", getsubsys($_), getmask($_); - if (!$silent) { - setcolor($linemask); - printf("%s%s", $prefix->{$linepid}, $_); - print color("reset"); - } - # last if $count++ > 100; - } - if (entering($_)) { - $prefix->{$linepid} .= ' '; - } -} - - -unmatched_intents(); -hanging_locks(); -# printf "argv %s pid %d\n", $ARGV[0], extractpid($ARGV[0]); diff --git a/lustre/utils/llobdstat.pl b/lustre/utils/llobdstat.pl deleted file mode 100755 index cb39d30..0000000 --- a/lustre/utils/llobdstat.pl +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/perl - -my $pname = $0; - -my $defaultpath = "/proc/fs/lustre"; -my $obdstats = "stats"; - -sub usage() -{ - print STDERR "Usage: $pname <stats_file> [<interval>]\n"; - print STDERR "example: $pname help (to get help message)\n"; - print STDERR "example: $pname ost1 1 (monitor /proc/fs/lustre/ost1/obd_stats once per second\n"; - exit 1; -} - -my $statspath = "None"; -my $interval = 0; - -if (($#ARGV < 0) || ($#ARGV > 1)) { - usage(); -} else { - if ( $ARGV[0] =~ /help$/ ) { - usage(); - } - if ( -f $ARGV[0] ) { - $statspath = $ARGV[0]; - } elsif ( -f "$ARGV[0]/$obdstats" ) { - $statspath = "$ARGV[0]/$obdstats"; - } else { - my $st = `ls $defaultpath/*/$ARGV[0]/$obdstats 2> /dev/null`; - chop $st; - if ( -f "$st" ) { - $statspath = $st; - } - } - if ( $statspath =~ /^None$/ ) { - die "Cannot locate stat file for: $ARGV[0]\n"; - } - if ($#ARGV == 1) { - $interval = $ARGV[1]; - } -} - -print "$pname on $statspath\n"; - -my %cur; -my %last; -my $mhz = 0; -my ($read_bytes, 
$read, $write_bytes, $write, $getattr, $setattr, $open, $close, $create, $destroy, $statfs, $punch, $snapshot_time) = - ("read_bytes", "read", "write_bytes", "write", "getattr", "setattr", "open", "close", "create", "destroy", "statfs", "punch", "snapshot_time"); - -my @extinfo = ($setattr, $open, $close, $create, $destroy, $statfs, $punch); -my %shortname = ($setattr => "sa", $open => "op", $close => "cl", - $create => "cx", $destroy => "dx", $statfs => "st", $punch => "pu"); - -sub get_cpumhz() -{ - my $cpu_freq; - my $itc_freq; # On Itanium systems use this - if (open(CPUINFO, "/proc/cpuinfo")==0) { - return; - } - while (<CPUINFO>) { - if (/^cpu MHz\s+:\s*([\d\.]+)/) { $cpu_freq=$1; } - elsif (/^itc MHz\s+:\s*([\d\.]+)/) { $itc_freq=$1; } - } - if (defined($itc_freq)) { $mhz = $itc_freq; } - elsif (defined($cpu_freq)) { $mhz = $cpu_freq; } - else { $mhz = 1; } - close CPUINFO; -} - -get_cpumhz(); -print "Processor counters run at $mhz MHz\n"; - -sub readall() -{ - my $prevcount; - my @iodata; - - seek STATS, 0, 0; - while (<STATS>) { - chop; -# ($name, $cumulcount, $samples, $unit, $min, $max, $sum, $sumsquare) - @iodata = split(/\s+/, $_); - my $name = $iodata[0]; - - $prevcount = $cur{$name}; - if (defined($prevcount)) { - $last{$name} = $prevcount; - } - if ($name =~ /^read_bytes$/ || $name =~ /^write_bytes$/) { - $cur{$name} = $iodata[6]; - } - elsif ($name =~ /^snapshot_time$/) { -# $cumulcount =~ /(\d+)/; - $cur{$name} = $iodata[1]; - } - else { - $cur{$name} = $iodata[1]; - } - } -} -sub process_stats() -{ - my $delta; - my $data; - my $last_time = $last{$snapshot_time}; - if (!defined($last_time)) { - printf "R %-g/%-g W %-g/%-g attr %-g/%-g open %-g/%-g create %-g/%-g stat %-g punch %-g\n", - $cur{$read_bytes}, $cur{$read}, - $cur{$write_bytes}, $cur{$write}, - $cur{$getattr}, $cur{$setattr}, - $cur{$open}, $cur{$close}, - $cur{$create}, $cur{$destroy}, - $cur{$statfs}, $cur{$punch}; - } - else { - my $timespan = $cur{$snapshot_time} - 
$last{$snapshot_time}; - - my $rdelta = $cur{$read} - $last{$read}; - my $rvdelta = int ($rdelta / $timespan); - my $rrate = ($cur{$read_bytes} - $last{$read_bytes}) / - ($timespan * ( 1 << 20 )); - my $wdelta = $cur{$write} - $last{$write}; - my $wvdelta = int ($wdelta / $timespan); - my $wrate = ($cur{$write_bytes} - $last{$write_bytes}) / - ($timespan * ( 1 << 20 )); - printf "R %6lu (%5lu %6.2fMb)/s W %6lu (%5lu %6.2fMb)/s", - $rdelta, $rvdelta, $rrate, - $wdelta, $wvdelta, $wrate; - - $delta = $cur{$getattr} - $last{$getattr}; - if ( $delta != 0 ) { - $rdelta = int ($delta/$timespan); - print " ga:$delta,$rdelta/s"; - } - - for $data ( @extinfo ) { - $delta = $cur{$data} - $last{$data}; - if ($delta != 0) { - print " $shortname{$data}:$delta"; - } - } - print "\n"; - $| = 1; - } -} - -open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; -do { - readall(); - process_stats(); - if ($interval) { - sleep($interval); - %last = %cur; - } -} while ($interval); -close STATS; - diff --git a/lustre/utils/llstat.pl b/lustre/utils/llstat.pl deleted file mode 100755 index eb65e46..0000000 --- a/lustre/utils/llstat.pl +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/perl - -my $pname = $0; - -sub usage() -{ - print STDERR "Usage: $pname <stats_file> [<interval>]\n"; - exit 1; -} - - -my $statspath; -my $interval = 0; - -if (($#ARGV < 0) || ($#ARGV > 1)) { - usage(); -} else { - $statspath = $ARGV[0]; - if ($#ARGV == 1) { - $interval = $ARGV[1]; - } -} - - - -my %namehash; -my $anysum = 0; -my $anysumsquare = 0; -my $mhz = 0; - -sub get_cpumhz() -{ - my $cpu_freq; - my $itc_freq; # On Itanium systems use this - if (open(CPUINFO, "/proc/cpuinfo")==0) { - return; - } - while (<CPUINFO>) { - if (/^cpu MHz\s+:\s*([\d\.]+)/) { $cpu_freq=$1; } - elsif (/^itc MHz\s+:\s*([\d\.]+)/) { $itc_freq=$1; } - } - if (defined($itc_freq)) { $mhz = $itc_freq; } - elsif (defined($cpu_freq)) { $mhz = $cpu_freq; } - else { $mhz = 1; } -} - -get_cpumhz(); -print "Processor counters run at 
$mhz MHz\n"; - -sub readstat() -{ - open(STATS, $statspath) || die "Cannot open $statspath: $!\n"; - while (<STATS>) { - chop; - ($name, $cumulcount, $samples, $unit, $min, $max, $sum, $sumsquare) - = split(/\s+/, $_); - - $prevcount = %namehash->{$name}; - if (defined($prevcount)) { - $diff = $cumulcount - $prevcount; - if ($name eq "snapshot_time") { - $tdiff = $diff; - # printf "%-25s prev=$prevcount, cumul=$cumulcount diff=$diff, tdiff=$tdiff\n", $name; - printf "$statspath @ $cumulcount\n"; - printf "%-25s %-10s %-10s %-10s", "Name", "Cur.Count", "Cur.Rate", "#Events"; - if ($anysum) { - printf "%-8s %10s %12s %10s", "Unit", "min", "avg", "max"; - } - if ($anysumsquare) { - printf "%10s", "stddev"; - } - printf "\n"; - $| = 1; - } - elsif ($cumulcount!=0) { - printf "%-25s %-10lu %-10lu %-10lu", - $name, $diff, ($diff/$tdiff), $cumulcount; - - if (defined($sum)) { - my $sum_orig = $sum; - if (($unit eq "[cycles]") && ($mhz != 1)) { - $unit = "[usecs]"; - $min = $min/$mhz; - $sum = $sum/$mhz; - $max = $max/$mhz; - } - printf "%-8s %10lu %12.2f %10lu", $unit, $min, ($sum/$cumulcount), $max; - if (defined($sumsquare)) { - my $s = $sumsquare - (($sum_orig*$sum_orig)/$cumulcount); - if ($s >= 0) { - my $cnt = ($cumulcount >= 2) ? $cumulcount : 2 ; - my $stddev = sqrt($s/($cnt - 1)); - if (($unit eq "[usecs]") && ($mhz != 1)) { - $stddev = $stddev/$mhz; - } - printf " %10.2f", $stddev; - } - } - } - printf "\n"; - $| = 1; - } - } - else { - if ($cumulcount!=0) { - printf "%-25s $cumulcount\n", $name - } - if (defined($sum)) { - $anysum = 1; - } - if (defined($sumsquare)) { - $anysumsquare = 1; - } - } - %namehash->{$name} = $cumulcount; - } -} - -do { - readstat(); - if ($interval) { - sleep($interval); - } -} while ($interval); diff --git a/lustre/utils/lmc b/lustre/utils/lmc deleted file mode 100755 index eaaed71..0000000 --- a/lustre/utils/lmc +++ /dev/null @@ -1,986 +0,0 @@ -#!/usr/bin/env python -# Copyright (C) 2002 Cluster File Systems, Inc. 
-# Author: Robert Read <rread@clusterfs.com> - -# This file is part of Lustre, http://www.lustre.org. -# -# Lustre is free software; you can redistribute it and/or -# modify it under the terms of version 2 of the GNU General Public -# License as published by the Free Software Foundation. -# -# Lustre is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Lustre; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -# - -""" -lmc - lustre configurtion data manager - - See lustre book for documentation for lmc. - -""" - -import sys, os, getopt, string, exceptions -import xml.dom.minidom -from xml.dom.ext import PrettyPrint - -PYMOD_DIR = "/usr/lib/lustre/python" - -def development_mode(): - base = os.path.dirname(sys.argv[0]) - if os.access(base+"/Makefile.am", os.R_OK): - return 1 - return 0 - -if not development_mode(): - sys.path.append(PYMOD_DIR) - -import Lustre - -DEFAULT_PORT = 988 - -def reference(): - print """usage: lmc --add object [object parameters] - -Object creation command summary: - ---add node - --node node_name - --timeout num - --upcall path - --lustre_upcall path - --portals_upcall path - ---add net - --node node_name - --nid nid - --cluster_id - --nettype tcp|elan|toe|gm|scimac - --hostaddr addr - --port port - --tcpbuf size - --irq_affinity 0|1 - --nid_exchange 0|1 - --router - ---add mds - --node node_name - --mds mds_name - --dev path - --fstype extN|ext3 - --size size - --nspath - ---add lov - --lov lov_name - --mds mds_name - --stripe_sz num - --stripe_cnt num - --stripe_pattern num - --add ost - --node node_name - --ost ost_name - --lov lov_name - --dev path - --size size - --fstype extN|ext3 - --ostuuid uuid - --nspath - ---add mtpt - 
Mountpoint - --node node_name - --path /mnt/point - --mds mds_name - --ost ost_name OR --lov lov_name -""" - -PARAM = Lustre.Options.PARAM -lmc_options = [ - # lmc input/output options - ('reference', "Print short reference for commands."), - ('verbose,v', "Print system commands as they are run."), - ('merge,m', "Append to the specified config file.", PARAM), - ('output,o', "Write XML configuration into given output file. Overwrite existing content.", PARAM), - ('input,i', "", PARAM), - ('batch', "Used to execute lmc commands in batch mode.", PARAM), - - # commands - ('add', "", PARAM), - - # node options - ('node', "Add a new node in the cluster configuration.", PARAM), - ('timeout', "Set timeout to initiate recovery.", PARAM), - ('upcall', "Set both lustre and portals upcall scripts.", PARAM), - ('lustre_upcall', "Set location of lustre upcall script.", PARAM), - ('portals_upcall', "Set location of portals upcall script.", PARAM), - - # network - ('nettype', "Specify the network type. 
This can be tcp/elan/gm/scimac.", PARAM), - ('nid', "Give the network ID, e.g ElanID/IP Address as used by portals.", PARAM), - ('tcpbuf', "Optional arguement to specify the TCP buffer size.", PARAM, "0"), - ('port', "Optional arguement to specify the TCP port number.", PARAM, DEFAULT_PORT), - ('nid_exchange', "Optional arguement to indicate if nid exchange should be done.", PARAM, 0), - ('irq_affinity', "Optional arguement.", PARAM, 0), - ('hostaddr', "", PARAM,""), - ('cluster_id', "Specify the cluster ID", PARAM, "0"), - - # routes - ('route', "Add a new route for the cluster.", PARAM), - ('router', "Optional flag to mark a node as router."), - ('gw', "Specify the nid of the gateway for a route.", PARAM), - ('gw_cluster_id', "", PARAM, "0"), - ('target_cluster_id', "", PARAM, "0"), - ('lo', "For a range route, this is the low value nid.", PARAM), - ('hi', "For a range route, this is a hi value nid.", PARAM,""), - - # servers: mds and ost - ('mds', "Specify MDS name.", PARAM), - ('ost', "Specify the OST name.", PARAM,""), - ('osdtype', "This could obdfilter or obdecho.", PARAM, "obdfilter"), - ('failover', ""), - ('group', "", PARAM), - ('dev', "Path of the device on local system.", PARAM,""), - ('size', "Specify the size of the device if needed.", PARAM,"0"), - ('journal_size', "Specify new journal size for underlying ext3 file system.", PARAM,"0"), - ('fstype', "Optional arguement to specify the filesystem type.", PARAM, "ext3"), - ('ostuuid', "", PARAM,""), - ('nspath', "Local mount point of server namespace.", PARAM,""), - ('format', ""), - - # clients: mountpoint and echo - ('echo_client', "", PARAM), - ('path', "Specify the mountpoint for Lustre.", PARAM), - ('filesystem', "Lustre filesystem name", PARAM,""), - - # lov - ('lov', "Specify LOV name.", PARAM,""), - ('stripe_sz', "Specify the stripe size in bytes.", PARAM), - ('stripe_cnt', "Specify the number of OSTs each file should be striped on.", PARAM, 0), - ('stripe_pattern', "Specify the stripe pattern. 
RAID 0 is the only one currently supported.", PARAM, 0), - - # cobd - ('real_obd', "", PARAM), - ('cache_obd', "", PARAM), - ] - -def error(*args): - msg = string.join(map(str,args)) - raise OptionError("Error: " + msg) - -def panic(cmd, msg): - print "! " + cmd - print msg - sys.exit(1) - - -def warning(*args): - msg = string.join(map(str,args)) - print "Warning: ", msg - -# -# manage names and uuids -# need to initialize this by walking tree to ensure -# no duplicate names or uuids are created. -# this are just place holders for now. -# consider changing this to be like OBD-dev-host -def new_name(base): - ctr = 2 - ret = base - while names.has_key(ret): - ret = "%s_%d" % (base, ctr) - ctr = 1 + ctr - names[ret] = 1 - return ret - -def new_uuid(name): - return "%s_UUID" % (name) - -ldlm_name = 'ldlm' -ldlm_uuid = 'ldlm_UUID' - -def new_lustre(dom): - """Create a new empty lustre document""" - # adding ldlm here is a bit of a hack, but one is enough. - str = """<lustre version="%s"> - <ldlm name="%s" uuid="%s"/> - </lustre>""" % (Lustre.CONFIG_VERSION, ldlm_name, ldlm_uuid) - return dom.parseString(str) - -names = {} -uuids = {} - -def init_names(doc): - """initialize auto-name generation tables""" - global names, uuids - # get all elements that contain a name attribute - for n in doc.childNodes: - if n.nodeType == n.ELEMENT_NODE: - if getName(n): - names[getName(n)] = 1 - uuids[getUUID(n)] = 1 - init_names(n) - -def get_format_flag(options): - if options.format: - return 'yes' - return 'no' - -############################################################ -# Build config objects using DOM -# -class GenConfig: - doc = None - dom = None - def __init__(self, doc): - self.doc = doc - - def ref(self, type, uuid): - """ generate <[type]_ref uuidref="[uuid]"/> """ - tag = "%s_ref" % (type) - ref = self.doc.createElement(tag) - ref.setAttribute("uuidref", uuid) - return ref - - def newService(self, tag, name, uuid): - """ create a new service elmement, which requires name 
and uuid attributes """ - new = self.doc.createElement(tag) - new.setAttribute("uuid", uuid); - new.setAttribute("name", name); - return new - - def addText(self, node, str): - txt = self.doc.createTextNode(str) - node.appendChild(txt) - - def addElement(self, node, tag, str=None): - """ create a new element and add it as a child to node. If str is passed, - a text node is created for the new element""" - new = self.doc.createElement(tag) - if str: - self.addText(new, str) - node.appendChild(new) - return new - - def network(self, name, uuid, nid, cluster_id, net, hostaddr="", - port=0, tcpbuf=0, irq_aff=0, nid_xchg=0): - """create <network> node""" - network = self.newService("network", name, uuid) - network.setAttribute("nettype", net); - self.addElement(network, "nid", nid) - self.addElement(network, "clusterid", cluster_id) - if hostaddr: - self.addElement(network, "hostaddr", hostaddr) - if port: - self.addElement(network, "port", "%d" %(port)) - if tcpbuf: - self.addElement(network, "sendmem", "%d" %(tcpbuf)) - self.addElement(network, "recvmem", "%d" %(tcpbuf)) - if irq_aff: - self.addElement(network, "irqaffinity", "%d" %(irq_aff)) - if nid_xchg: - self.addElement(network, "nidexchange", "%d" %(nid_xchg)) - - return network - - def routetbl(self, name, uuid): - """create <routetbl> node""" - rtbl = self.newService("routetbl", name, uuid) - return rtbl - - def route(self, gw_net_type, gw, gw_cluster_id, tgt_cluster_id, lo, hi): - """ create one entry for the route table """ - ref = self.doc.createElement('route') - ref.setAttribute("type", gw_net_type) - ref.setAttribute("gw", gw) - ref.setAttribute("gwclusterid", gw_cluster_id) - ref.setAttribute("tgtclusterid", tgt_cluster_id) - ref.setAttribute("lo", lo) - if hi: - ref.setAttribute("hi", hi) - return ref - - def profile(self, name, uuid): - """ create a host """ - profile = self.newService("profile", name, uuid) - return profile - - def node(self, name, uuid, prof_uuid): - """ create a host """ - node = 
self.newService("node", name, uuid) - node.appendChild(self.ref("profile", prof_uuid)) - return node - - def ldlm(self, name, uuid): - """ create a ldlm """ - ldlm = self.newService("ldlm", name, uuid) - return ldlm - - def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, - node_uuid, dev_size=0, journal_size=0, nspath=""): - osd = self.newService("osd", name, uuid) - osd.setAttribute('osdtype', osdtype) - osd.appendChild(self.ref("target", ost_uuid)) - osd.appendChild(self.ref("node", node_uuid)) - if fs: - self.addElement(osd, "fstype", fs) - if devname: - dev = self.addElement(osd, "devpath", devname) - self.addElement(osd, "autoformat", format) - if dev_size: - self.addElement(osd, "devsize", "%s" % (dev_size)) - if journal_size: - self.addElement(osd, "journalsize", "%s" % (journal_size)) - if nspath: - self.addElement(osd, "nspath", nspath) - return osd - - def cobd(self, name, uuid, real_uuid, cache_uuid): - cobd = self.newService("cobd", name, uuid) - cobd.appendChild(self.ref("realobd",real_uuid)) - cobd.appendChild(self.ref("cacheobd",cache_uuid)) - return cobd - - def ost(self, name, uuid, osd_uuid, group=""): - ost = self.newService("ost", name, uuid) - ost.appendChild(self.ref("active", osd_uuid)) - if group: - self.addElement(ost, "group", group) - return ost - - def oss(self, name, uuid): - oss = self.newService("oss", name, uuid) - return oss - - def lov(self, name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern): - lov = self.newService("lov", name, uuid) - lov.appendChild(self.ref("mds", mds_uuid)) - lov.setAttribute("stripesize", str(stripe_sz)) - lov.setAttribute("stripecount", str(stripe_cnt)) - lov.setAttribute("stripepattern", str(pattern)) - return lov - - def lovconfig(self, name, uuid, lov_uuid): - lovconfig = self.newService("lovconfig", name, uuid) - lovconfig.appendChild(self.ref("lov", lov_uuid)) - return lovconfig - - def mds(self, name, uuid, mdd_uuid, group=""): - mds = self.newService("mds", name, uuid) - 
mds.appendChild(self.ref("active",mdd_uuid)) - if group: - self.addElement(mds, "group", group) - return mds - - def mdsdev(self, name, uuid, fs, devname, format, node_uuid, - mds_uuid, dev_size=0, journal_size=0, nspath=""): - mdd = self.newService("mdsdev", name, uuid) - self.addElement(mdd, "fstype", fs) - dev = self.addElement(mdd, "devpath", devname) - self.addElement(mdd, "autoformat", format) - if dev_size: - self.addElement(mdd, "devsize", "%s" % (dev_size)) - if journal_size: - self.addElement(mdd, "journalsize", "%s" % (journal_size)) - if nspath: - self.addElement(mdd, "nspath", nspath) - mdd.appendChild(self.ref("node", node_uuid)) - mdd.appendChild(self.ref("target", mds_uuid)) - return mdd - - def mountpoint(self, name, uuid, fs_uuid, path): - mtpt = self.newService("mountpoint", name, uuid) - mtpt.appendChild(self.ref("filesystem", fs_uuid)) - self.addElement(mtpt, "path", path) - return mtpt - - def filesystem(self, name, uuid, mds_uuid, obd_uuid): - fs = self.newService("filesystem", name, uuid) - fs.appendChild(self.ref("mds", mds_uuid)) - fs.appendChild(self.ref("obd", obd_uuid)) - return fs - - def echo_client(self, name, uuid, osc_uuid): - ec = self.newService("echoclient", name, uuid) - ec.appendChild(self.ref("obd", osc_uuid)) - return ec - -############################################################ -# Utilities to query a DOM tree -# Using this functions we can treat use config information -# directly as a database. 
-def getName(n): - return n.getAttribute('name') - -def getUUID(node): - return node.getAttribute('uuid') - - -def findByName(lustre, name, tag = ""): - for n in lustre.childNodes: - if n.nodeType == n.ELEMENT_NODE: - if tag and n.nodeName != tag: - continue - if getName(n) == name: - return n - else: - n = findByName(n, name) - if n: return n - return None - - -def lookup(node, uuid): - for n in node.childNodes: - if n.nodeType == n.ELEMENT_NODE: - if getUUID(n) == uuid: - return n - else: - n = lookup(n, uuid) - if n: return n - return None - - -def name2uuid(lustre, name, tag="", fatal=1): - ret = findByName(lustre, name, tag) - if not ret: - if fatal: - error('name2uuid:', '"'+name+'"', tag, 'element not found.') - else: - return "" - return getUUID(ret) - -def lookup_filesystem(lustre, mds_uuid, ost_uuid): - for n in lustre.childNodes: - if n.nodeType == n.ELEMENT_NODE and n.nodeName == 'filesystem': - if ref_exists(n, mds_uuid) and ref_exists(n, ost_uuid): - return getUUID(n) - return None - -# XXX: assumes only one network element per node. 
will fix this -# as soon as support for routers is added -def get_net_uuid(lustre, node_name): - """ get a network uuid for a node_name """ - node = findByName(lustre, node_name, "node") - if not node: - error ('get_net_uuid:', '"'+node_name+'"', "node element not found.") - net = node.getElementsByTagName('network') - if net: - return getUUID(net[0]) - return None - - -def lov_add_obd(gen, lov, osc_uuid): - lov.appendChild(gen.ref("obd", osc_uuid)) - -def ref_exists(profile, uuid): - elist = profile.childNodes - for e in elist: - if e.nodeType == e.ELEMENT_NODE: - ref = e.getAttribute('uuidref') - if ref == uuid: - return 1 - return 0 - -# ensure that uuid is not already in the profile -# return true if uuid is added -def node_add_profile(gen, node, ref, uuid): - refname = "%s_ref" % "profile" - ret = node.getElementsByTagName(refname) - if not ret: - error('node has no profile ref:', node) - prof_uuid = ret[0].getAttribute('uuidref') - profile = lookup(node.parentNode, prof_uuid) - if not profile: - error("no profile found:", prof_uuid) - if ref_exists(profile, uuid): - return 0 - profile.appendChild(gen.ref(ref, uuid)) - return 1 - -def get_attr(dom_node, attr, default=""): - v = dom_node.getAttribute(attr) - if v: - return v - return default - -############################################################ -# Top level commands -# -def set_node_options(gen, node, options): - if options.router: - node.setAttribute('router', '1') - if options.timeout: - gen.addElement(node, "timeout", get_option(options, 'timeout')) - if options.upcall: - default_upcall = get_option(options, 'upcall') - else: - default_upcall = '' - if default_upcall or options.lustre_upcall: - if options.lustre_upcall: - gen.addElement(node, 'lustreUpcall', options.lustre_upcall) - else: - gen.addElement(node, 'lustreUpcall', default_upcall) - if default_upcall or options.portals_upcall: - if options.portals_upcall: - gen.addElement(node, 'portalsUpcall', options.portals_upcall) - else: - 
gen.addElement(node, 'portalsUpcall', default_upcall) - return node - -def do_add_node(gen, lustre, options, node_name): - uuid = new_uuid(node_name) - prof_name = new_name("PROFILE_" + node_name) - prof_uuid = new_uuid(prof_name) - profile = gen.profile(prof_name, prof_uuid) - node = gen.node(node_name, uuid, prof_uuid) - lustre.appendChild(node) - lustre.appendChild(profile) - - node_add_profile(gen, node, 'ldlm', ldlm_uuid) - set_node_options(gen, node, options) - return node - - -def add_node(gen, lustre, options): - """ create a node with a network config """ - - node_name = get_option(options, 'node') - ret = findByName(lustre, node_name, "node") - if ret: - print "Node:", node_name, "exists." - return - do_add_node(gen, lustre, options, node_name) - - -def add_net(gen, lustre, options): - """ create a node with a network config """ - - node_name = get_option(options, 'node') - nid = get_option(options, 'nid') - cluster_id = get_option(options, 'cluster_id') - hostaddr = get_option(options, 'hostaddr') - net_type = get_option(options, 'nettype') - - if net_type in ('tcp', 'toe'): - port = get_option_int(options, 'port') - tcpbuf = get_option_int(options, 'tcpbuf') - irq_aff = get_option_int(options, 'irq_affinity') - nid_xchg = get_option_int(options, 'nid_exchange') - elif net_type in ('elan', 'gm', 'scimac'): - port = 0 - tcpbuf = 0 - irq_aff = 0 - nid_xchg = 0 - else: - print "Unknown net_type: ", net_type - sys.exit(2) - - ret = findByName(lustre, node_name, "node") - if not ret: - node = do_add_node(gen, lustre, options, node_name) - else: - node = ret - set_node_options(gen, node, options) - - net_name = new_name('NET_'+ node_name +'_'+ net_type) - net_uuid = new_uuid(net_name) - node.appendChild(gen.network(net_name, net_uuid, nid, cluster_id, net_type, - hostaddr, port, tcpbuf, irq_aff, nid_xchg)) - node_add_profile(gen, node, "network", net_uuid) - - -def add_route(gen, lustre, options): - """ create a node with a network config """ - - node_name = 
get_option(options, 'node') - gw_net_type = get_option(options, 'nettype') - gw = get_option(options, 'gw') - gw_cluster_id = get_option(options, 'gw_cluster_id') - tgt_cluster_id = get_option(options, 'target_cluster_id') - lo = get_option(options, 'lo') - hi = get_option(options, 'hi') - if not hi: - hi = lo - - node = findByName(lustre, node_name, "node") - if not node: - error (node_name, " not found.") - - rlist = node.getElementsByTagName('routetbl') - if len(rlist) > 0: - rtbl = rlist[0] - else: - rtbl_name = new_name("RTBL_" + node_name) - rtbl_uuid = new_uuid(rtbl_name) - rtbl = gen.routetbl(rtbl_name, rtbl_uuid) - node.appendChild(rtbl) - node_add_profile(gen, node, "routetbl", rtbl_uuid) - rtbl.appendChild(gen.route(gw_net_type, gw, gw_cluster_id, tgt_cluster_id, - lo, hi)) - - -def add_mds(gen, lustre, options): - node_name = get_option(options, 'node') - mds_name = get_option(options, 'mds') - mdd_name = new_name("MDD_" + mds_name +"_" + node_name) - mdd_uuid = new_uuid(mdd_name) - - mds_uuid = name2uuid(lustre, mds_name, fatal=0) - if not mds_uuid: - mds_uuid = new_uuid(mds_name) - mds = gen.mds(mds_name, mds_uuid, mdd_uuid, options.group) - lustre.appendChild(mds) - else: - mds = lookup(lustre, mds_uuid) - if options.failover: - mds.setAttribute('failover', "1") - - devname = get_option(options, 'dev') - size = get_option(options, 'size') - fstype = get_option(options, 'fstype') - journal_size = get_option(options, 'journal_size') - nspath = get_option(options, 'nspath') - - node_uuid = name2uuid(lustre, node_name, 'node') - - node = findByName(lustre, node_name, "node") - node_add_profile(gen, node, "mdsdev", mdd_uuid) - net_uuid = get_net_uuid(lustre, node_name) - if not net_uuid: - error("NODE: ", node_name, "not found") - - mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, - get_format_flag(options), node_uuid, mds_uuid, - size, journal_size, nspath) - lustre.appendChild(mdd) - - -def add_ost(gen, lustre, options): - node_name = 
get_option(options, 'node') - lovname = get_option(options, 'lov') - osdtype = get_option(options, 'osdtype') - - node_uuid = name2uuid(lustre, node_name) - - if osdtype == 'obdecho': - fstype = '' - devname = '' - size = 0 - fstype = '' - journal_size = '' - else: - devname = get_option(options, 'dev') # can be unset for bluearcs - size = get_option(options, 'size') - fstype = get_option(options, 'fstype') - journal_size = get_option(options, 'journal_size') - - nspath = get_option(options, 'nspath') - - ostname = get_option(options, 'ost') - if not ostname: - ostname = new_name('OST_'+ node_name) - - osdname = new_name("OSD_" + ostname + "_" + node_name) - osd_uuid = new_uuid(osdname) - - ost_uuid = name2uuid(lustre, ostname, fatal=0) - if not ost_uuid: - ost_uuid = get_option(options, 'ostuuid') - if ost_uuid: - if lookup(lustre, ost_uuid): - error("Duplicate OST UUID:", ost_uuid) - else: - ost_uuid = new_uuid(ostname) - - ost = gen.ost(ostname, ost_uuid, osd_uuid, options.group) - lustre.appendChild(ost) - if lovname: - lov = findByName(lustre, lovname, "lov") - if not lov: - error('add_ost:', '"'+lovname+'"', "lov element not found.") - lov_add_obd(gen, lov, ost_uuid) - else: - ost = lookup(lustre, ost_uuid) - - if options.failover: - ost.setAttribute('failover', "1") - - - osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, - get_format_flag(options), ost_uuid, node_uuid, size, - journal_size, nspath) - - node = findByName(lustre, node_name, "node") - -## if node_add_profile(gen, node, 'oss', oss_uuid): -## ossname = 'OSS' -## oss_uuid = new_uuid(ossname) -## oss = gen.oss(ossname, oss_uuid) -## lustre.appendChild(oss) - - node_add_profile(gen, node, 'osd', osd_uuid) - lustre.appendChild(osd) - - -def add_cobd(gen, lustre, options): - node_name = get_option(options, 'node') - name = new_name('COBD_' + node_name) - uuid = new_uuid(name) - - real_name = get_option(options, 'real_obd') - cache_name = get_option(options, 'cache_obd') - - real_uuid = 
name2uuid(lustre, real_name, tag='obd') - cache_uuid = name2uuid(lustre, cache_name, tag='obd') - - node = findByName(lustre, node_name, "node") - node_add_profile(gen, node, "cobd", uuid) - cobd = gen.cobd(name, uuid, real_uuid, cache_uuid) - lustre.appendChild(cobd) - - -def add_echo_client(gen, lustre, options): - """ add an echo client to the profile for this node. """ - node_name = get_option(options, 'node') - lov_name = get_option(options, 'ost') - - node = findByName(lustre, node_name, 'node') - - echoname = new_name('ECHO_'+ node_name) - echo_uuid = new_uuid(echoname) - node_add_profile(gen, node, 'echoclient', echo_uuid) - - lov_uuid = name2uuid(lustre, lov_name, tag='lov', fatal=0) - if not lov_uuid: - lov_uuid = name2uuid(lustre, lov_name, tag='ost', fatal=1) - - echo = gen.echo_client(echoname, echo_uuid, lov_uuid) - lustre.appendChild(echo) - - -def add_lov(gen, lustre, options): - """ create a lov """ - - lov_orig = get_option(options, 'lov') - name = new_name(lov_orig) - if name != lov_orig: - warning("name:", lov_orig, "already used. 
using:", name) - - mds_name = get_option(options, 'mds') - stripe_sz = get_option_int(options, 'stripe_sz') - stripe_cnt = get_option_int(options, 'stripe_cnt') - pattern = get_option_int(options, 'stripe_pattern') - uuid = new_uuid(name) - - ret = findByName(lustre, name, "lov") - if ret: - error("LOV: ", name, " already exists.") - - mds_uuid = name2uuid(lustre, mds_name, 'mds') - lov = gen.lov(name, uuid, mds_uuid, stripe_sz, stripe_cnt, pattern) - lustre.appendChild(lov) - - # add an lovconfig entry to the active mdsdev profile - lovconfig_name = new_name('LVCFG_' + name) - lovconfig_uuid = new_uuid(lovconfig_name) - mds = findByName(lustre, mds_name) - mds.appendChild(gen.ref("lovconfig", lovconfig_uuid)) - lovconfig = gen.lovconfig(lovconfig_name, lovconfig_uuid, uuid) - lustre.appendChild(lovconfig) - -def new_filesystem(gen, lustre, mds_uuid, obd_uuid): - fs_name = new_name("FS_fsname") - fs_uuid = new_uuid(fs_name) - mds = lookup(lustre, mds_uuid) - mds.appendChild(gen.ref("filesystem", fs_uuid)) - fs = gen.filesystem(fs_name, fs_uuid, mds_uuid, obd_uuid) - lustre.appendChild(fs) - return fs_uuid - -def get_fs_uuid(gen, lustre, mds_name, obd_name): - mds_uuid = name2uuid(lustre, mds_name, tag='mds') - obd_uuid = name2uuid(lustre, obd_name, tag='lov', fatal=0) - if not obd_uuid: - obd_uuid = name2uuid(lustre, obd_name, tag='ost', fatal=1) - fs_uuid = lookup_filesystem(lustre, mds_uuid, obd_uuid) - if not fs_uuid: - fs_uuid = new_filesystem(gen, lustre, mds_uuid, obd_uuid) - return fs_uuid - -def add_mtpt(gen, lustre, options): - """ create mtpt on a node """ - node_name = get_option(options, 'node') - - path = get_option(options, 'path') - fs_name = get_option(options, 'filesystem') - if fs_name == '': - mds_name = get_option(options, 'mds') - lov_name = get_option(options, 'lov') - if lov_name == '': - lov_name = get_option(options, 'ost') - if lov_name == '': - error("--add mtpt requires either --filesystem or --mds with an --lov lov_name or --ost 
ost_name") - fs_uuid = get_fs_uuid(gen, lustre, mds_name, lov_name) - else: - fs_uuid = name2uuid(lustre, fs_name, tag='filesystem') - - name = new_name('MNT_'+ node_name) - - ret = findByName(lustre, name, "mountpoint") - if ret: - # this can't happen, because new_name creates unique names - error("MOUNTPOINT: ", name, " already exists.") - - uuid = new_uuid(name) - mtpt = gen.mountpoint(name, uuid, fs_uuid, path) - node = findByName(lustre, node_name, "node") - if not node: - error('node:', node_name, "not found.") - node_add_profile(gen, node, "mountpoint", uuid) - lustre.appendChild(mtpt) - -############################################################ -# Command line processing -# -class OptionError (exceptions.Exception): - def __init__(self, args): - self.args = args - -def get_option(options, tag): - """Look for tag in options hash and return the value if set. If not - set, then if return default it is set, otherwise exception.""" - if options.__getattr__(tag) != None: - return options.__getattr__(tag) - else: - raise OptionError("--add %s requires --%s <value>" % (options.add, tag)) - -def get_option_int(options, tag): - """Return an integer option. 
Raise exception if the value is not an int""" - val = get_option(options, tag) - try: - n = int(val) - except ValueError: - raise OptionError("--%s <num> (value must be integer)" % (tag)) - return n - -# simple class for profiling -import time -class chrono: - def __init__(self): - self._start = 0 - def start(self): - self._stop = 0 - self._start = time.time() - def stop(self, msg=''): - self._stop = time.time() - if msg: - self.display(msg) - def dur(self): - return self._stop - self._start - def display(self, msg): - d = self.dur() - str = '%s: %g secs' % (msg, d) - print str - -############################################################ -# Main -# - -def add(devtype, gen, lustre, options): - if devtype == 'net': - add_net(gen, lustre, options) - elif devtype == 'mtpt': - add_mtpt(gen, lustre, options) - elif devtype == 'mds': - add_mds(gen, lustre, options) - elif devtype == 'ost': - add_ost(gen, lustre, options) - elif devtype == 'lov': - add_lov(gen, lustre, options) - elif devtype == 'route': - add_route(gen, lustre, options) - elif devtype == 'node': - add_node(gen, lustre, options) - elif devtype == 'echo_client': - add_echo_client(gen, lustre, options) - elif devtype == 'cobd': - add_cobd(gen, lustre, options) - else: - error("unknown device type:", devtype) - -def do_command(gen, lustre, options, args): - if options.add: - add(options.add, gen, lustre, options) - else: - error("Missing command") - -def main(): - cl = Lustre.Options("lmc", "", lmc_options) - try: - options, args = cl.parse(sys.argv[1:]) - except Lustre.OptionError, e: - panic("lmc", e) - - if len(args) > 0: - panic(string.join(sys.argv), "Unexpected extra arguments on command line: " + string.join(args)) - - if options.reference: - reference() - sys.exit(0) - - outFile = '-' - - if options.merge: - outFile = options.merge - if os.access(outFile, os.R_OK): - doc = xml.dom.minidom.parse(outFile) - else: - doc = new_lustre(xml.dom.minidom) - elif options.input: - doc = 
xml.dom.minidom.parse(options.input) - else: - doc = new_lustre(xml.dom.minidom) - - if options.output: - outFile = options.output - - lustre = doc.documentElement - init_names(lustre) - if lustre.tagName != "lustre": - print "Existing config not valid." - sys.exit(1) - - gen = GenConfig(doc) - - if options.batch: - fp = open(options.batch) - batchCommands = fp.readlines() - fp.close() - for cmd in batchCommands: - try: - options, args = cl.parse(string.split(cmd)) - do_command(gen, lustre, options, args) - except OptionError, e: - panic(cmd, e) - except Lustre.OptionError, e: - panic(cmd, e) - else: - try: - do_command(gen, lustre, options, args) - except OptionError, e: - panic(string.join(sys.argv),e) - except Lustre.OptionError, e: - panic("lmc", e) - - if outFile == '-': - PrettyPrint(doc) - else: - PrettyPrint(doc, open(outFile,"w")) - -if __name__ == "__main__": - main() diff --git a/lustre/utils/load_ldap.sh b/lustre/utils/load_ldap.sh deleted file mode 100755 index e914827..0000000 --- a/lustre/utils/load_ldap.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# -# Load a lustre config xml into an openldap database. -# See https://projects.clusterfs.com/lustre/LustreLDAP -# for more details. -# -# Usage: load_ldap.sh <xml_file> -set -e - -LDAP_BASE=${LDAP_BASE:-fs=lustre} -LDAP_ROOTDN=${LDAP_ROOTDN:-cn=Manager,fs=lustre} -LDAP_PW=${LDAP_PW:-secret} -LDAP_AUTH="-x -D $LDAP_ROOTDN -w $LDAP_PW" -LUSTRE=${LUSTRE:-`dirname $0`/..} - -if [ -f $LUSTRE/Makefile.am ]; then - CONFDIR=$LUSTRE/conf -else - CONFDIR=/usr/lib/lustre -fi - -TOP=$CONFDIR/top.ldif -XSL=$CONFDIR/lustre2ldif.xsl - -[ ! -z $LDAPURL ] && LDAP_AUTH="$LDAP_AUTH -H $LDAPURL" - -XML=${XML:-$1} - -if [ -z "$XML" ] || [ ! 
-r $XML ]; then - echo "usage: $0 xmlfile" - exit 1 -fi - -NAME=`basename $XML .xml` -LDIF=/tmp/$NAME.ldif - -# add the top level record, if needed -ldapsearch $LDAP_AUTH -b $LDAP_BASE > /dev/null 2>&1 || - ldapadd $LDAP_AUTH -f $TOP - -# If this config already exists, then delete it -ldapsearch $LDAP_AUTH -b config=$NAME,$LDAP_BASE > /dev/null 2>&1 && - ldapdelete $LDAP_AUTH -r config=$NAME,$LDAP_BASE - -4xslt -D config=$NAME $XML $XSL > $LDIF - -echo "Loading config to 'config=$NAME,$LDAP_BASE' ..." -ldapadd $LDAP_AUTH -f $LDIF - -rm -f $LDIF diff --git a/lustre/utils/lstripe.c b/lustre/utils/lstripe.c deleted file mode 100644 index 2cdf5d2..0000000 --- a/lustre/utils/lstripe.c +++ /dev/null @@ -1,116 +0,0 @@ -#include <ctype.h> -#include <errno.h> -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> -#include <unistd.h> - - -/****************** Custom includes ********************/ -#include <linux/lustre_lite.h> -#include <linux/lustre_idl.h> - - -/****************** Functions ******************/ - -void usage(char *prog) -{ - fprintf(stderr, "usage: %s <filename> <stripe size> <stripe start> " - "<stripe count>\n", prog); - - fprintf(stderr, - "\tstripe size: number of bytes in each stripe (0 default)\n"); - fprintf(stderr, - "\tstripe start: OST index of first stripe (-1 default)\n"); - fprintf(stderr, - "\tstripe count: number of OSTs to stripe over (0 default)\n"); -} - -int create_file(char *name, long stripe_size, int stripe_offset, - int stripe_count) -{ - struct lov_mds_md a_striping; - int fd, result = 0; - - /* Initialize IOCTL striping pattern structure */ - a_striping.lmm_magic = LOV_MAGIC; - a_striping.lmm_stripe_size = stripe_size; - a_striping.lmm_stripe_offset = stripe_offset; - a_striping.lmm_stripe_count = stripe_count; - - fd = open(name, O_CREAT | O_RDWR | O_LOV_DELAY_CREATE, 0644); - if (fd < 0) { - fprintf(stderr, "\nUnable to open '%s': %s\n", - name, strerror(errno)); - result = 
-errno; - } else if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, &a_striping)) { - char *errmsg = "stripe already set"; - - if (errno != EEXIST && errno != EALREADY) - errmsg = strerror(errno); - - fprintf(stderr, "\nError on ioctl for '%s' (%d): %s\n", - name, fd, errmsg); - result = -errno; - } else if (close(fd) < 0) { - fprintf(stderr, "\nError on close for '%s' (%d): %s\n", - name, fd, strerror(errno)); - result = -errno; - } - - return result; -} - -int main(int argc, char *argv[]) -{ - int result; - long st_size; - int st_offset, - st_count; - char *end; - - /* Check to make sure we have enough parameters */ - if (argc != 5) { - usage(argv[0]); - return 1; - } - - /* Get the stripe size */ - st_size = strtoul(argv[2], &end, 0); - if (*end != '\0') { - fprintf(stderr, "bad stripe size '%s'\n", argv[2]); - usage(argv[0]); - return 2; - } - - /* - if (st_size & 4095) { - fprintf(stderr, "stripe size must be multiple of page size\n"); - usage(argv[0]); - return 3; - } - */ - - /* Get the stripe offset*/ - st_offset = strtoul(argv[3], &end, 0); - if (*end != '\0') { - fprintf(stderr, "bad stripe offset '%s'\n", argv[3]); - usage(argv[0]); - return 4; - } - - /* Get the stripe count */ - st_count = strtoul(argv[4], &end, 0); - if (*end != '\0') { - fprintf(stderr, "bad stripe count '%s'\n", argv[4]); - usage(argv[0]); - return 5; - } - - /* Create the file, as specified. Return and display any errors. 
*/ - result = create_file(argv[1], st_size, st_offset, st_count); - - return result; -} diff --git a/lustre/utils/mds-failover-sample b/lustre/utils/mds-failover-sample deleted file mode 100755 index f6269f4..0000000 --- a/lustre/utils/mds-failover-sample +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh - -MDS=NET_mds_tcp_UUID -MDSHOST=mds - -/r/src/lustre/utils/lctl <<EOF -network tcp -close_uuid $MDS -del_uuid $MDS -connect $MDSHOST 988 -add_uuid $MDS $MDSHOST -quit -EOF - -/r/src/lustre/utils/lctl <<EOF -device \$RPCDEV -probe -newconn $MDS -quit -EOF diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c deleted file mode 100644 index 70cd5bf..0000000 --- a/lustre/utils/obd.c +++ /dev/null @@ -1,2060 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Andreas Dilger <adilger@clusterfs.com> - * Author: Robert Read <rread@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - - -#include <stdlib.h> -#include <sys/ioctl.h> -#include <fcntl.h> -#include <sys/socket.h> -#include <sys/types.h> -#include <sys/wait.h> -#include <sys/stat.h> -#include <stdio.h> -#include <stdarg.h> -#include <signal.h> - -#ifndef __KERNEL__ -#include <liblustre.h> -#endif -#include <linux/lustre_lib.h> -#include <linux/lustre_idl.h> -#include <linux/lustre_dlm.h> -#include <linux/obd.h> /* for struct lov_stripe_md */ -#include <linux/lustre_build_version.h> - -#include <unistd.h> -#include <sys/un.h> -#include <time.h> -#include <sys/time.h> -#include <errno.h> -#include <string.h> - -#include <asm/page.h> /* needed for PAGE_SIZE - rread */ - -#define __KERNEL__ -#include <linux/list.h> -#undef __KERNEL__ - -#include "obdctl.h" -#include <portals/ptlctl.h> -#include "parser.h" -#include <stdio.h> - -#define SHMEM_STATS 1 -#if SHMEM_STATS -# include <sys/ipc.h> -# include <sys/shm.h> - -# define MAX_SHMEM_COUNT 1024 -static long long *shared_counters; -static long long counter_snapshot[2][MAX_SHMEM_COUNT]; -struct timeval prev_time; -#endif - -uint64_t conn_cookie = -1; -char rawbuf[8192]; -char *buf = rawbuf; -int max = sizeof(rawbuf); - -static int thread; - -union lsm_buffer { - char space [4096]; - struct lov_stripe_md lsm; -} lsm_buffer; - -static char *cmdname(char *func); - -#define IOC_INIT(data) \ -do { \ - memset(&data, 0, sizeof(data)); \ - data.ioc_cookie = conn_cookie; \ -} while (0) - -#define IOC_PACK(func, data) \ -do { \ - memset(buf, 0, sizeof(rawbuf)); \ - if (obd_ioctl_pack(&data, &buf, max)) { \ - fprintf(stderr, "error: %s: invalid ioctl\n", \ - cmdname(func)); \ - return -2; \ - } \ -} while (0) - -#define IOC_UNPACK(func, data) \ -do { \ - if (obd_ioctl_unpack(&data, buf, max)) { \ - fprintf(stderr, "error: %s: invalid reply\n", \ - cmdname(func)); \ - return -2; \ - } \ -} while (0) - -char *obdo_print(struct obdo *obd) -{ - char buf[1024]; - - sprintf(buf, "id: "LPX64"\ngrp: "LPX64"\natime: "LPU64"\nmtime: "LPU64 - "\nctime: 
"LPU64"\nsize: "LPU64"\nblocks: "LPU64 - "\nblksize: %u\nmode: %o\nuid: %d\ngid: %d\nflags: %x\n" - "obdflags: %x\nnlink: %d,\nvalid %x\n", - obd->o_id, obd->o_gr, obd->o_atime, obd->o_mtime, obd->o_ctime, - obd->o_size, obd->o_blocks, obd->o_blksize, obd->o_mode, - obd->o_uid, obd->o_gid, obd->o_flags, obd->o_obdflags, - obd->o_nlink, obd->o_valid); - return strdup(buf); -} - - -#define BAD_VERBOSE (-999999999) - -#define N2D_OFF 0x100 /* So we can tell between error codes and devices */ - -static int do_name2dev(char *func, char *name) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - data.ioc_inllen1 = strlen(name) + 1; - data.ioc_inlbuf1 = name; - - IOC_PACK(func, data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_NAME2DEV, buf); - if (rc < 0) - return errno; - IOC_UNPACK(func, data); - - return data.ioc_dev + N2D_OFF; -} - -/* - * resolve a device name to a device number. - * supports a number, $name or %uuid. - */ -static int parse_devname(char *func, char *name) -{ - int rc; - int ret = -1; - - if (!name) - return ret; - if (name[0] == '$' || name[0] == '%') { - name++; - rc = do_name2dev(func, name); - if (rc >= N2D_OFF) { - ret = rc - N2D_OFF; - printf("Name %s is device %d\n", name, ret); - } else { - printf("No device found for name %s: %s\n", - name, strerror(rc)); - } - } else { - /* Assume it's a number. This means that bogus strings become - * 0. I might care about that some day. 
*/ - ret = strtoul(name, NULL, 0); - printf("Selected device %d\n", ret); - } - - return ret; -} - -static char * -lsm_string (struct lov_stripe_md *lsm) -{ - static char buffer[4096]; - char *p = buffer; - int space = sizeof (buffer); - int i; - int nob; - - *p = 0; - space--; - - nob = snprintf(p, space, LPX64, lsm->lsm_object_id); - p += nob; - space -= nob; - - if (lsm->lsm_stripe_count != 0) { - nob = snprintf (p, space, "=%u#%u@%d", - lsm->lsm_stripe_size, - lsm->lsm_stripe_count, - lsm->lsm_stripe_offset); - p += nob; - space -= nob; - - for (i = 0; i < lsm->lsm_stripe_count; i++) { - nob = snprintf (p, space, ":"LPX64, - lsm->lsm_oinfo[i].loi_id); - p += nob; - space -= nob; - } - } - - if (space == 0) { /* probable overflow */ - fprintf (stderr, "lsm_string() overflowed buffer\n"); - abort (); - } - - return (buffer); -} - -static void -reset_lsmb (union lsm_buffer *lsmb) -{ - memset (lsmb->space, 0, sizeof (lsmb->space)); - lsmb->lsm.lsm_magic = LOV_MAGIC; -} - -static int -parse_lsm (union lsm_buffer *lsmb, char *string) -{ - struct lov_stripe_md *lsm = &lsmb->lsm; - char *end; - int i; - - /* - * object_id[=size#count[@offset][:id]*] - */ - - reset_lsmb (lsmb); - - lsm->lsm_object_id = strtoull (string, &end, 0); - if (end == string) - return (-1); - string = end; - - if (*string == 0) - return (0); - - if (*string != '=') - return (-1); - string++; - - lsm->lsm_stripe_size = strtoul (string, &end, 0); - if (end == string) - return (-1); - string = end; - - if (*string != '#') - return (-1); - string++; - - lsm->lsm_stripe_count = strtoul (string, &end, 0); - if (end == string) - return (-1); - string = end; - - if (*string == '@') { - string++; - lsm->lsm_stripe_offset = strtol (string, &end, 0); - if (end == string) - return (-1); - string = end; - } - - if (*string == 0) /* don't have to specify obj ids */ - return (0); - - for (i = 0; i < lsm->lsm_stripe_count; i++) { - if (*string != ':') - return (-1); - string++; - lsm->lsm_oinfo[i].loi_id = 
strtoull (string, &end, 0); - string = end; - } - - if (*string != 0) - return (-1); - - return (0); -} - -static char *cmdname(char *func) -{ - static char buf[512]; - - if (thread) { - sprintf(buf, "%s-%d", func, thread); - return buf; - } - - return func; -} - -#define difftime(a, b) \ - ((double)(a)->tv_sec - (b)->tv_sec + \ - ((double)((a)->tv_usec - (b)->tv_usec) / 1000000)) - -static int be_verbose(int verbose, struct timeval *next_time, - __u64 num, __u64 *next_num, int num_total) -{ - struct timeval now; - - if (!verbose) - return 0; - - if (next_time != NULL) - gettimeofday(&now, NULL); - - /* A positive verbosity means to print every X iterations */ - if (verbose > 0 && - (next_num == NULL || num >= *next_num || num >= num_total)) { - *next_num += verbose; - if (next_time) { - next_time->tv_sec = now.tv_sec - verbose; - next_time->tv_usec = now.tv_usec; - } - return 1; - } - - /* A negative verbosity means to print at most each X seconds */ - if (verbose < 0 && next_time != NULL && difftime(&now, next_time) >= 0){ - next_time->tv_sec = now.tv_sec - verbose; - next_time->tv_usec = now.tv_usec; - if (next_num) - *next_num = num; - return 1; - } - - return 0; -} - -static int get_verbose(char *func, const char *arg) -{ - int verbose; - char *end; - - if (!arg || arg[0] == 'v') - verbose = 1; - else if (arg[0] == 's' || arg[0] == 'q') - verbose = 0; - else { - verbose = (int)strtoul(arg, &end, 0); - if (*end) { - fprintf(stderr, "error: %s: bad verbose option '%s'\n", - cmdname(func), arg); - return BAD_VERBOSE; - } - } - - if (verbose < 0) - printf("Print status every %d seconds\n", -verbose); - else if (verbose == 1) - printf("Print status every operation\n"); - else if (verbose > 1) - printf("Print status every %d operations\n", verbose); - - return verbose; -} - -int do_disconnect(char *func, int verbose) -{ - int rc; - struct obd_ioctl_data data; - - if (conn_cookie == -1) - return 0; - - IOC_INIT(data); - - IOC_PACK(func, data); - rc = 
l_ioctl(OBD_DEV_ID, OBD_IOC_DISCONNECT, buf); - if (rc < 0) { - fprintf(stderr, "error: %s: OPD_IOC_DISCONNECT %s\n", - cmdname(func),strerror(errno)); - } else { - if (verbose) - printf("%s: disconnected conn "LPX64"\n", cmdname(func), - conn_cookie); - conn_cookie = -1; - } - - return rc; -} - -#if SHMEM_STATS -static void shmem_setup(void) -{ - /* Create new segment */ - int shmid = shmget(IPC_PRIVATE, sizeof(counter_snapshot[0]), 0600); - - if (shmid == -1) { - fprintf(stderr, "Can't create shared memory counters: %s\n", - strerror(errno)); - return; - } - - /* Attatch to new segment */ - shared_counters = (long long *)shmat(shmid, NULL, 0); - - if (shared_counters == (long long *)(-1)) { - fprintf(stderr, "Can't attach shared memory counters: %s\n", - strerror(errno)); - shared_counters = NULL; - return; - } - - /* Mark segment as destroyed, so it will disappear when we exit. - * Forks will inherit attached segments, so we should be OK. - */ - if (shmctl(shmid, IPC_RMID, NULL) == -1) { - fprintf(stderr, "Can't destroy shared memory counters: %s\n", - strerror(errno)); - } -} - -static inline void shmem_reset(void) -{ - if (shared_counters == NULL) - return; - - memset(shared_counters, 0, sizeof(counter_snapshot[0])); - memset(counter_snapshot, 0, sizeof(counter_snapshot)); - gettimeofday(&prev_time, NULL); -} - -static inline void shmem_bump(void) -{ - if (shared_counters == NULL || thread <= 0 || thread > MAX_SHMEM_COUNT) - return; - - shared_counters[thread - 1]++; -} - -static void shmem_snap(int n) -{ - struct timeval this_time; - int non_zero = 0; - long long total = 0; - double secs; - int i; - - if (shared_counters == NULL || n > MAX_SHMEM_COUNT) - return; - - memcpy(counter_snapshot[1], counter_snapshot[0], - n * sizeof(counter_snapshot[0][0])); - memcpy(counter_snapshot[0], shared_counters, - n * sizeof(counter_snapshot[0][0])); - gettimeofday(&this_time, NULL); - - for (i = 0; i < n; i++) { - long long this_count = - counter_snapshot[0][i] - 
counter_snapshot[1][i]; - - if (this_count != 0) { - non_zero++; - total += this_count; - } - } - - secs = (this_time.tv_sec + this_time.tv_usec / 1000000.0) - - (prev_time.tv_sec + prev_time.tv_usec / 1000000.0); - - printf("%d/%d Total: %f/second\n", non_zero, n, total / secs); - - prev_time = this_time; -} - -#define SHMEM_SETUP() shmem_setup() -#define SHMEM_RESET() shmem_reset() -#define SHMEM_BUMP() shmem_bump() -#define SHMEM_SNAP(n) shmem_snap(n) -#else -#define SHMEM_SETUP() -#define SHMEM_RESET() -#define SHMEM_BUMP() -#define SHMEM_SNAP(n) -#endif - -extern command_t cmdlist[]; - -static int do_device(char *func, int dev) -{ - struct obd_ioctl_data data; - - memset(&data, 0, sizeof(data)); - - data.ioc_dev = dev; - - IOC_PACK(func, data); - return l_ioctl(OBD_DEV_ID, OBD_IOC_DEVICE, buf); -} - -int jt_obd_device(int argc, char **argv) -{ - int rc, dev; - do_disconnect(argv[0], 1); - - if (argc != 2) - return CMD_HELP; - - dev = parse_devname(argv[0], argv[1]); - if (dev < 0) - return -1; - - rc = do_device(argv[0], dev); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - return rc; -} - -int jt_obd_connect(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - do_disconnect(argv[0], 1); - - /* XXX TODO: implement timeout per lctl usage for probe */ - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CONNECT, buf); - IOC_UNPACK(argv[0], data); - if (rc < 0) - fprintf(stderr, "error: %s: OBD_IOC_CONNECT %s\n", - cmdname(argv[0]), strerror(rc = errno)); - else - conn_cookie = data.ioc_cookie; - return rc; -} - -int jt_obd_disconnect(int argc, char **argv) -{ - if (argc != 1) - return CMD_HELP; - - if (conn_cookie == -1) - return 0; - - return do_disconnect(argv[0], 0); -} - -int jt_opt_device(int argc, char **argv) -{ - char *arg2[3]; - int ret; - int rc; - - if (argc < 3) - return CMD_HELP; - - rc = do_device("device", 
parse_devname(argv[0], argv[1])); - - if (!rc) { - arg2[0] = "connect"; - arg2[1] = NULL; - rc = jt_obd_connect(1, arg2); - } - - if (!rc) - rc = Parser_execarg(argc - 2, argv + 2, cmdlist); - - ret = do_disconnect(argv[0], 0); - if (!rc) - rc = ret; - - return rc; -} - -int jt_opt_threads(int argc, char **argv) -{ - __u64 threads, next_thread; - int verbose; - int rc = 0; - char *end; - int i; - - if (argc < 5) - return CMD_HELP; - - threads = strtoull(argv[1], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid page count '%s'\n", - cmdname(argv[0]), argv[1]); - return CMD_HELP; - } - - verbose = get_verbose(argv[0], argv[2]); - if (verbose == BAD_VERBOSE) - return CMD_HELP; - - if (verbose != 0) - printf("%s: starting "LPD64" threads on device %s running %s\n", - argv[0], threads, argv[3], argv[4]); - - SHMEM_RESET(); - - for (i = 1, next_thread = verbose; i <= threads; i++) { - rc = fork(); - if (rc < 0) { - fprintf(stderr, "error: %s: #%d - %s\n", argv[0], i, - strerror(rc = errno)); - break; - } else if (rc == 0) { - thread = i; - argv[2] = "--device"; - return jt_opt_device(argc - 2, argv + 2); - } else if (be_verbose(verbose, NULL, i, &next_thread, threads)) - printf("%s: thread #%d (PID %d) started\n", - argv[0], i, rc); - rc = 0; - } - - if (!thread) { /* parent process */ - int live_threads = threads; - - while (live_threads > 0) { - int status; - pid_t ret; - - ret = waitpid(0, &status, verbose < 0 ? WNOHANG : 0); - if (ret == 0) { - if (verbose >= 0) - abort(); - - sleep(-verbose); - SHMEM_SNAP(threads); - continue; - } - - if (ret < 0) { - fprintf(stderr, "error: %s: wait - %s\n", - argv[0], strerror(errno)); - if (!rc) - rc = errno; - } else { - /* - * This is a hack. We _should_ be able to use - * WIFEXITED(status) to see if there was an - * error, but it appears to be broken and it - * always returns 1 (OK). See wait(2). 
- */ - int err = WEXITSTATUS(status); - if (err || WIFSIGNALED(status)) - fprintf(stderr, - "%s: PID %d had rc=%d\n", - argv[0], ret, err); - if (!rc) - rc = err; - - live_threads--; - } - } - } - - return rc; -} - -int jt_obd_detach(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DETACH, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - return rc; -} - -int jt_obd_cleanup(int argc, char **argv) -{ - struct obd_ioctl_data data; - char force = 'F'; - char failover = 'A'; - char flags[3]; - int flag_cnt = 0, n; - int rc; - - IOC_INIT(data); - - if (argc < 1 || argc > 3) - return CMD_HELP; - - for (n = 1; n < argc; n++) - if (strcmp(argv[n], "force") == 0) { - flags[flag_cnt++] = force; - } else if (strcmp(argv[n], "failover") == 0) { - flags[flag_cnt++] = failover; - } else { - fprintf(stderr, "unknown option: %s", argv[n]); - return CMD_HELP; - } - - data.ioc_inllen1 = flag_cnt; - if (flag_cnt) - data.ioc_inlbuf1 = flags; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CLEANUP, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - return rc; -} - -int jt_obd_no_transno(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_NO_TRANSNO, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - return rc; -} - -int jt_obd_set_readonly(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SET_READONLY, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - 
return rc; -} - -int jt_obd_abort_recovery(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_ABORT_RECOVERY, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - return rc; -} - -int jt_obd_newdev(int argc, char **argv) -{ - int rc; - struct obd_ioctl_data data; - - IOC_INIT(data); - - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_NEWDEV, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - else { - IOC_UNPACK(argv[0], data); - printf("Current device set to %d\n", data.ioc_dev); - } - - return rc; -} - -int jt_obd_mount_option(int argc, char **argv) -{ - int rc; - struct obd_ioctl_data data; - - IOC_INIT(data); - - if (argc != 2) - return CMD_HELP; - - data.ioc_inllen1 = strlen(argv[1]) + 1; - data.ioc_inlbuf1 = argv[1]; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_MOUNTOPT, buf); - if (rc < 0) { - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - } - - return rc; -} - -int jt_get_version(int argc, char **argv) -{ - int rc; - char buf[8192]; - struct obd_ioctl_data *data = (struct obd_ioctl_data *)buf; - - if (argc != 1) - return CMD_HELP; - - memset(buf, 0, sizeof(buf)); - data->ioc_version = OBD_IOCTL_VERSION; - data->ioc_cookie = conn_cookie; - data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data)); - data->ioc_len = obd_ioctl_packlen(data); - - rc = l_ioctl(OBD_DEV_ID, OBD_GET_VERSION, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - else { - printf("Lustre version: %s\n", data->ioc_bulk); - } - - printf("lctl version: %s\n", BUILD_VERSION); - return rc; -} - -int jt_obd_list(int argc, char **argv) -{ - int rc; - char buf[8192]; - struct obd_ioctl_data *data = (struct 
obd_ioctl_data *)buf; - - if (argc != 1) - return CMD_HELP; - - memset(buf, 0, sizeof(buf)); - data->ioc_version = OBD_IOCTL_VERSION; - data->ioc_cookie = conn_cookie; - data->ioc_inllen1 = sizeof(buf) - size_round(sizeof(*data)); - data->ioc_len = obd_ioctl_packlen(data); - - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LIST, data); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - else { - printf("%s", data->ioc_bulk); - } - - return rc; -} - -int jt_obd_attach(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - if (argc != 2 && argc != 3 && argc != 4) - return CMD_HELP; - - data.ioc_inllen1 = strlen(argv[1]) + 1; - data.ioc_inlbuf1 = argv[1]; - if (argc >= 3) { - data.ioc_inllen2 = strlen(argv[2]) + 1; - data.ioc_inlbuf2 = argv[2]; - } - - if (argc == 4) { - data.ioc_inllen3 = strlen(argv[3]) + 1; - data.ioc_inlbuf3 = argv[3]; - } - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_ATTACH, buf); - if (rc < 0) - fprintf(stderr, "error: %s: OBD_IOC_ATTACH %s\n", - cmdname(argv[0]), strerror(rc = errno)); - else if (argc == 3) { - char name[1024]; - if (strlen(argv[2]) > 128) { - printf("Name too long to set environment\n"); - return -EINVAL; - } - snprintf(name, 512, "LUSTRE_DEV_%s", argv[2]); - rc = setenv(name, argv[1], 1); - if (rc) { - printf("error setting env variable %s\n", name); - } - } - - return rc; -} - -int jt_obd_setup(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - - if (argc > 5) - return CMD_HELP; - - data.ioc_dev = -1; - if (argc > 1) { - data.ioc_dev = parse_devname(argv[0], argv[1]); - if (data.ioc_dev < 0) - return -1; - data.ioc_inllen1 = strlen(argv[1]) + 1; - data.ioc_inlbuf1 = argv[1]; - } - if (argc > 2) { - data.ioc_inllen2 = strlen(argv[2]) + 1; - data.ioc_inlbuf2 = argv[2]; - } - if (argc > 3) { - data.ioc_inllen3 = strlen(argv[3]) + 1; - data.ioc_inlbuf3 = argv[3]; - } - if (argc > 4) { - data.ioc_inllen4 = 
strlen(argv[4]) + 1; - data.ioc_inlbuf4 = argv[4]; - } - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SETUP, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - return rc; -} - -/* Get echo client's stripe meta-data for the given object - */ -int jt_obd_get_stripe (int argc, char **argv) -{ - struct obd_ioctl_data data; - __u64 id; - int rc; - char *end; - - if (argc != 2) - return (CMD_HELP); - - id = strtoull (argv[1], &end, 0); - if (*end) { - fprintf (stderr, "Error: %s: invalid object id '%s'\n", - cmdname (argv[0]), argv[1]); - return (CMD_HELP); - } - - memset (&lsm_buffer, 0, sizeof (lsm_buffer)); - - IOC_INIT (data); - data.ioc_obdo1.o_id = id; - data.ioc_obdo1.o_mode = S_IFREG | 0644; - data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE; - data.ioc_pbuf1 = (char *)&lsm_buffer; - data.ioc_plen1 = sizeof (lsm_buffer); - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, ECHO_IOC_GET_STRIPE, buf); - IOC_UNPACK(argv[0], data); - - if (rc != 0) { - fprintf (stderr, "Error: %s: rc %d(%s)\n", - cmdname (argv[0]), rc, strerror (errno)); - return (rc); - } - - printf ("%s\n", lsm_string (&lsm_buffer.lsm)); - - return (rc); -} - -/* Set stripe meta-data for 1 or more objects. Object must be new to - * this echo client instance. 
- */ -int jt_obd_set_stripe (int argc, char **argv) -{ - struct obd_ioctl_data data; - char *end; - int count = 1; - int i; - int rc; - - if (argc < 2 || argc > 3) - return CMD_HELP; - - rc = parse_lsm (&lsm_buffer, argv[1]); - if (rc != 0) { - fprintf (stderr, "error: %s: invalid object '%s'\n", - cmdname (argv[0]), argv[1]); - return CMD_HELP; - } - - if (argc > 2) { - count = strtol (argv[2], &end, 0); - if (*end != 0) { - fprintf (stderr, "error: %s: invalid count '%s'\n", - cmdname (argv[0]), argv[1]); - return CMD_HELP; - } - } - - for (i = 0; i < count; i++) { - IOC_INIT (data); - data.ioc_obdo1.o_id = lsm_buffer.lsm.lsm_object_id + i; - data.ioc_obdo1.o_mode = S_IFREG | 0644; - data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE; - data.ioc_pbuf1 = (char *)&lsm_buffer; - data.ioc_plen1 = sizeof (lsm_buffer); - - IOC_PACK (argv[0], data); - rc = l_ioctl (OBD_DEV_ID, ECHO_IOC_SET_STRIPE, buf); - IOC_UNPACK (argv[0], data); - - if (rc != 0) { - fprintf (stderr, "Error: %s: rc %d(%s)\n", - cmdname (argv[0]), rc, strerror (errno)); - return (rc); - } - } - - return (0); -} - -/* Clear stripe meta-data info for an object on this echo-client instance - */ -int jt_obd_unset_stripe (int argc, char **argv) -{ - struct obd_ioctl_data data; - char *end; - obd_id id; - int rc; - - if (argc != 2) - return CMD_HELP; - - id = strtoull (argv[1], &end, 0); - if (*end != 0) { - fprintf (stderr, "error: %s: invalid object id '%s'\n", - cmdname (argv[0]), argv[1]); - return CMD_HELP; - } - - IOC_INIT (data); - data.ioc_obdo1.o_id = id; - data.ioc_obdo1.o_mode = S_IFREG | 0644; - data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE; - - IOC_PACK (argv[0], data); - rc = l_ioctl (OBD_DEV_ID, ECHO_IOC_SET_STRIPE, buf); - IOC_UNPACK (argv[0], data); - - if (rc != 0) - fprintf (stderr, "Error: %s: rc %d(%s)\n", - cmdname (argv[0]), rc, strerror (errno)); - - return (0); -} - -/* Create one or more objects, arg[4] may describe stripe meta-data. If - * not, defaults assumed. 
This echo-client instance stashes the stripe - * object ids. Use get_stripe on this node to print full lsm and - * set_stripe on another node to cut/paste between nodes. - */ -int jt_obd_create(int argc, char **argv) -{ - static __u64 base_id = 1; - - struct obd_ioctl_data data; - struct timeval next_time; - __u64 count = 1, next_count; - int verbose = 1, mode = 0100644, rc = 0, i; - char *end; - - IOC_INIT(data); - if (argc < 2 || argc > 5) - return CMD_HELP; - - count = strtoull(argv[1], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid iteration count '%s'\n", - cmdname(argv[0]), argv[1]); - return CMD_HELP; - } - - if (argc > 2) { - mode = strtoul(argv[2], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid mode '%s'\n", - cmdname(argv[0]), argv[2]); - return CMD_HELP; - } - if (!(mode & S_IFMT)) - mode |= S_IFREG; - } - - if (argc > 3) { - verbose = get_verbose(argv[0], argv[3]); - if (verbose == BAD_VERBOSE) - return CMD_HELP; - } - - if (argc < 5) - reset_lsmb (&lsm_buffer); /* will set default */ - else { - rc = parse_lsm (&lsm_buffer, argv[4]); - if (rc != 0) { - fprintf(stderr, "error: %s: invalid lsm '%s'\n", - cmdname(argv[0]), argv[4]); - return CMD_HELP; - } - base_id = lsm_buffer.lsm.lsm_object_id; - } - - printf("%s: "LPD64" objects\n", cmdname(argv[0]), count); - gettimeofday(&next_time, NULL); - next_time.tv_sec -= verbose; - - for (i = 1, next_count = verbose; i <= count; i++) { - data.ioc_obdo1.o_mode = mode; - data.ioc_obdo1.o_id = base_id++; - data.ioc_obdo1.o_uid = 0; - data.ioc_obdo1.o_gid = 0; - data.ioc_obdo1.o_valid = OBD_MD_FLTYPE | OBD_MD_FLMODE | - OBD_MD_FLID | OBD_MD_FLUID | OBD_MD_FLGID; - - data.ioc_plen1 = sizeof (lsm_buffer); - data.ioc_pbuf1 = (char *)&lsm_buffer; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CREATE, buf); - IOC_UNPACK(argv[0], data); - SHMEM_BUMP(); - if (rc < 0) { - fprintf(stderr, "error: %s: #%d - %s\n", - cmdname(argv[0]), i, strerror(rc = errno)); - break; - } - if 
(!(data.ioc_obdo1.o_valid & OBD_MD_FLID)) { - fprintf(stderr, "error: %s: objid not valid #%d:%08x\n", - cmdname(argv[0]), i, data.ioc_obdo1.o_valid); - rc = EINVAL; - break; - } - - if (be_verbose(verbose, &next_time, i, &next_count, count)) - printf("%s: #%d is object id "LPX64"\n", - cmdname(argv[0]), i, data.ioc_obdo1.o_id); - } - return rc; -} - -int jt_obd_setattr(int argc, char **argv) -{ - struct obd_ioctl_data data; - char *end; - int rc; - - IOC_INIT(data); - if (argc != 2) - return CMD_HELP; - - data.ioc_obdo1.o_id = strtoull(argv[1], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid objid '%s'\n", - cmdname(argv[0]), argv[1]); - return CMD_HELP; - } - data.ioc_obdo1.o_mode = S_IFREG | strtoul(argv[2], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid mode '%s'\n", - cmdname(argv[0]), argv[2]); - return CMD_HELP; - } - data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_SETATTR, buf); - if (rc < 0) - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - - return rc; -} - -int jt_obd_destroy(int argc, char **argv) -{ - struct obd_ioctl_data data; - struct timeval next_time; - __u64 count = 1, next_count; - int verbose = 1; - __u64 id; - char *end; - int rc = 0, i; - - IOC_INIT(data); - if (argc < 2 || argc > 4) - return CMD_HELP; - - id = strtoull(argv[1], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid objid '%s'\n", - cmdname(argv[0]), argv[1]); - return CMD_HELP; - } - if (argc > 2) { - count = strtoull(argv[2], &end, 0); - if (*end) { - fprintf(stderr, - "error: %s: invalid iteration count '%s'\n", - cmdname(argv[0]), argv[2]); - return CMD_HELP; - } - } - - if (argc > 3) { - verbose = get_verbose(argv[0], argv[3]); - if (verbose == BAD_VERBOSE) - return CMD_HELP; - } - - printf("%s: "LPD64" objects\n", cmdname(argv[0]), count); - gettimeofday(&next_time, NULL); - next_time.tv_sec -= verbose; - - for (i = 1, 
next_count = verbose; i <= count; i++, id++) { - data.ioc_obdo1.o_id = id; - data.ioc_obdo1.o_mode = S_IFREG | 0644; - data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLMODE; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DESTROY, buf); - IOC_UNPACK(argv[0], data); - SHMEM_BUMP(); - if (rc < 0) { - fprintf(stderr, "error: %s: objid "LPX64": %s\n", - cmdname(argv[0]), id, strerror(rc = errno)); - break; - } - - if (be_verbose(verbose, &next_time, i, &next_count, count)) - printf("%s: #%d is object id "LPX64"\n", - cmdname(argv[0]), i, id); - } - - return rc; -} - -int jt_obd_getattr(int argc, char **argv) -{ - struct obd_ioctl_data data; - char *end; - int rc; - - if (argc != 2) - return CMD_HELP; - - IOC_INIT(data); - data.ioc_obdo1.o_id = strtoull(argv[1], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid objid '%s'\n", - cmdname(argv[0]), argv[1]); - return CMD_HELP; - } - /* to help obd filter */ - data.ioc_obdo1.o_mode = 0100644; - data.ioc_obdo1.o_valid = 0xffffffff; - printf("%s: object id "LPX64"\n", cmdname(argv[0]),data.ioc_obdo1.o_id); - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_GETATTR, buf); - IOC_UNPACK(argv[0], data); - if (rc) { - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - } else { - printf("%s: object id "LPX64", mode %o\n", cmdname(argv[0]), - data.ioc_obdo1.o_id, data.ioc_obdo1.o_mode); - } - return rc; -} - -int jt_obd_test_getattr(int argc, char **argv) -{ - struct obd_ioctl_data data; - struct timeval start, next_time; - __u64 i, count, next_count; - int verbose = 1; - obd_id objid = 3; - char *end; - int rc = 0; - - if (argc < 2 && argc > 4) - return CMD_HELP; - - IOC_INIT(data); - count = strtoull(argv[1], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid iteration count '%s'\n", - cmdname(argv[0]), argv[1]); - return CMD_HELP; - } - - if (argc >= 3) { - verbose = get_verbose(argv[0], argv[2]); - if (verbose == BAD_VERBOSE) - return CMD_HELP; - } - 
- if (argc >= 4) { - if (argv[3][0] == 't') { - objid = strtoull(argv[3] + 1, &end, 0); - if (thread) - objid += thread - 1; - } else - objid = strtoull(argv[3], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: invalid objid '%s'\n", - cmdname(argv[0]), argv[3]); - return CMD_HELP; - } - } - - gettimeofday(&start, NULL); - next_time.tv_sec = start.tv_sec - verbose; - next_time.tv_usec = start.tv_usec; - if (verbose != 0) - printf("%s: getting "LPD64" attrs (objid "LPX64"): %s", - cmdname(argv[0]), count, objid, ctime(&start.tv_sec)); - - for (i = 1, next_count = verbose; i <= count; i++) { - data.ioc_obdo1.o_id = objid; - data.ioc_obdo1.o_mode = S_IFREG; - data.ioc_obdo1.o_valid = 0xffffffff; - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_GETATTR, &data); - SHMEM_BUMP(); - if (rc < 0) { - fprintf(stderr, "error: %s: #"LPD64" - %d:%s\n", - cmdname(argv[0]), i, errno, strerror(rc = errno)); - break; - } else { - if (be_verbose - (verbose, &next_time, i, &next_count, count)) - printf("%s: got attr #"LPD64"\n", - cmdname(argv[0]), i); - } - } - - if (!rc) { - struct timeval end; - double diff; - - gettimeofday(&end, NULL); - - diff = difftime(&end, &start); - - --i; - if (verbose != 0) - printf("%s: "LPD64" attrs in %.4gs (%.4g attr/s): %s", - cmdname(argv[0]), i, diff, (double)i / diff, - ctime(&end.tv_sec)); - } - return rc; -} - -int jt_obd_test_brw(int argc, char **argv) -{ - struct obd_ioctl_data data; - struct timeval start, next_time; - int pages = 1; - __u64 count, next_count; - __u64 objid = 3; - int verbose = 1, write = 0, rw; - char *end; - int thr_offset = 0; - int i; - int len; - int rc = 0; - - if (argc < 2 || argc > 6) { - fprintf(stderr, "error: %s: bad number of arguments: %d\n", - cmdname(argv[0]), argc); - return CMD_HELP; - } - - /* make each thread write to a different offset */ - if (argv[1][0] == 't') { - count = strtoull(argv[1] + 1, &end, 0); - if (thread) - thr_offset = thread - 1; - } else - count = strtoull(argv[1], &end, 
0); - - if (*end) { - fprintf(stderr, "error: %s: bad iteration count '%s'\n", - cmdname(argv[0]), argv[1]); - return CMD_HELP; - } - - if (argc >= 3) { - if (argv[2][0] == 'w' || argv[2][0] == '1') - write = 1; - else if (argv[2][0] == 'r' || argv[2][0] == '0') - write = 0; - } - - if (argc >= 4) { - verbose = get_verbose(argv[0], argv[3]); - if (verbose == BAD_VERBOSE) - return CMD_HELP; - } - - if (argc >= 5) { - pages = strtoul(argv[4], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: bad page count '%s'\n", - cmdname(argv[0]), argv[4]); - return CMD_HELP; - } - } - if (argc >= 6) { - if (argv[5][0] == 't') { - objid = strtoull(argv[5] + 1, &end, 0); - if (thread) - objid += thread - 1; - } else - objid = strtoull(argv[5], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: bad objid '%s'\n", - cmdname(argv[0]), argv[5]); - return CMD_HELP; - } - } - - len = pages * PAGE_SIZE; - - IOC_INIT(data); - data.ioc_obdo1.o_id = objid; - data.ioc_obdo1.o_mode = S_IFREG; - data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; - data.ioc_count = len; - data.ioc_offset = thr_offset * len * count; - - gettimeofday(&start, NULL); - next_time.tv_sec = start.tv_sec - verbose; - next_time.tv_usec = start.tv_usec; - - if (verbose != 0) - printf("%s: %s "LPU64"x%d pages (obj "LPX64", off "LPU64"): %s", - cmdname(argv[0]), write ? "writing" : "reading", count, - pages, objid, data.ioc_offset, ctime(&start.tv_sec)); - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_OPEN, buf); - IOC_UNPACK(argv[0], data); - if (rc) { - fprintf(stderr, "error: brw_open: %s\n", strerror(rc = errno)); - return rc; - } - - rw = write ? OBD_IOC_BRW_WRITE : OBD_IOC_BRW_READ; - for (i = 1, next_count = verbose; i <= count; i++) { - rc = l_ioctl(OBD_DEV_ID, rw, buf); - SHMEM_BUMP(); - if (rc) { - fprintf(stderr, "error: %s: #%d - %s on %s\n", - cmdname(argv[0]), i, strerror(rc = errno), - write ? 
"write" : "read"); - break; - } else if (be_verbose(verbose, &next_time,i, &next_count,count)) - printf("%s: %s number %dx%d\n", cmdname(argv[0]), - write ? "write" : "read", i, pages); - - data.ioc_offset += len; - } - - if (!rc) { - struct timeval end; - double diff; - - gettimeofday(&end, NULL); - - diff = difftime(&end, &start); - - --i; - if (verbose != 0) - printf("%s: %s %dx%d pages in %.4gs (%.4g pg/s): %s", - cmdname(argv[0]), write ? "wrote" : "read", - i, pages, diff, (double)i * pages / diff, - ctime(&end.tv_sec)); - } - rw = l_ioctl(OBD_DEV_ID, OBD_IOC_CLOSE, buf); - if (rw) { - fprintf(stderr, "error: brw_close: %s\n", strerror(rw = errno)); - if (!rc) - rc = rw; - } - - return rc; -} - -int jt_obd_lov_setconfig(int argc, char **argv) -{ - struct obd_ioctl_data data; - struct lov_desc desc; - struct obd_uuid *uuidarray, *ptr; - int rc, i; - char *end; - - IOC_INIT(data); - - if (argc <= 6) - return CMD_HELP; - - if (strlen(argv[1]) > sizeof(desc.ld_uuid) - 1) { - fprintf(stderr, - "error: %s: LOV uuid '%s' longer than "LPSZ" chars\n", - cmdname(argv[0]), argv[1], sizeof(desc.ld_uuid) - 1); - return -EINVAL; - } - - memset(&desc, 0, sizeof(desc)); - obd_str2uuid(&desc.ld_uuid, argv[1]); - desc.ld_tgt_count = argc - 6; - desc.ld_default_stripe_count = strtoul(argv[2], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: bad default stripe count '%s'\n", - cmdname(argv[0]), argv[2]); - return CMD_HELP; - } - if (desc.ld_default_stripe_count > desc.ld_tgt_count) { - fprintf(stderr, - "error: %s: default stripe count %u > OST count %u\n", - cmdname(argv[0]), desc.ld_default_stripe_count, - desc.ld_tgt_count); - return -EINVAL; - } - - desc.ld_default_stripe_size = strtoull(argv[3], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: bad default stripe size '%s'\n", - cmdname(argv[0]), argv[3]); - return CMD_HELP; - } - if (desc.ld_default_stripe_size < 4096) { - fprintf(stderr, - "error: %s: default stripe size "LPU64" too small\n", - cmdname(argv[0]), 
desc.ld_default_stripe_size); - return -EINVAL; - } else if ((long)desc.ld_default_stripe_size < - desc.ld_default_stripe_size) { - fprintf(stderr, - "error: %s: default stripe size "LPU64" too large\n", - cmdname(argv[0]), desc.ld_default_stripe_size); - return -EINVAL; - } - desc.ld_default_stripe_offset = strtoull(argv[4], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: bad default stripe offset '%s'\n", - cmdname(argv[0]), argv[4]); - return CMD_HELP; - } - desc.ld_pattern = strtoul(argv[5], &end, 0); - if (*end) { - fprintf(stderr, "error: %s: bad stripe pattern '%s'\n", - cmdname(argv[0]), argv[5]); - return CMD_HELP; - } - - /* NOTE: it is possible to overwrite the default striping parameters, - * but EXTREME care must be taken when saving the OST UUID list. - * It must be EXACTLY the same, or have only additions at the - * end of the list, or only overwrite individual OST entries - * that are restored from backups of the previous OST. - */ - uuidarray = calloc(desc.ld_tgt_count, sizeof(*uuidarray)); - if (!uuidarray) { - fprintf(stderr, "error: %s: no memory for %d UUIDs\n", - cmdname(argv[0]), desc.ld_tgt_count); - rc = -ENOMEM; - goto out; - } - for (i = 6, ptr = uuidarray; i < argc; i++, ptr++) { - if (strlen(argv[i]) >= sizeof(*ptr)) { - fprintf(stderr, "error: %s: arg %d (%s) too long\n", - cmdname(argv[0]), i, argv[i]); - rc = -EINVAL; - goto out; - } - strcpy((char *)ptr, argv[i]); - } - - data.ioc_inllen1 = sizeof(desc); - data.ioc_inlbuf1 = (char *)&desc; - data.ioc_inllen2 = desc.ld_tgt_count * sizeof(*uuidarray); - data.ioc_inlbuf2 = (char *)uuidarray; - - if (obd_ioctl_pack(&data, &buf, max)) { - fprintf(stderr, "error: %s: invalid ioctl\n", cmdname(argv[0])); - rc = -EINVAL; - goto out; - } - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_LOV_SET_CONFIG, buf); - if (rc) - fprintf(stderr, "error: %s: ioctl error: %s\n", - cmdname(argv[0]), strerror(rc = errno)); -out: - free(uuidarray); - return rc; -} - -#define DEF_UUID_ARRAY_LEN (8192 / 40) - -int 
jt_obd_lov_getconfig(int argc, char **argv) -{ - struct obd_ioctl_data data; - struct lov_desc desc; - struct obd_uuid *uuidarray; - char *path; - int rc, fd; - - IOC_INIT(data); - - if (argc != 2) - return CMD_HELP; - - path = argv[1]; - fd = open(path, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "open \"%s\" failed: %s\n", path, - strerror(errno)); - return -1; - } - - memset(&desc, 0, sizeof(desc)); - obd_str2uuid(&desc.ld_uuid, argv[1]); - desc.ld_tgt_count = DEF_UUID_ARRAY_LEN; -repeat: - uuidarray = calloc(desc.ld_tgt_count, sizeof(*uuidarray)); - if (!uuidarray) { - fprintf(stderr, "error: %s: no memory for %d uuid's\n", - cmdname(argv[0]), desc.ld_tgt_count); - rc = -ENOMEM; - goto out; - } - - data.ioc_inllen1 = sizeof(desc); - data.ioc_inlbuf1 = (char *)&desc; - data.ioc_inllen2 = desc.ld_tgt_count * sizeof(*uuidarray); - data.ioc_inlbuf2 = (char *)uuidarray; - - if (obd_ioctl_pack(&data, &buf, max)) { - fprintf(stderr, "error: %s: invalid ioctl\n", cmdname(argv[0])); - rc = -EINVAL; - goto out; - } - rc = ioctl(fd, OBD_IOC_LOV_GET_CONFIG, buf); - if (rc == -ENOSPC) { - free(uuidarray); - goto repeat; - } else if (rc) { - fprintf(stderr, "error: %s: ioctl error: %s\n", - cmdname(argv[0]), strerror(rc = errno)); - } else { - struct obd_uuid *ptr; - int i; - - if (obd_ioctl_unpack(&data, buf, max)) { - fprintf(stderr, "error: %s: invalid reply\n", - cmdname(argv[0])); - rc = -EINVAL; - goto out; - } - printf("default_stripe_count: %u\n", - desc.ld_default_stripe_count); - printf("default_stripe_size: "LPU64"\n", - desc.ld_default_stripe_size); - printf("default_stripe_offset: "LPU64"\n", - desc.ld_default_stripe_offset); - printf("default_stripe_pattern: %u\n", desc.ld_pattern); - printf("obd_count: %u\n", desc.ld_tgt_count); - for (i = 0, ptr = uuidarray; i < desc.ld_tgt_count; i++, ptr++) - printf("%u: %s\n", i, (char *)ptr); - } -out: - free(uuidarray); - close(fd); - return rc; -} - -int jt_obd_test_ldlm(int argc, char **argv) -{ - struct 
obd_ioctl_data data; - int rc; - - IOC_INIT(data); - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_TEST, buf); - if (rc) - fprintf(stderr, "error: %s: test failed: %s\n", - cmdname(argv[0]), strerror(rc = errno)); - return rc; -} - -int jt_obd_dump_ldlm(int argc, char **argv) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_DUMP, buf); - if (rc) - fprintf(stderr, "error: %s failed: %s\n", - cmdname(argv[0]), strerror(rc = errno)); - return rc; -} - -int jt_obd_ldlm_regress_start(int argc, char **argv) -{ - int rc; - struct obd_ioctl_data data; - char argstring[200]; - int i, count = sizeof(argstring) - 1; - - IOC_INIT(data); - if (argc > 5) - return CMD_HELP; - - argstring[0] = '\0'; - for (i = 1; i < argc; i++) { - strncat(argstring, " ", count); - count--; - strncat(argstring, argv[i], count); - count -= strlen(argv[i]); - } - - if (strlen(argstring)) { - data.ioc_inlbuf1 = argstring; - data.ioc_inllen1 = strlen(argstring) + 1; - } - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_REGRESS_START, buf); - if (rc) - fprintf(stderr, "error: %s: test failed: %s\n", - cmdname(argv[0]), strerror(rc = errno)); - - return rc; -} - -int jt_obd_ldlm_regress_stop(int argc, char **argv) -{ - int rc; - struct obd_ioctl_data data; - IOC_INIT(data); - - if (argc != 1) - return CMD_HELP; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, IOC_LDLM_REGRESS_STOP, buf); - - if (rc) - fprintf(stderr, "error: %s: test failed: %s\n", - cmdname(argv[0]), strerror(rc = errno)); - return rc; -} - -static int do_activate(int argc, char **argv, int flag) -{ - struct obd_ioctl_data data; - int rc; - - IOC_INIT(data); - if (argc != 1) - return CMD_HELP; - - /* reuse offset for 'active' */ - data.ioc_offset = flag; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, IOC_OSC_SET_ACTIVE, buf); - if (rc) - 
fprintf(stderr, "error: %s: failed: %s\n", - cmdname(argv[0]), strerror(rc = errno)); - - return rc; -} - -int jt_obd_deactivate(int argc, char **argv) -{ - return do_activate(argc, argv, 0); -} - -int jt_obd_activate(int argc, char **argv) -{ - return do_activate(argc, argv, 1); -} - -int jt_obd_recover(int argc, char **argv) -{ - int rc; - struct obd_ioctl_data data; - - IOC_INIT(data); - if (argc > 2) - return CMD_HELP; - - if (argc == 2) { - data.ioc_inllen1 = strlen(argv[1]) + 1; - data.ioc_inlbuf1 = argv[1]; - } - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CLIENT_RECOVER, buf); - if (rc < 0) { - fprintf(stderr, "error: %s: %s\n", cmdname(argv[0]), - strerror(rc = errno)); - } - - return rc; -} - -int jt_obd_mdc_lookup(int argc, char **argv) -{ - struct obd_ioctl_data data; - char *parent, *child; - int rc, fd, verbose = 1; - - if (argc < 3 || argc > 4) - return CMD_HELP; - - parent = argv[1]; - child = argv[2]; - if (argc == 4) - verbose = get_verbose(argv[0], argv[3]); - - IOC_INIT(data); - - data.ioc_inllen1 = strlen(child) + 1; - data.ioc_inlbuf1 = child; - - IOC_PACK(argv[0], data); - - fd = open(parent, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "open \"%s\" failed: %s\n", parent, - strerror(errno)); - return -1; - } - - rc = ioctl(fd, IOC_MDC_LOOKUP, buf); - if (rc < 0) { - fprintf(stderr, "error: %s: ioctl error: %s\n", - cmdname(argv[0]), strerror(rc = errno)); - } - close(fd); - - if (verbose) { - IOC_UNPACK(argv[0], data); - printf("%s: mode %o uid %d gid %d\n", child, - data.ioc_obdo1.o_mode, data.ioc_obdo1.o_uid, - data.ioc_obdo1.o_gid); - } - - return rc; -} - -static -int do_add_uuid(char * func, char *uuid, ptl_nid_t nid, int nal) -{ - char tmp[64]; - int rc; - struct obd_ioctl_data data; - - IOC_INIT(data); - data.ioc_nid = nid; - data.ioc_inllen1 = strlen(uuid) + 1; - data.ioc_inlbuf1 = uuid; - data.ioc_nal = nal; - - IOC_PACK(func, data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_ADD_UUID, buf); - if (rc) { - fprintf(stderr, 
"IOC_PORTAL_ADD_UUID failed: %s\n", - strerror(errno)); - return -1; - } - - printf ("Added uuid %s: %s\n", uuid, ptl_nid2str (tmp, nid)); - return 0; -} - -int jt_obd_add_uuid(int argc, char **argv) -{ - ptl_nid_t nid = 0; - int nal; - - if (argc != 4) { - return CMD_HELP; - } - - if (ptl_parse_nid (&nid, argv[2]) != 0) { - fprintf (stderr, "Can't parse NID %s\n", argv[2]); - return (-1); - } - - nal = ptl_name2nal(argv[3]); - - if (nal == 0) { - fprintf (stderr, "Can't parse NAL %s\n", argv[3]); - return -1; - } - - return do_add_uuid(argv[0], argv[1], nid, nal); -} - -int jt_obd_close_uuid(int argc, char **argv) -{ - int rc, nal; - struct obd_ioctl_data data; - - if (argc != 3) { - fprintf(stderr, "usage: %s <uuid> <net-type>\n", argv[0]); - return 0; - } - - nal = ptl_name2nal(argv[2]); - - if (nal == 0) { - fprintf (stderr, "Can't parse NAL %s\n", argv[2]); - return -1; - } - - IOC_INIT(data); - data.ioc_inllen1 = strlen(argv[1]) + 1; - data.ioc_inlbuf1 = argv[1]; - data.ioc_nal = nal; - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_CLOSE_UUID, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_CLOSE_UUID failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - - -int jt_obd_del_uuid(int argc, char **argv) -{ - int rc; - struct obd_ioctl_data data; - - if (argc != 2) { - fprintf(stderr, "usage: %s <uuid>\n", argv[0]); - return 0; - } - - IOC_INIT(data); - - if (strcmp (argv[1], "_all_")) - { - data.ioc_inllen1 = strlen(argv[1]) + 1; - data.ioc_inlbuf1 = argv[1]; - } - - IOC_PACK(argv[0], data); - rc = l_ioctl(OBD_DEV_ID, OBD_IOC_DEL_UUID, buf); - if (rc) { - fprintf(stderr, "IOC_PORTAL_DEL_UUID failed: %s\n", - strerror(errno)); - return -1; - } - return 0; -} - -static void signal_server(int sig) -{ - if (sig == SIGINT) { - do_disconnect("sigint", 1); - exit(1); - } else - fprintf(stderr, "%s: got signal %d\n", cmdname("sigint"), sig); -} - -int obd_initialize(int argc, char **argv) -{ - SHMEM_SETUP(); - register_ioc_dev(OBD_DEV_ID, 
OBD_DEV_PATH); - - return 0; -} - - -void obd_cleanup(int argc, char **argv) -{ - struct sigaction sigact; - - sigact.sa_handler = signal_server; - sigfillset(&sigact.sa_mask); - sigact.sa_flags = SA_RESTART; - sigaction(SIGINT, &sigact, NULL); - - do_disconnect(argv[0], 1); -} diff --git a/lustre/utils/obdbarrier.c b/lustre/utils/obdbarrier.c deleted file mode 100644 index 4373071..0000000 --- a/lustre/utils/obdbarrier.c +++ /dev/null @@ -1,224 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eeb@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> - -#include <liblustre.h> -#include "obdiolib.h" - -int -parse_kmg (uint64_t *valp, char *str) -{ - uint64_t val; - char mod[32]; - - switch (sscanf (str, LPU64"%1[gGmMkK]", &val, mod)) - { - default: - return (-1); - - case 1: - *valp = val; - return (0); - - case 2: - switch (*mod) - { - case 'g': - case 'G': - *valp = val << 30; - return (0); - - case 'm': - case 'M': - *valp = val << 20; - return (0); - - case 'k': - case 'K': - *valp = val << 10; - return (0); - - default: - *valp = val; - return (0); - } - } -} - -void -usage (char *cmdname, int help) -{ - char *name = strrchr (cmdname, '/'); - - if (name == NULL) - name = cmdname; - - fprintf (help ? stdout : stderr, - "usage: %s -d device -s size -o offset [-i id][-n reps][-l] oid\n", - name); -} - -int -exponential_modulus (int i, int base) -{ - int top = base; - int mod = 1; - - for (;;) { - if (i < top) - return (i%mod == 0); - - mod = top; - top *= base; - } -} - -int -main (int argc, char **argv) -{ - uint64_t bid = (((uint64_t)gethostid()) << 32) | getpid (); - int set_bid = 0; - uint64_t oid; - int setup = 0; - int device = -1; - int npeers = 0; - int reps = 1; - char hostname[128]; - struct obdio_conn *conn; - struct obdio_barrier *b; - char *end; - uint64_t val; - int rc; - int c; - - setvbuf (stdout, NULL, _IOLBF, 0); - memset (hostname, 0, sizeof (hostname)); - gethostname (hostname, sizeof (hostname)); - hostname[sizeof(hostname) - 1] = 0; - - while ((c = getopt (argc, argv, "hsi:d:n:p:")) != -1) - switch (c) { - case 'h': - usage (argv[0], 1); - return (0); - - case 'i': - bid = strtoll (optarg, &end, 0); - if (end == optarg || *end != 0) { - fprintf (stderr, "Can't parse id %s\n", - optarg); - return (1); - } - set_bid = 1; - break; - - case 's': - setup = 1; - break; - - case 'd': - device = strtol (optarg, &end, 0); - if (end == optarg || *end != 0 || device < 0) { - fprintf (stderr, "Can't parse 
device %s\n", - optarg); - return (1); - } - break; - - case 'n': - if (parse_kmg (&val, optarg) != 0) { - fprintf (stderr, "Can't parse reps %s\n", - optarg); - return (1); - } - reps = (int)val; - break; - - case 'p': - npeers = strtol (optarg, &end, 0); - if (end == optarg || *end != 0 || npeers <= 0) { - fprintf (stderr, "Can't parse npeers %s\n", - optarg); - return (1); - } - break; - - default: - usage (argv[0], 0); - return (1); - } - - if ((!setup && !set_bid) || - npeers <= 0 || - device < 0 || - optind == argc) { - fprintf (stderr, "%s not specified\n", - (!setup && !set_bid) ? "id" : - npeers <= 0 ? "npeers" : - device < 0 ? "device" : "object id"); - return (1); - } - - oid = strtoull (argv[optind], &end, 0); - if (end == argv[optind] || *end != 0) { - fprintf (stderr, "Can't parse object id %s\n", - argv[optind]); - return (1); - } - - conn = obdio_connect (device); - if (conn == NULL) - return (1); - - b = obdio_new_barrier (oid, bid, npeers); - if (b == NULL) - return (1); - - rc = 0; - if (setup) { - rc = obdio_setup_barrier (conn, b); - if (rc == 0) - printf ("Setup barrier: -d %d -i "LPX64" -p %d -n1 "LPX64"\n", - device, bid, npeers, oid); - } else { - for (c = 0; c < reps; c++) { - rc = obdio_barrier (conn, b); - if (rc != 0) - break; - if (exponential_modulus (c, 10)) - printf ("%s: Barrier %d\n", hostname, c); - } - } - - free (b); - - obdio_disconnect (conn); - - return (rc == 0 ? 0 : 1); -} - - diff --git a/lustre/utils/obdctl.c b/lustre/utils/obdctl.c deleted file mode 100644 index 8fd4f7c..0000000 --- a/lustre/utils/obdctl.c +++ /dev/null @@ -1,104 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. 
- * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - - -#include <stdlib.h> -#include <stdio.h> - -#include "obdctl.h" -#include "parser.h" - -/* the functions that were in here are now in obd.c */ - -static int jt_quit(int argc, char **argv) -{ - int rc = 0; - Parser_quit(argc, argv); - - return rc; -} - -command_t cmdlist[] = { - /* Metacommands */ - {"--device", jt_opt_device, 0, "--device <devno> <command [args ...]>"}, - {"--threads", jt_opt_threads, 0, - "--threads <threads> <devno> <command [args ...]>"}, - - /* Device configuration commands */ - {"lov_setconfig", jt_obd_lov_setconfig, 0, "configure lov data on MDS " - "[usage: lovconfig lov-uuid stripecount, stripesize, pattern, UUID1, [UUID2, ...]"}, - {"list", jt_obd_list, 0, "list the devices (no args)"}, - {"newdev", jt_obd_newdev, 0, "set device to a new unused obd (no args)"}, - {"device", jt_obd_device, 0, "set current device (args device_no name)"}, - {"name2dev", jt_obd_name2dev, 0, - "set device by name [usage: name2dev devname]"}, - {"attach", jt_obd_attach, 0, "name the type of device (args: type data"}, - {"setup", jt_obd_setup, 0, "setup device (args: <blkdev> [data]"}, - {"detach", jt_obd_detach, 0, "detach the current device (arg: )"}, - {"cleanup", jt_obd_cleanup, 0, "cleanup the current device (arg: )"}, - - /* Session commands */ - {"connect", jt_obd_connect, 0, "connect - get a connection to device"}, - 
{"disconnect", jt_obd_disconnect, 0, - "disconnect - break connection to device"}, - - /* Session operations */ - {"create", jt_obd_create, 0, "create <count> [mode [verbose]]"}, - {"destroy", jt_obd_destroy, 0, "destroy <id> [count [verbose]]"}, - {"getattr", jt_obd_getattr, 0, "getattr <id>"}, - {"setattr", jt_obd_setattr, 0, "setattr <id> <mode>"}, - {"newconn", jt_obd_newconn, 0, "newconn <olduuid> [newuuid]"}, - {"test_getattr", jt_obd_test_getattr, 0, "test_getattr <count> [verbose [[t]objid]]"}, - {"test_brw", jt_obd_test_brw, 0, "test_brw [t]<count> [write [verbose [pages [[t]objid]]]]"}, - {"test_ldlm", jt_obd_test_ldlm, 0, "test lock manager (no args)"}, - {"dump_ldlm", jt_obd_dump_ldlm, 0, "dump all lock manager state (no args)"}, - - /* User interface commands */ - {"help", Parser_help, 0, "help"}, - {"exit", jt_quit, 0, "quit"}, - {"quit", jt_quit, 0, "quit"}, - {0, 0, 0, NULL} -}; - - -int main(int argc, char **argv) -{ - int rc; - - setlinebuf(stdout); - - if (obd_initialize(argc, argv) < 0) - exit(1); - - Parser_init("obdctl > ", cmdlist); - - if (argc > 1) { - rc = Parser_execarg(argc - 1, argv + 1, cmdlist); - } else { - rc = Parser_commands(); - } - - obd_cleanup(argc, argv); - return rc; -} diff --git a/lustre/utils/obdctl.h b/lustre/utils/obdctl.h deleted file mode 100644 index 0203579..0000000 --- a/lustre/utils/obdctl.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * - * Author: Peter J. Braam <braam@clusterfs.com> - * Author: Phil Schwan <phil@clusterfs.com> - * Author: Robert Read <rread@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. 
- * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#ifndef _OBDCTL_H_ -#define _OBDCTL_H_ - -int do_disconnect(char *func, int verbose); - int obd_initialize(int argc, char **argv); -void obd_cleanup(int argc, char **argv); - -int jt_opt_device(int argc, char **argv); -int jt_opt_threads(int argc, char **argv); - -int jt_obd_device(int argc, char **argv); -int jt_obd_connect(int argc, char **argv); -int jt_obd_disconnect(int argc, char **argv); -int jt_obd_detach(int argc, char **argv); -int jt_obd_cleanup(int argc, char **argv); -int jt_obd_no_transno(int argc, char **argv); -int jt_obd_set_readonly(int argc, char **argv); -int jt_obd_abort_recovery(int argc, char **argv); -int jt_obd_newdev(int argc, char **argv); -int jt_obd_mount_option(int argc, char **argv); -int jt_obd_list(int argc, char **argv); -int jt_obd_attach(int argc, char **argv); -int jt_obd_setup(int argc, char **argv); -int jt_obd_create(int argc, char **argv); -int jt_obd_setattr(int argc, char **argv); -int jt_obd_destroy(int argc, char **argv); -int jt_obd_getattr(int argc, char **argv); -int jt_obd_test_getattr(int argc, char **argv); -int jt_obd_test_brw(int argc, char **argv); -int jt_obd_get_stripe(int argc, char **argv); -int jt_obd_set_stripe(int argc, char **argv); -int jt_obd_unset_stripe(int argc, char **argv); -int jt_obd_lov_setconfig(int argc, char **argv); -int jt_obd_lov_getconfig(int argc, char **argv); -int jt_obd_test_ldlm(int argc, char **argv); -int jt_obd_ldlm_regress_start(int argc, char **argv); -int jt_obd_ldlm_regress_stop(int argc, char **argv); -int jt_obd_dump_ldlm(int argc, 
char **argv); -int jt_obd_activate(int argc, char **argv); -int jt_obd_deactivate(int argc, char **argv); -int jt_obd_recover(int argc, char **argv); -int jt_obd_mdc_lookup(int argc, char **argv); -int jt_get_version(int argc, char **argv); -int jt_obd_add_uuid(int argc, char **argv); -int jt_obd_close_uuid(int argc, char **argv); -int jt_obd_del_uuid(int argc, char **argv); - -#endif diff --git a/lustre/utils/obdio.c b/lustre/utils/obdio.c deleted file mode 100644 index 8264761..0000000 --- a/lustre/utils/obdio.c +++ /dev/null @@ -1,305 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2002 Cluster File Systems, Inc. - * Author: Eric Barton <eeb@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> - -#include <liblustre.h> -#include "obdiolib.h" - -int -obdio_test_fixed_extent (struct obdio_conn *conn, - uint32_t myhid, uint32_t mypid, - int reps, int locked, uint64_t oid, - uint64_t offset, uint32_t size) -{ - struct lustre_handle fh; - struct lustre_handle lh; - void *space; - void *buffer; - uint32_t *ibuf; - int i; - int j; - int rc; - int rc2; - - rc = obdio_open (conn, oid, &fh); - if (rc != 0) { - fprintf (stderr, "Failed to open object "LPX64": %s\n", - oid, strerror (errno)); - return (rc); - } - - buffer = obdio_alloc_aligned_buffer (&space, size); - if (buffer == NULL) { - fprintf (stderr, "Can't allocate buffer size %d\n", size); - rc = -1; - goto out_0; - } - - for (i = 0; i < reps; i++) { - ibuf = (uint32_t *) buffer; - for (j = 0; j < size / (4 * sizeof (*ibuf)); j++) { - ibuf[0] = myhid; - ibuf[1] = mypid; - ibuf[2] = i; - ibuf[3] = j; - ibuf += 4; - } - - if (locked) { - rc = obdio_enqueue (conn, oid, LCK_PW, offset, size, &lh); - if (rc != 0) { - fprintf (stderr, "Error on enqueue "LPX64" @ "LPU64" for %u: %s\n", - oid, offset, size, strerror (errno)); - goto out_1; - } - } - - rc = obdio_pwrite (conn, oid, buffer, size, offset); - if (rc != 0) { - fprintf (stderr, "Error writing "LPX64" @ "LPU64" for %u: %s\n", - oid, offset, size, strerror (errno)); - if (locked) - obdio_cancel (conn, &lh); - rc = -1; - goto out_1; - } - - memset (buffer, 0xbb, size); - - rc = obdio_pread (conn, oid, buffer, size, offset); - if (rc != 0) { - fprintf (stderr, "Error reading "LPX64" @ "LPU64" for %u: %s\n", - oid, offset, size, strerror (errno)); - if (locked) - obdio_cancel (conn, &lh); - rc = -1; - goto out_1; - } - - if (locked) { - rc = obdio_cancel (conn, &lh); - if (rc != 0) { - fprintf (stderr, "Error on cancel "LPX64" @ "LPU64" for %u: %s\n", - oid, offset, size, strerror (errno)); - rc = -1; - goto out_1; - } - } - - ibuf = (uint32_t *) buffer; - for (j = 
0; j < size / (4 * sizeof (*ibuf)); j++) { - if (ibuf[0] != myhid || - ibuf[1] != mypid || - ibuf[2] != i || - ibuf[3] != j) { - fprintf (stderr, "Error checking "LPX64" @ "LPU64" for %u, chunk %d\n", - oid, offset, size, j); - fprintf (stderr, "Expected [%x,%x,%x,%x], got [%x,%x,%x,%x]\n", - myhid, mypid, i, j, ibuf[0], ibuf[1], ibuf[2], ibuf[3]); - rc = -1; - goto out_1; - } - ibuf += 4; - } - } - out_1: - free (space); - out_0: - rc2 = obdio_close (conn, oid, &fh); - if (rc2 != 0) - fprintf (stderr, "Error closing object "LPX64": %s\n", - oid, strerror (errno)); - return (rc); -} - -int -parse_kmg (uint64_t *valp, char *str) -{ - uint64_t val; - char mod[32]; - - switch (sscanf (str, LPU64"%1[gGmMkK]", &val, mod)) - { - default: - return (-1); - - case 1: - *valp = val; - return (0); - - case 2: - switch (*mod) - { - case 'g': - case 'G': - *valp = val << 30; - return (0); - - case 'm': - case 'M': - *valp = val << 20; - return (0); - - case 'k': - case 'K': - *valp = val << 10; - return (0); - - default: - *valp = val; - return (0); - } - } -} - -void -usage (char *cmdname, int help) -{ - char *name = strrchr (cmdname, '/'); - - if (name == NULL) - name = cmdname; - - fprintf (help ? 
stdout : stderr, - "usage: %s -d device -s size -o offset [-i id][-n reps][-l] oid\n", - name); -} - -int -main (int argc, char **argv) -{ - uint32_t mypid = getpid (); - uint32_t myhid = gethostid (); - uint64_t oid; - uint64_t base_offset = 0; - uint32_t size = 0; - int set_size = 0; - int device = -1; - int reps = 1; - int locked = 0; - char *end; - struct obdio_conn *conn; - uint64_t val; - int v1; - int v2; - int rc; - int c; - - while ((c = getopt (argc, argv, "hi:s:o:d:n:l")) != -1) - switch (c) { - case 'h': - usage (argv[0], 1); - return (0); - - case 'i': - switch (sscanf (optarg, "%i.%i", &v1, &v2)) { - case 1: - mypid = v1; - break; - case 2: - myhid = v1; - mypid = v2; - break; - default: - fprintf (stderr, "Can't parse id %s\n", - optarg); - return (1); - } - break; - - case 's': - if (parse_kmg (&val, optarg) != 0) { - fprintf (stderr, "Can't parse size %s\n", - optarg); - return (1); - } - size = (uint32_t)val; - set_size++; - break; - - case 'o': - if (parse_kmg (&val, optarg) != 0) { - fprintf (stderr, "Can't parse offset %s\n", - optarg); - return (1); - } - base_offset = val; - break; - - case 'd': - device = strtol (optarg, &end, 0); - if (end == optarg || *end != 0 || device < 0) { - fprintf (stderr, "Can't parse device %s\n", - optarg); - return (1); - } - break; - case 'n': - if (parse_kmg (&val, optarg) != 0) { - fprintf (stderr, "Can't parse reps %s\n", - optarg); - return (1); - } - reps = (int)val; - break; - case 'l': - locked = 1; - break; - default: - usage (argv[0], 0); - return (1); - } - - if (!set_size || - device < 0 || - optind == argc) { - fprintf (stderr, "No %s specified\n", - !set_size ? "size" : - device < 0 ? 
"device" : "object id"); - return (1); - } - - oid = strtoull (argv[optind], &end, 0); - if (end == argv[optind] || *end != 0) { - fprintf (stderr, "Can't parse object id %s\n", - argv[optind]); - return (1); - } - - conn = obdio_connect (device); - if (conn == NULL) - return (1); - - rc = obdio_test_fixed_extent (conn, myhid, mypid, reps, locked, - oid, base_offset, size); - - obdio_disconnect (conn); - - return (rc == 0 ? 0 : 1); -} - - diff --git a/lustre/utils/obdiolib.c b/lustre/utils/obdiolib.c deleted file mode 100644 index c871818..0000000 --- a/lustre/utils/obdiolib.c +++ /dev/null @@ -1,464 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2003 Cluster File Systems, Inc. - * Author: Eric Barton <eeb@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <sys/types.h> -#include <sys/stat.h> - -#include <liblustre.h> -#include "obdiolib.h" - -void -obdio_iocinit (struct obdio_conn *conn) -{ - memset (&conn->oc_data, 0, sizeof (conn->oc_data)); - conn->oc_data.ioc_version = OBD_IOCTL_VERSION; - conn->oc_data.ioc_cookie = conn->oc_conn_cookie; - conn->oc_data.ioc_len = sizeof (conn->oc_data); -} - -int -obdio_ioctl (struct obdio_conn *conn, int cmd) -{ - char *buf = conn->oc_buffer; - int rc; - int rc2; - - rc = obd_ioctl_pack (&conn->oc_data, &buf, sizeof (conn->oc_buffer)); - if (rc != 0) { - fprintf (stderr, "obdio_ioctl: obd_ioctl_pack: %d (%s)\n", - rc, strerror (errno)); - abort (); - } - - rc = ioctl (conn->oc_fd, cmd, buf); - if (rc != 0) - return (rc); - - rc2 = obd_ioctl_unpack (&conn->oc_data, buf, sizeof (conn->oc_buffer)); - if (rc2 != 0) { - fprintf (stderr, "obdio_ioctl: obd_ioctl_unpack: %d (%s)\n", - rc2, strerror (errno)); - abort (); - } - - return (rc); -} - -struct obdio_conn * -obdio_connect (int device) -{ - struct obdio_conn *conn; - int rc; - - conn = malloc (sizeof (*conn)); - if (conn == NULL) { - fprintf (stderr, "obdio_connect: no memory\n"); - return (NULL); - } - memset (conn, 0, sizeof (*conn)); - - conn->oc_fd = open ("/dev/obd", O_RDWR); - if (conn->oc_fd < 0) { - fprintf (stderr, "obdio_connect: Can't open /dev/obd: %s\n", - strerror (errno)); - goto failed; - } - - obdio_iocinit (conn); - conn->oc_data.ioc_dev = device; - rc = obdio_ioctl (conn, OBD_IOC_DEVICE); - if (rc != 0) { - fprintf (stderr, "obdio_connect: Can't set device %d: %s\n", - device, strerror (errno)); - goto failed; - } - - obdio_iocinit (conn); - rc = obdio_ioctl (conn, OBD_IOC_CONNECT); - if (rc != 0) { - fprintf(stderr, "obdio_connect: Can't connect to device " - "%d: %s\n", device, strerror (errno)); - goto failed; - } - - conn->oc_conn_cookie = 
conn->oc_data.ioc_cookie; - return (conn); - - failed: - free (conn); - return (NULL); -} - -void -obdio_disconnect (struct obdio_conn *conn) -{ - close (conn->oc_fd); - /* obdclass will automatically close on last ref */ - free (conn); -} - -int -obdio_open (struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh) -{ - int rc; - - obdio_iocinit (conn); - - conn->oc_data.ioc_obdo1.o_id = oid; - conn->oc_data.ioc_obdo1.o_mode = S_IFREG; - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; - - rc = obdio_ioctl (conn, OBD_IOC_OPEN); - - if (rc == 0) - memcpy (fh, obdo_handle(&conn->oc_data.ioc_obdo1), sizeof (*fh)); - - return (rc); -} - -int -obdio_close (struct obdio_conn *conn, uint64_t oid, struct lustre_handle *fh) -{ - obdio_iocinit (conn); - - - conn->oc_data.ioc_obdo1.o_id = oid; - conn->oc_data.ioc_obdo1.o_mode = S_IFREG; - memcpy (obdo_handle (&conn->oc_data.ioc_obdo1), fh, sizeof (*fh)); - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | - OBD_MD_FLMODE | OBD_MD_FLHANDLE; - - return (obdio_ioctl (conn, OBD_IOC_CLOSE)); -} - -int -obdio_pread (struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset) -{ - obdio_iocinit (conn); - - conn->oc_data.ioc_obdo1.o_id = oid; - conn->oc_data.ioc_obdo1.o_mode = S_IFREG; - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; - - conn->oc_data.ioc_pbuf2 = buffer; - conn->oc_data.ioc_plen2 = count; - conn->oc_data.ioc_count = count; - conn->oc_data.ioc_offset = offset; - - return (obdio_ioctl (conn, OBD_IOC_BRW_READ)); -} - -int -obdio_pwrite (struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset) -{ - obdio_iocinit (conn); - - conn->oc_data.ioc_obdo1.o_id = oid; - conn->oc_data.ioc_obdo1.o_mode = S_IFREG; - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; - - conn->oc_data.ioc_pbuf2 = buffer; - conn->oc_data.ioc_plen2 = count; - conn->oc_data.ioc_count = 
count; - conn->oc_data.ioc_offset = offset; - - return (obdio_ioctl (conn, OBD_IOC_BRW_WRITE)); -} - -int -obdio_enqueue (struct obdio_conn *conn, uint64_t oid, - int mode, uint64_t offset, uint32_t count, - struct lustre_handle *lh) -{ - int rc; - - obdio_iocinit (conn); - - conn->oc_data.ioc_obdo1.o_id = oid; - conn->oc_data.ioc_obdo1.o_mode = S_IFREG; - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLMODE; - - conn->oc_data.ioc_conn1 = mode; - conn->oc_data.ioc_count = count; - conn->oc_data.ioc_offset = offset; - - rc = obdio_ioctl (conn, ECHO_IOC_ENQUEUE); - - if (rc == 0) - memcpy (lh, obdo_handle (&conn->oc_data.ioc_obdo1), sizeof (*lh)); - - return (rc); -} - -int -obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh) -{ - obdio_iocinit (conn); - - memcpy (obdo_handle (&conn->oc_data.ioc_obdo1), lh, sizeof (*lh)); - conn->oc_data.ioc_obdo1.o_valid = OBD_MD_FLHANDLE; - - return (obdio_ioctl (conn, ECHO_IOC_CANCEL)); -} - -void * -obdio_alloc_aligned_buffer (void **spacep, int size) -{ - int pagesize = getpagesize(); - void *space = malloc (size + pagesize - 1); - - *spacep = space; - if (space == NULL) - return (NULL); - - return ((void *)(((unsigned long)space + pagesize - 1) & ~(pagesize - 1))); -} - -struct obdio_barrier * -obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) -{ - struct obdio_barrier *b; - - b = (struct obdio_barrier *)malloc (sizeof (*b)); - if (b == NULL) { - fprintf (stderr, "obdio_new_barrier "LPX64": Can't allocate\n", oid); - return (NULL); - } - - b->ob_id = id; - b->ob_oid = oid; - b->ob_npeers = npeers; - b->ob_ordinal = 0; - b->ob_count = 0; - return (b); -} - -int -obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b) -{ - struct lustre_handle fh; - struct lustre_handle lh; - int rc; - int rc2; - void *space; - struct obdio_barrier *fileb; - - if (b->ob_ordinal != 0 || - b->ob_count != 0) { - fprintf (stderr, "obdio_setup_barrier: invalid parameter\n"); - abort (); - } 
- - rc = obdio_open (conn, b->ob_oid, &fh); - if (rc != 0) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Failed to open object: %s\n", - b->ob_oid, strerror (errno)); - return (rc); - } - - fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ()); - if (fileb == NULL) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Can't allocate page buffer\n", - b->ob_oid); - rc = -1; - goto out_0; - } - - memset (fileb, 0, getpagesize ()); - *fileb = *b; - - rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, getpagesize (), &lh); - if (rc != 0) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Error on enqueue: %s\n", - b->ob_oid, strerror (errno)); - goto out_1; - } - - rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); - if (rc != 0) - fprintf (stderr, "obdio_setup_barrier "LPX64": Error on write: %s\n", - b->ob_oid, strerror (errno)); - - rc2 = obdio_cancel (conn, &lh); - if (rc == 0 && rc2 != 0) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Error on cancel: %s\n", - b->ob_oid, strerror (errno)); - rc = rc2; - } - out_1: - free (space); - out_0: - rc2 = obdio_close (conn, b->ob_oid, &fh); - if (rc == 0 && rc2 != 0) { - fprintf (stderr, "obdio_setup_barrier "LPX64": Error on close: %s\n", - b->ob_oid, strerror (errno)); - rc = rc2; - } - - return (rc); -} - -int -obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b) -{ - struct lustre_handle fh; - struct lustre_handle lh; - int rc; - int rc2; - void *space; - struct obdio_barrier *fileb; - char *mode; - - rc = obdio_open (conn, b->ob_oid, &fh); - if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on open: %s\n", - b->ob_oid, strerror (errno)); - return (rc); - } - - fileb = (struct obdio_barrier *) obdio_alloc_aligned_buffer (&space, getpagesize ()); - if (fileb == NULL) { - fprintf (stderr, "obdio_barrier "LPX64": Can't allocate page buffer\n", - b->ob_oid); - rc = -1; - goto out_0; - } - - rc = obdio_enqueue (conn, b->ob_oid, LCK_PW, 0, 
getpagesize (), &lh); - if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on PW enqueue: %s\n", - b->ob_oid, strerror (errno)); - goto out_1; - } - - memset (fileb, 0xeb, getpagesize ()); - rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); - if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on initial read: %s\n", - b->ob_oid, strerror (errno)); - goto out_2; - } - - if (fileb->ob_id != b->ob_id || - fileb->ob_oid != b->ob_oid || - fileb->ob_npeers != b->ob_npeers || - fileb->ob_count >= b->ob_npeers || - fileb->ob_ordinal != b->ob_ordinal) { - fprintf (stderr, "obdio_barrier "LPX64": corrupt on initial read\n", b->ob_id); - fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, - fileb->ob_ordinal, fileb->ob_count); - fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - b->ob_id, b->ob_oid, b->ob_npeers, - b->ob_ordinal, b->ob_count); - rc = -1; - goto out_2; - } - - fileb->ob_count++; - if (fileb->ob_count == fileb->ob_npeers) { /* I'm the last joiner */ - fileb->ob_count = 0; /* join count for next barrier */ - fileb->ob_ordinal++; /* signal all joined */ - } - - rc = obdio_pwrite (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); - if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on initial write: %s\n", - b->ob_oid, strerror (errno)); - goto out_2; - } - - mode = "PW"; - b->ob_ordinal++; /* now I wait... 
*/ - while (fileb->ob_ordinal != b->ob_ordinal) { - - rc = obdio_cancel (conn, &lh); - if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on %s cancel: %s\n", - b->ob_oid, mode, strerror (errno)); - goto out_1; - } - - mode = "PR"; - rc = obdio_enqueue (conn, b->ob_oid, LCK_PR, 0, getpagesize (), &lh); - if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on PR enqueue: %s\n", - b->ob_oid, strerror (errno)); - goto out_1; - } - - memset (fileb, 0xeb, getpagesize ()); - rc = obdio_pread (conn, b->ob_oid, (void *)fileb, getpagesize (), 0); - if (rc != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on read: %s\n", - b->ob_oid, strerror (errno)); - goto out_2; - } - - if (fileb->ob_id != b->ob_id || - fileb->ob_oid != b->ob_oid || - fileb->ob_npeers != b->ob_npeers || - fileb->ob_count >= b->ob_npeers || - (fileb->ob_ordinal != b->ob_ordinal - 1 && - fileb->ob_ordinal != b->ob_ordinal)) { - fprintf (stderr, "obdio_barrier "LPX64": corrupt\n", b->ob_id); - fprintf (stderr, " got ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - fileb->ob_id, fileb->ob_oid, fileb->ob_npeers, - fileb->ob_ordinal, fileb->ob_count); - fprintf (stderr, " expected ["LPX64","LPX64","LPX64","LPX64","LPX64"]\n", - b->ob_id, b->ob_oid, b->ob_npeers, - b->ob_ordinal, b->ob_count); - rc = -1; - goto out_2; - } - } - - out_2: - rc2 = obdio_cancel (conn, &lh); - if (rc == 0 && rc2 != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on cancel: %s\n", - b->ob_oid, strerror (errno)); - rc = rc2; - } - out_1: - free (space); - out_0: - rc2 = obdio_close (conn, b->ob_oid, &fh); - if (rc == 0 && rc2 != 0) { - fprintf (stderr, "obdio_barrier "LPX64": Error on close: %s\n", - b->ob_oid, strerror (errno)); - rc = rc2; - } - - return (rc); -} - - diff --git a/lustre/utils/obdiolib.h b/lustre/utils/obdiolib.h deleted file mode 100644 index 3811b41..0000000 --- a/lustre/utils/obdiolib.h +++ /dev/null @@ -1,69 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; 
-*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2003 Cluster File Systems, Inc. - * Author: Eric Barton <eeb@clusterfs.com> - * - * This file is part of Lustre, http://www.lustre.org. - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ -#ifndef _OBDIOLIB_H_ -#define _OBDIOLIB_H_ - -#include <unistd.h> - -#include <sys/types.h> -#include <sys/stat.h> - -#include <linux/lustre_lib.h> -#include <linux/lustre_idl.h> -#include <linux/obd_class.h> - -struct obdio_conn { - int oc_fd; - uint64_t oc_conn_cookie; - struct obd_ioctl_data oc_data; - char oc_buffer[8192]; -}; - -struct obdio_barrier { - uint64_t ob_id; - uint64_t ob_oid; - uint64_t ob_npeers; - uint64_t ob_ordinal; - uint64_t ob_count; -}; - -extern struct obdio_conn * obdio_connect (int device); -extern void obdio_disconnect (struct obdio_conn *conn); -extern int obdio_open (struct obdio_conn *conn, uint64_t oid, - struct lustre_handle *fh); -extern int obdio_close (struct obdio_conn *conn, uint64_t oid, - struct lustre_handle *fh); -extern int obdio_pread (struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset); -extern int obdio_pwrite (struct obdio_conn *conn, uint64_t oid, - char *buffer, uint32_t count, uint64_t offset); -extern int obdio_enqueue (struct obdio_conn *conn, uint64_t oid, - int mode, uint64_t offset, uint32_t count, - struct lustre_handle *lh); -extern int 
obdio_cancel (struct obdio_conn *conn, struct lustre_handle *lh); -extern void *obdio_alloc_aligned_buffer (void **spacep, int size); -extern struct obdio_barrier *obdio_new_barrier (uint64_t oid, uint64_t id, int npeers) ; -extern int obdio_setup_barrier (struct obdio_conn *conn, struct obdio_barrier *b); -extern int obdio_barrier (struct obdio_conn *conn, struct obdio_barrier *b); - -#endif diff --git a/lustre/utils/parser.c b/lustre/utils/parser.c deleted file mode 100644 index fef987b..0000000 --- a/lustre/utils/parser.c +++ /dev/null @@ -1,725 +0,0 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * - * Copyright (C) 2001 Cluster File Systems, Inc. - * - * This file is part of Lustre, http://www.sf.net/projects/lustre/ - * - * Lustre is free software; you can redistribute it and/or - * modify it under the terms of version 2 of the GNU General Public - * License as published by the Free Software Foundation. - * - * Lustre is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Lustre; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- * - */ -#include <stdio.h> -#include <stdlib.h> -#include <ctype.h> -#include <string.h> -#include <stddef.h> -#include <unistd.h> -#include <sys/param.h> -#include <assert.h> - -#ifdef HAVE_LIBREADLINE -#define READLINE_LIBRARY -#include <readline/readline.h> - -/* completion_matches() is #if 0-ed out in modern glibc */ -#ifndef completion_matches -#define completion_matches rl_completion_matches -#endif -extern void using_history(void); -extern void stifle_history(int); -extern void add_history(char *); -#endif - -#include "parser.h" - -static command_t * top_level; /* Top level of commands, initialized by - * InitParser */ -static char * parser_prompt = NULL;/* Parser prompt, set by InitParser */ -static int done; /* Set to 1 if user types exit or quit */ -static int ignore_errors; /* Normally, the parser will quit when - an error occurs in non-interacive - mode. Setting this to non-zero will - force it to keep buggering on. */ - - -/* static functions */ -static char *skipwhitespace(char *s); -static char *skiptowhitespace(char *s); -static command_t *find_cmd(char *name, command_t cmds[], char **next); -static int process(char *s, char **next, command_t *lookup, command_t **result, - char **prev); -static void print_commands(char *str, command_t *table); - -static char * skipwhitespace(char * s) -{ - char * t; - int len; - - len = (int)strlen(s); - for (t = s; t <= s + len && isspace(*t); t++); - return(t); -} - - -static char * skiptowhitespace(char * s) -{ - char * t; - - for (t = s; *t && !isspace(*t); t++); - return(t); -} - -static int line2args(char *line, char **argv, int maxargs) -{ - char *arg; - int i = 0; - - arg = strtok(line, " \t"); - if ( arg ) { - argv[i] = arg; - i++; - } else - return 0; - - while( (arg = strtok(NULL, " \t")) && (i <= maxargs)) { - argv[i] = arg; - i++; - } - return i; -} - -/* find a command -- return it if unique otherwise print alternatives */ -static command_t *Parser_findargcmd(char *name, command_t cmds[]) -{ - 
command_t *cmd; - - for (cmd = cmds; cmd->pc_name; cmd++) { - if (strcmp(name, cmd->pc_name) == 0) - return cmd; - } - return NULL; -} - -void Parser_ignore_errors(int ignore) -{ - ignore_errors = ignore; -} - -int Parser_execarg(int argc, char **argv, command_t cmds[]) -{ - command_t *cmd; - - cmd = Parser_findargcmd(argv[0], cmds); - if ( cmd ) { - return (cmd->pc_func)(argc, argv); - } else { - printf("Try interactive use without arguments or use one of:\n"); - for (cmd = cmds; cmd->pc_name; cmd++) - printf("\"%s\"\n", cmd->pc_name); - printf("as argument.\n"); - } - return -1; -} - -/* returns the command_t * (NULL if not found) corresponding to a - _partial_ match with the first token in name. It sets *next to - point to the following token. Does not modify *name. */ -static command_t * find_cmd(char * name, command_t cmds[], char ** next) -{ - int i, len; - - if (!cmds || !name ) - return NULL; - - /* This sets name to point to the first non-white space character, - and next to the first whitespace after name, len to the length: do - this with strtok*/ - name = skipwhitespace(name); - *next = skiptowhitespace(name); - len = *next - name; - if (len == 0) - return NULL; - - for (i = 0; cmds[i].pc_name; i++) { - if (strncasecmp(name, cmds[i].pc_name, len) == 0) { - *next = skipwhitespace(*next); - return(&cmds[i]); - } - } - return NULL; -} - -/* Recursively process a command line string s and find the command - corresponding to it. This can be ambiguous, full, incomplete, - non-existent. */ -static int process(char *s, char ** next, command_t *lookup, - command_t **result, char **prev) -{ - *result = find_cmd(s, lookup, next); - *prev = s; - - /* non existent */ - if ( ! *result ) - return CMD_NONE; - - /* found entry: is it ambigous, i.e. not exact command name and - more than one command in the list matches. 
Note that find_cmd - points to the first ambiguous entry */ - if ( strncasecmp(s, (*result)->pc_name, strlen((*result)->pc_name)) && - find_cmd(s, (*result) + 1, next)) - return CMD_AMBIG; - - /* found a unique command: component or full? */ - if ( (*result)->pc_func ) { - return CMD_COMPLETE; - } else { - if ( *next == '\0' ) { - return CMD_INCOMPLETE; - } else { - return process(*next, next, (*result)->pc_sub_cmd, result, prev); - } - } -} - -#ifdef HAVE_LIBREADLINE -static command_t * match_tbl; /* Command completion against this table */ -static char * command_generator(const char * text, int state) -{ - static int index, - len; - char *name; - - /* Do we have a match table? */ - if (!match_tbl) - return NULL; - - /* If this is the first time called on this word, state is 0 */ - if (!state) { - index = 0; - len = (int)strlen(text); - } - - /* Return next name in the command list that paritally matches test */ - while ( (name = (match_tbl + index)->pc_name) ) { - index++; - - if (strncasecmp(name, text, len) == 0) { - return(strdup(name)); - } - } - - /* No more matches */ - return NULL; -} - -/* probably called by readline */ -static char **command_completion(char * text, int start, int end) -{ - command_t * table; - char * pos; - - match_tbl = top_level; - for (table = find_cmd(rl_line_buffer, match_tbl, &pos); - table; - table = find_cmd(pos, match_tbl, &pos)) { - - if (*(pos - 1) == ' ') match_tbl = table->pc_sub_cmd; - } - - return(completion_matches(text, command_generator)); -} -#endif - -/* take a string and execute the function or print help */ -int execute_line(char * line) -{ - command_t *cmd, *ambig; - char *prev; - char *next, *tmp; - char *argv[MAXARGS]; - int i; - int rc = 0; - - switch( process(line, &next, top_level, &cmd, &prev) ) { - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, cmd, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - cmd = ambig + 1; - } - fprintf(stderr, 
"\n"); - break; - case CMD_NONE: - fprintf(stderr, "No such command, type help\n"); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; cmd->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", cmd->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_COMPLETE: - i = line2args(line, argv, MAXARGS); - rc = (cmd->pc_func)(i, argv); - - if (rc == CMD_HELP) - fprintf(stderr, "%s\n", cmd->pc_help); - - break; - } - - return rc; -} - -int -noop_fn () -{ - return (0); -} - -/* just in case you're ever in an airplane and discover you - forgot to install readline-dev. :) */ -int init_input() -{ - int interactive = isatty (fileno (stdin)); - -#ifdef HAVE_LIBREADLINE - using_history(); - stifle_history(HISTORY); - - if (!interactive) - { - rl_prep_term_function = (rl_vintfunc_t *)noop_fn; - rl_deprep_term_function = (rl_voidfunc_t *)noop_fn; - } - - rl_attempted_completion_function = (CPPFunction *)command_completion; - rl_completion_entry_function = (void *)command_generator; -#endif - return interactive; -} - -#ifndef HAVE_LIBREADLINE -#define add_history(s) -char * readline(char * prompt) -{ - char line[2048]; - int n = 0; - if (prompt) - printf ("%s", prompt); - if (fgets(line, sizeof(line), stdin) == NULL) - return (NULL); - n = strlen(line); - if (n && line[n-1] == '\n') - line[n-1] = '\0'; - return strdup(line); -} -#endif - -/* this is the command execution machine */ -int Parser_commands(void) -{ - char *line, *s; - int rc = 0, save_error = 0; - int interactive; - - interactive = init_input(); - - while(!done) { - line = readline(interactive ? 
parser_prompt : NULL); - - if (!line) break; - - s = skipwhitespace(line); - - if (*s) { - add_history(s); - rc = execute_line(s); - } - /* stop on error if not-interactive */ - if (rc != 0 && !interactive) { - if (save_error == 0) - save_error = rc; - if (!ignore_errors) - done = 1; - } - - free(line); - } - if (save_error) - rc = save_error; - return rc; -} - - -/* sets the parser prompt */ -void Parser_init(char * prompt, command_t * cmds) -{ - done = 0; - top_level = cmds; - if (parser_prompt) free(parser_prompt); - parser_prompt = strdup(prompt); -} - -/* frees the parser prompt */ -void Parser_exit(int argc, char *argv[]) -{ - done = 1; - free(parser_prompt); - parser_prompt = NULL; -} - -/* convert a string to an integer */ -int Parser_int(char *s, int *val) -{ - int ret; - - if (*s != '0') - ret = sscanf(s, "%d", val); - else if (*(s+1) != 'x') - ret = sscanf(s, "%o", val); - else { - s++; - ret = sscanf(++s, "%x", val); - } - - return(ret); -} - - -void Parser_qhelp(int argc, char *argv[]) { - - printf("Available commands are:\n"); - - print_commands(NULL, top_level); - printf("For more help type: help command-name\n"); -} - -int Parser_help(int argc, char **argv) -{ - char line[1024]; - char *next, *prev, *tmp; - command_t *result, *ambig; - int i; - - if ( argc == 1 ) { - Parser_qhelp(argc, argv); - return 0; - } - - line[0]='\0'; - for ( i = 1 ; i < argc ; i++ ) { - strcat(line, argv[i]); - } - - switch ( process(line, &next, top_level, &result, &prev) ) { - case CMD_COMPLETE: - fprintf(stderr, "%s: %s\n",line, result->pc_help); - break; - case CMD_NONE: - fprintf(stderr, "%s: Unknown command.\n", line); - break; - case CMD_INCOMPLETE: - fprintf(stderr, - "'%s' incomplete command. 
Use '%s x' where x is one of:\n", - line, line); - fprintf(stderr, "\t"); - for (i = 0; result->pc_sub_cmd[i].pc_name; i++) { - fprintf(stderr, "%s ", result->pc_sub_cmd[i].pc_name); - } - fprintf(stderr, "\n"); - break; - case CMD_AMBIG: - fprintf(stderr, "Ambiguous command \'%s\'\nOptions: ", line); - while( (ambig = find_cmd(prev, result, &tmp)) ) { - fprintf(stderr, "%s ", ambig->pc_name); - result = ambig + 1; - } - fprintf(stderr, "\n"); - break; - } - return 0; -} - - -void Parser_printhelp(char *cmd) -{ - char *argv[] = { "help", cmd }; - Parser_help(2, argv); -} - - -/************************************************************************* - * COMMANDS * - *************************************************************************/ - - -static void print_commands(char * str, command_t * table) { - command_t * cmds; - char buf[80]; - - for (cmds = table; cmds->pc_name; cmds++) { - if (cmds->pc_func) { - if (str) printf("\t%s %s\n", str, cmds->pc_name); - else printf("\t%s\n", cmds->pc_name); - } - if (cmds->pc_sub_cmd) { - if (str) { - sprintf(buf, "%s %s", str, cmds->pc_name); - print_commands(buf, cmds->pc_sub_cmd); - } else { - print_commands(cmds->pc_name, cmds->pc_sub_cmd); - } - } - } -} - -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len) -{ - char *line = NULL; - int size = strlen(prompt) + strlen(deft) + 8; - char *theprompt; - theprompt = malloc(size); - assert(theprompt); - - sprintf(theprompt, "%s [%s]: ", prompt, deft); - - line = readline(theprompt); - free(theprompt); - - if ( line == NULL || *line == '\0' ) { - strncpy(res, deft, len); - } else { - strncpy(res, line, len); - } - - if ( line ) { - free(line); - return res; - } else { - return NULL; - } -} - -/* get integer from prompt, loop forever to get it */ -int Parser_getint(const char *prompt, long min, long max, long deft, int base) -{ - int rc; - long result; - char *line; - int size = strlen(prompt) + 40; - char *theprompt = malloc(size); - 
assert(theprompt); - sprintf(theprompt,"%s [%ld, (0x%lx)]: ", prompt, deft, deft); - - fflush(stdout); - - do { - line = NULL; - line = readline(theprompt); - if ( !line ) { - fprintf(stdout, "Please enter an integer.\n"); - fflush(stdout); - continue; - } - if ( *line == '\0' ) { - free(line); - result = deft; - break; - } - rc = Parser_arg2int(line, &result, base); - free(line); - if ( rc != 0 ) { - fprintf(stdout, "Invalid string.\n"); - fflush(stdout); - } else if ( result > max || result < min ) { - fprintf(stdout, "Error: response must lie between %ld and %ld.\n", - min, max); - fflush(stdout); - } else { - break; - } - } while ( 1 ) ; - - if (theprompt) - free(theprompt); - return result; - -} - -/* get boolean (starting with YyNn; loop forever */ -int Parser_getbool(const char *prompt, int deft) -{ - int result = 0; - char *line; - int size = strlen(prompt) + 8; - char *theprompt = malloc(size); - assert(theprompt); - - fflush(stdout); - - if ( deft != 0 && deft != 1 ) { - fprintf(stderr, "Error: Parser_getbool given bad default (%d).\n", - deft); - assert ( 0 ); - } - sprintf(theprompt, "%s [%s]: ", prompt, (deft==0)? "N" : "Y"); - - do { - line = NULL; - line = readline(theprompt); - if ( line == NULL ) { - result = deft; - break; - } - if ( *line == '\0' ) { - result = deft; - break; - } - if ( *line == 'y' || *line == 'Y' ) { - result = 1; - break; - } - if ( *line == 'n' || *line == 'N' ) { - result = 0; - break; - } - if ( line ) - free(line); - fprintf(stdout, "Invalid string. 
Must start with yY or nN\n"); - fflush(stdout); - } while ( 1 ); - - if ( line ) - free(line); - if ( theprompt ) - free(theprompt); - return result; -} - -/* parse int out of a string or prompt for it */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base) -{ - long result; - int rc; - - rc = Parser_arg2int(inp, &result, base); - - if ( rc == 0 ) { - return result; - } else { - return Parser_getint(prompt, deft, min, max, base); - } -} - -/* parse int out of a string or prompt for it */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len) -{ - if ( inp == NULL || *inp == '\0' ) { - return Parser_getstr(prompt, deft, answer, len); - } else - return inp; -} - -/* change a string into a number: return 0 on success. No invalid characters - allowed. The processing of base and validity follows strtol(3)*/ -int Parser_arg2int(const char *inp, long *result, int base) -{ - char *endptr; - - if ( (base !=0) && (base < 2 || base > 36) ) - return 1; - - *result = strtol(inp, &endptr, base); - - if ( *inp != '\0' && *endptr == '\0' ) - return 0; - else - return 1; -} - -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size (int *sizep, char *str) { - int size; - char mod[32]; - - switch (sscanf (str, "%d%1[gGmMkK]", &size, mod)) { - default: - return (-1); - - case 1: - *sizep = size; - return (0); - - case 2: - switch (*mod) { - case 'g': - case 'G': - *sizep = size << 30; - return (0); - - case 'm': - case 'M': - *sizep = size << 20; - return (0); - - case 'k': - case 'K': - *sizep = size << 10; - return (0); - - default: - *sizep = size; - return (0); - } - } -} - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool (int *b, char *str) { - if (!strcasecmp (str, "no") || - !strcasecmp (str, "n") || - !strcasecmp (str, "off") || - !strcasecmp (str, "disable")) - { - *b = 0; - return (0); - } - - if (!strcasecmp (str, "yes") || - !strcasecmp 
(str, "y") || - !strcasecmp (str, "on") || - !strcasecmp (str, "enable")) - { - *b = 1; - return (0); - } - - return (-1); -} - -int Parser_quit(int argc, char **argv) -{ - argc = argc; - argv = argv; - done = 1; - return 0; -} diff --git a/lustre/utils/parser.h b/lustre/utils/parser.h deleted file mode 100644 index 5aece60..0000000 --- a/lustre/utils/parser.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#define HISTORY 100 /* Don't let history grow unbounded */ -#define MAXARGS 100 - -#define CMD_COMPLETE 0 -#define CMD_INCOMPLETE 1 -#define CMD_NONE 2 -#define CMD_AMBIG 3 -#define CMD_HELP 4 - -typedef struct parser_cmd { - char *pc_name; - int (* pc_func)(int, char **); - struct parser_cmd * pc_sub_cmd; - char *pc_help; -} command_t; - -typedef struct argcmd { - char *ac_name; - int (*ac_func)(int, char **); - char *ac_help; -} argcmd_t; - -typedef struct network { - char *type; - char *server; - int port; -} network_t; - -int Parser_quit(int argc, char **argv); -void Parser_init(char *, command_t *); /* Set prompt and load command list */ -int Parser_commands(void); /* Start the command parser */ -void Parser_qhelp(int, char **); /* Quick help routine */ -int Parser_help(int, char **); /* Detailed help routine */ -void Parser_ignore_errors(int ignore); /* Set the ignore errors flag */ -void Parser_printhelp(char *); /* Detailed help routine */ -void Parser_exit(int, char **); /* Shuts down command parser */ -int Parser_execarg(int argc, char **argv, command_t cmds[]); -int execute_line(char * line); - -/* Converts a string to an integer */ -int Parser_int(char *, int *); - -/* Prompts for a string, with default values and a maximum length */ -char *Parser_getstr(const char *prompt, const char *deft, char *res, - size_t len); - -/* Prompts for an integer, with minimum, maximum and default values and base */ -int Parser_getint(const char *prompt, long min, long max, long deft, - int base); - -/* Prompts for a yes/no, with default */ 
-int Parser_getbool(const char *prompt, int deft); - -/* Extracts an integer from a string, or prompts if it cannot get one */ -long Parser_intarg(const char *inp, const char *prompt, int deft, - int min, int max, int base); - -/* Extracts a word from the input, or propmts if it cannot get one */ -char *Parser_strarg(char *inp, const char *prompt, const char *deft, - char *answer, int len); - -/* Extracts an integer from a string with a base */ -int Parser_arg2int(const char *inp, long *result, int base); - -/* Convert human readable size string to and int; "1k" -> 1000 */ -int Parser_size(int *sizep, char *str); - -/* Convert a string boolean to an int; "enable" -> 1 */ -int Parser_bool(int *b, char *str); - -#endif diff --git a/lustre/utils/wirecheck.c b/lustre/utils/wirecheck.c deleted file mode 100644 index 5b6a589..0000000 --- a/lustre/utils/wirecheck.c +++ /dev/null @@ -1,588 +0,0 @@ -#include <stdio.h> -#include <liblustre.h> -#include <linux/lustre_lib.h> -#include <linux/lustre_idl.h> - -#define BLANK_LINE() \ -do { \ - printf ("\n"); \ -} while (0) - -#define COMMENT(c) \ -do { \ - printf (" /* "c" */\n"); \ -} while (0) - -#define STRINGIFY(a) #a - -#define CHECK_DEFINE(a) \ -do { \ - printf(" LASSERT ("#a" == "STRINGIFY(a)");\n"); \ -} while (0) - -#define CHECK_VALUE(a) \ -do { \ - printf(" LASSERT ("#a" == %d);\n", a); \ -} while (0) - -#define CHECK_MEMBER_OFFSET(s,m) \ -do { \ - CHECK_VALUE(offsetof (struct s, m)); \ -} while (0) - -#define CHECK_MEMBER_SIZEOF(s,m) \ -do { \ - CHECK_VALUE((int)sizeof(((struct s *)0)->m)); \ -} while (0) - -#define CHECK_MEMBER(s,m) \ -do { \ - CHECK_MEMBER_OFFSET(s, m); \ - CHECK_MEMBER_SIZEOF(s, m); \ -} while (0) - -#define CHECK_STRUCT(s) \ -do { \ - COMMENT("Checks for struct "#s); \ - CHECK_VALUE((int)sizeof(struct s)); \ -} while (0) - - - -void check1 (void) -{ -#define VALUE 1234567 - - CHECK_VALUE (VALUE); - CHECK_DEFINE (VALUE); -} - -void -check_lustre_handle (void) -{ - BLANK_LINE (); - CHECK_STRUCT 
(lustre_handle); - CHECK_MEMBER (lustre_handle, cookie); -} - -void -check_lustre_msg (void) -{ - BLANK_LINE (); - CHECK_STRUCT (lustre_msg); - CHECK_MEMBER (lustre_msg, handle); - CHECK_MEMBER (lustre_msg, magic); - CHECK_MEMBER (lustre_msg, type); - CHECK_MEMBER (lustre_msg, version); - CHECK_MEMBER (lustre_msg, opc); - CHECK_MEMBER (lustre_msg, last_xid); - CHECK_MEMBER (lustre_msg, last_committed); - CHECK_MEMBER (lustre_msg, transno); - CHECK_MEMBER (lustre_msg, status); - CHECK_MEMBER (lustre_msg, flags); - CHECK_MEMBER (lustre_msg, bufcount); - CHECK_MEMBER (lustre_msg, buflens[7]); -} - -void -check_obdo (void) -{ - BLANK_LINE (); - CHECK_STRUCT (obdo); - CHECK_MEMBER (obdo, o_id); - CHECK_MEMBER (obdo, o_gr); - CHECK_MEMBER (obdo, o_atime); - CHECK_MEMBER (obdo, o_mtime); - CHECK_MEMBER (obdo, o_ctime); - CHECK_MEMBER (obdo, o_size); - CHECK_MEMBER (obdo, o_blocks); - CHECK_MEMBER (obdo, o_rdev); - CHECK_MEMBER (obdo, o_blksize); - CHECK_MEMBER (obdo, o_mode); - CHECK_MEMBER (obdo, o_uid); - CHECK_MEMBER (obdo, o_gid); - CHECK_MEMBER (obdo, o_flags); - CHECK_MEMBER (obdo, o_nlink); - CHECK_MEMBER (obdo, o_generation); - CHECK_MEMBER (obdo, o_valid); - CHECK_MEMBER (obdo, o_obdflags); - CHECK_MEMBER (obdo, o_easize); - CHECK_MEMBER (obdo, o_inline); -} - -void -check_obd_statfs (void) -{ - BLANK_LINE (); - CHECK_STRUCT (obd_statfs); - CHECK_MEMBER (obd_statfs, os_type); - CHECK_MEMBER (obd_statfs, os_blocks); - CHECK_MEMBER (obd_statfs, os_bfree); - CHECK_MEMBER (obd_statfs, os_bavail); - CHECK_MEMBER (obd_statfs, os_ffree); - CHECK_MEMBER (obd_statfs, os_fsid); - CHECK_MEMBER (obd_statfs, os_bsize); - CHECK_MEMBER (obd_statfs, os_namelen); -} - -void -check_obd_ioobj (void) -{ - BLANK_LINE (); - CHECK_STRUCT (obd_ioobj); - CHECK_MEMBER (obd_ioobj, ioo_id); - CHECK_MEMBER (obd_ioobj, ioo_gr); - CHECK_MEMBER (obd_ioobj, ioo_type); - CHECK_MEMBER (obd_ioobj, ioo_bufcnt); -} - -void -check_niobuf_remote (void) -{ - BLANK_LINE (); - CHECK_STRUCT 
(niobuf_remote); - CHECK_MEMBER (niobuf_remote, offset); - CHECK_MEMBER (niobuf_remote, len); - CHECK_MEMBER (niobuf_remote, flags); -} - -void -check_ost_body (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ost_body); - CHECK_MEMBER (ost_body, oa); -} - -void -check_ll_fid (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ll_fid); - CHECK_MEMBER (ll_fid, id); - CHECK_MEMBER (ll_fid, generation); - CHECK_MEMBER (ll_fid, f_type); -} - -void -check_mds_status_req (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_status_req); - CHECK_MEMBER (mds_status_req, flags); - CHECK_MEMBER (mds_status_req, repbuf); -} - -void -check_mds_fileh_body (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_fileh_body); - CHECK_MEMBER (mds_fileh_body, f_fid); -} - -void -check_mds_body (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_body); - CHECK_MEMBER (mds_body, fid1); - CHECK_MEMBER (mds_body, fid2); - CHECK_MEMBER (mds_body, handle); - CHECK_MEMBER (mds_body, size); - CHECK_MEMBER (mds_body, blocks); - CHECK_MEMBER (mds_body, ino); - CHECK_MEMBER (mds_body, valid); - CHECK_MEMBER (mds_body, fsuid); - CHECK_MEMBER (mds_body, fsgid); - CHECK_MEMBER (mds_body, capability); - CHECK_MEMBER (mds_body, mode); - CHECK_MEMBER (mds_body, uid); - CHECK_MEMBER (mds_body, gid); - CHECK_MEMBER (mds_body, mtime); - CHECK_MEMBER (mds_body, ctime); - CHECK_MEMBER (mds_body, atime); - CHECK_MEMBER (mds_body, flags); - CHECK_MEMBER (mds_body, rdev); - CHECK_MEMBER (mds_body, nlink); - CHECK_MEMBER (mds_body, generation); - CHECK_MEMBER (mds_body, suppgid); -} - -void -check_mds_rec_setattr (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_rec_setattr); - CHECK_MEMBER (mds_rec_setattr, sa_opcode); - CHECK_MEMBER (mds_rec_setattr, sa_fsuid); - CHECK_MEMBER (mds_rec_setattr, sa_fsgid); - CHECK_MEMBER (mds_rec_setattr, sa_cap); - CHECK_MEMBER (mds_rec_setattr, sa_reserved); - CHECK_MEMBER (mds_rec_setattr, sa_valid); - CHECK_MEMBER (mds_rec_setattr, sa_fid); - CHECK_MEMBER (mds_rec_setattr, sa_mode); - CHECK_MEMBER 
(mds_rec_setattr, sa_uid); - CHECK_MEMBER (mds_rec_setattr, sa_gid); - CHECK_MEMBER (mds_rec_setattr, sa_attr_flags); - CHECK_MEMBER (mds_rec_setattr, sa_size); - CHECK_MEMBER (mds_rec_setattr, sa_atime); - CHECK_MEMBER (mds_rec_setattr, sa_mtime); - CHECK_MEMBER (mds_rec_setattr, sa_ctime); - CHECK_MEMBER (mds_rec_setattr, sa_suppgid); -} - -void -check_mds_rec_create (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_rec_create); - CHECK_MEMBER (mds_rec_create, cr_opcode); - CHECK_MEMBER (mds_rec_create, cr_fsuid); - CHECK_MEMBER (mds_rec_create, cr_fsgid); - CHECK_MEMBER (mds_rec_create, cr_cap); - CHECK_MEMBER (mds_rec_create, cr_flags); - CHECK_MEMBER (mds_rec_create, cr_mode); - CHECK_MEMBER (mds_rec_create, cr_fid); - CHECK_MEMBER (mds_rec_create, cr_replayfid); - CHECK_MEMBER (mds_rec_create, cr_uid); - CHECK_MEMBER (mds_rec_create, cr_gid); - CHECK_MEMBER (mds_rec_create, cr_time); - CHECK_MEMBER (mds_rec_create, cr_rdev); - CHECK_MEMBER (mds_rec_create, cr_suppgid); -} - -void -check_mds_rec_link (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_rec_link); - CHECK_MEMBER (mds_rec_link, lk_opcode); - CHECK_MEMBER (mds_rec_link, lk_fsuid); - CHECK_MEMBER (mds_rec_link, lk_fsgid); - CHECK_MEMBER (mds_rec_link, lk_cap); - CHECK_MEMBER (mds_rec_link, lk_suppgid1); - CHECK_MEMBER (mds_rec_link, lk_suppgid2); - CHECK_MEMBER (mds_rec_link, lk_fid1); - CHECK_MEMBER (mds_rec_link, lk_fid2); -} - -void -check_mds_rec_unlink (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_rec_unlink); - CHECK_MEMBER (mds_rec_unlink, ul_opcode); - CHECK_MEMBER (mds_rec_unlink, ul_fsuid); - CHECK_MEMBER (mds_rec_unlink, ul_fsgid); - CHECK_MEMBER (mds_rec_unlink, ul_cap); - CHECK_MEMBER (mds_rec_unlink, ul_reserved); - CHECK_MEMBER (mds_rec_unlink, ul_mode); - CHECK_MEMBER (mds_rec_unlink, ul_suppgid); - CHECK_MEMBER (mds_rec_unlink, ul_fid1); - CHECK_MEMBER (mds_rec_unlink, ul_fid2); -} - -void -check_mds_rec_rename (void) -{ - BLANK_LINE (); - CHECK_STRUCT (mds_rec_rename); - CHECK_MEMBER 
(mds_rec_rename, rn_opcode); - CHECK_MEMBER (mds_rec_rename, rn_fsuid); - CHECK_MEMBER (mds_rec_rename, rn_fsgid); - CHECK_MEMBER (mds_rec_rename, rn_cap); - CHECK_MEMBER (mds_rec_rename, rn_suppgid1); - CHECK_MEMBER (mds_rec_rename, rn_suppgid2); - CHECK_MEMBER (mds_rec_rename, rn_fid1); - CHECK_MEMBER (mds_rec_rename, rn_fid2); -} - -void -check_lov_desc (void) -{ - BLANK_LINE (); - CHECK_STRUCT (lov_desc); - CHECK_MEMBER (lov_desc, ld_tgt_count); - CHECK_MEMBER (lov_desc, ld_active_tgt_count); - CHECK_MEMBER (lov_desc, ld_default_stripe_count); - CHECK_MEMBER (lov_desc, ld_default_stripe_size); - CHECK_MEMBER (lov_desc, ld_default_stripe_offset); - CHECK_MEMBER (lov_desc, ld_pattern); - CHECK_MEMBER (lov_desc, ld_uuid); -} - -void -check_ldlm_res_id (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ldlm_res_id); - CHECK_MEMBER (ldlm_res_id, name[RES_NAME_SIZE]); -} - -void -check_ldlm_extent (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ldlm_extent); - CHECK_MEMBER (ldlm_extent, start); - CHECK_MEMBER (ldlm_extent, end); -} - -void -check_ldlm_intent (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ldlm_intent); - CHECK_MEMBER (ldlm_intent, opc); -} - -void -check_ldlm_resource_desc (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ldlm_resource_desc); - CHECK_MEMBER (ldlm_resource_desc, lr_type); - CHECK_MEMBER (ldlm_resource_desc, lr_name); - CHECK_MEMBER (ldlm_resource_desc, lr_version[RES_VERSION_SIZE]); -} - -void -check_ldlm_lock_desc (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ldlm_lock_desc); - CHECK_MEMBER (ldlm_lock_desc, l_resource); - CHECK_MEMBER (ldlm_lock_desc, l_req_mode); - CHECK_MEMBER (ldlm_lock_desc, l_granted_mode); - CHECK_MEMBER (ldlm_lock_desc, l_extent); - CHECK_MEMBER (ldlm_lock_desc, l_version[RES_VERSION_SIZE]); -} - -void -check_ldlm_request (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ldlm_request); - CHECK_MEMBER (ldlm_request, lock_flags); - CHECK_MEMBER (ldlm_request, lock_desc); - CHECK_MEMBER (ldlm_request, lock_handle1); - CHECK_MEMBER (ldlm_request, 
lock_handle2); -} - -void -check_ldlm_reply (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ldlm_reply); - CHECK_MEMBER (ldlm_reply, lock_flags); - CHECK_MEMBER (ldlm_reply, lock_mode); - CHECK_MEMBER (ldlm_reply, lock_resource_name); - CHECK_MEMBER (ldlm_reply, lock_handle); - CHECK_MEMBER (ldlm_reply, lock_extent); - CHECK_MEMBER (ldlm_reply, lock_policy_res1); - CHECK_MEMBER (ldlm_reply, lock_policy_res2); -} - -void -check_ptlbd_op (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ptlbd_op); - CHECK_MEMBER (ptlbd_op, op_cmd); - CHECK_MEMBER (ptlbd_op, op_lun); - CHECK_MEMBER (ptlbd_op, op_niob_cnt); - CHECK_MEMBER (ptlbd_op, op__padding); - CHECK_MEMBER (ptlbd_op, op_block_cnt); -} - -void -check_ptlbd_niob (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ptlbd_niob); - CHECK_MEMBER (ptlbd_niob, n_xid); - CHECK_MEMBER (ptlbd_niob, n_block_nr); - CHECK_MEMBER (ptlbd_niob, n_offset); - CHECK_MEMBER (ptlbd_niob, n_length); -} - -void -check_ptlbd_rsp (void) -{ - BLANK_LINE (); - CHECK_STRUCT (ptlbd_rsp); - CHECK_MEMBER (ptlbd_rsp, r_status); - CHECK_MEMBER (ptlbd_rsp, r_error_cnt); -} - -int -main (int argc, char **argv) -{ - printf ("void lustre_assert_wire_constants (void)\n" - "{\n"); - - COMMENT ("Wire protocol assertions generated by 'wirecheck'"); - BLANK_LINE (); - - COMMENT ("Constants..."); - CHECK_DEFINE (PTLRPC_MSG_MAGIC); - CHECK_DEFINE (PTLRPC_MSG_VERSION); - - CHECK_VALUE (PTL_RPC_MSG_REQUEST); - CHECK_VALUE (PTL_RPC_MSG_ERR); - CHECK_VALUE (PTL_RPC_MSG_REPLY); - - CHECK_VALUE (MSG_LAST_REPLAY); - CHECK_VALUE (MSG_RESENT); - - CHECK_VALUE (MSG_CONNECT_RECOVERING); - CHECK_VALUE (MSG_CONNECT_RECONNECT); - CHECK_VALUE (MSG_CONNECT_REPLAYABLE); - - CHECK_VALUE (OST_REPLY); - CHECK_VALUE (OST_GETATTR); - CHECK_VALUE (OST_SETATTR); - CHECK_VALUE (OST_READ); - CHECK_VALUE (OST_WRITE); - CHECK_VALUE (OST_CREATE); - CHECK_VALUE (OST_DESTROY); - CHECK_VALUE (OST_GET_INFO); - CHECK_VALUE (OST_CONNECT); - CHECK_VALUE (OST_DISCONNECT); - CHECK_VALUE (OST_PUNCH); - CHECK_VALUE 
(OST_OPEN); - CHECK_VALUE (OST_CLOSE); - CHECK_VALUE (OST_STATFS); - CHECK_VALUE (OST_SAN_READ); - CHECK_VALUE (OST_SAN_WRITE); - CHECK_VALUE (OST_SYNCFS); - CHECK_VALUE (OST_LAST_OPC); - CHECK_VALUE (OST_FIRST_OPC); - - CHECK_VALUE (OBD_FL_INLINEDATA); - CHECK_VALUE (OBD_FL_OBDMDEXISTS); - - CHECK_VALUE (LOV_MAGIC); - - CHECK_VALUE (OBD_MD_FLALL); - CHECK_VALUE (OBD_MD_FLID); - CHECK_VALUE (OBD_MD_FLATIME); - CHECK_VALUE (OBD_MD_FLMTIME); - CHECK_VALUE (OBD_MD_FLCTIME); - CHECK_VALUE (OBD_MD_FLSIZE); - CHECK_VALUE (OBD_MD_FLBLOCKS); - CHECK_VALUE (OBD_MD_FLBLKSZ); - CHECK_VALUE (OBD_MD_FLMODE); - CHECK_VALUE (OBD_MD_FLTYPE); - CHECK_VALUE (OBD_MD_FLUID); - CHECK_VALUE (OBD_MD_FLGID); - CHECK_VALUE (OBD_MD_FLFLAGS); - CHECK_VALUE (OBD_MD_FLOBDFLG); - CHECK_VALUE (OBD_MD_FLNLINK); - CHECK_VALUE (OBD_MD_FLGENER); - CHECK_VALUE (OBD_MD_FLINLINE); - CHECK_VALUE (OBD_MD_FLRDEV); - CHECK_VALUE (OBD_MD_FLEASIZE); - CHECK_VALUE (OBD_MD_LINKNAME); - CHECK_VALUE (OBD_MD_FLHANDLE); - CHECK_VALUE (OBD_MD_FLCKSUM); - - CHECK_VALUE (OBD_BRW_READ); - CHECK_VALUE (OBD_BRW_WRITE); - CHECK_VALUE (OBD_BRW_CREATE); - CHECK_VALUE (OBD_BRW_SYNC); - - CHECK_DEFINE (OBD_OBJECT_EOF); - - CHECK_VALUE (OST_REQ_HAS_OA1); - - CHECK_VALUE (MDS_GETATTR); - CHECK_VALUE (MDS_GETATTR_NAME); - CHECK_VALUE (MDS_CLOSE); - CHECK_VALUE (MDS_REINT); - CHECK_VALUE (MDS_READPAGE); - CHECK_VALUE (MDS_CONNECT); - CHECK_VALUE (MDS_DISCONNECT); - CHECK_VALUE (MDS_GETSTATUS); - CHECK_VALUE (MDS_STATFS); - CHECK_VALUE (MDS_GETLOVINFO); - CHECK_VALUE (MDS_LAST_OPC); - CHECK_VALUE (MDS_FIRST_OPC); - - CHECK_VALUE (REINT_SETATTR); - CHECK_VALUE (REINT_CREATE); - CHECK_VALUE (REINT_LINK); - CHECK_VALUE (REINT_UNLINK); - CHECK_VALUE (REINT_RENAME); - CHECK_VALUE (REINT_OPEN); - CHECK_VALUE (REINT_MAX); - - CHECK_VALUE (IT_INTENT_EXEC); - CHECK_VALUE (IT_OPEN_LOOKUP); - CHECK_VALUE (IT_OPEN_NEG); - CHECK_VALUE (IT_OPEN_POS); - CHECK_VALUE (IT_OPEN_CREATE); - CHECK_VALUE (IT_OPEN_OPEN); - - CHECK_VALUE 
(MDS_STATUS_CONN); - CHECK_VALUE (MDS_STATUS_LOV); - - CHECK_VALUE (MDS_OPEN_HAS_EA); - - CHECK_VALUE (LOV_RAID0); - CHECK_VALUE (LOV_RAIDRR); - - CHECK_VALUE (LDLM_ENQUEUE); - CHECK_VALUE (LDLM_CONVERT); - CHECK_VALUE (LDLM_CANCEL); - CHECK_VALUE (LDLM_BL_CALLBACK); - CHECK_VALUE (LDLM_CP_CALLBACK); - CHECK_VALUE (LDLM_LAST_OPC); - CHECK_VALUE (LDLM_FIRST_OPC); - - CHECK_VALUE (PTLBD_QUERY); - CHECK_VALUE (PTLBD_READ); - CHECK_VALUE (PTLBD_WRITE); - CHECK_VALUE (PTLBD_FLUSH); - CHECK_VALUE (PTLBD_CONNECT); - CHECK_VALUE (PTLBD_DISCONNECT); - CHECK_VALUE (PTLBD_LAST_OPC); - CHECK_VALUE (PTLBD_FIRST_OPC); - - CHECK_VALUE (OBD_PING); - - COMMENT ("Sizes and Offsets"); - BLANK_LINE (); - check_lustre_handle (); - check_lustre_msg (); - check_obdo (); - check_obd_statfs (); - check_obd_ioobj (); - check_niobuf_remote (); - check_ost_body (); - check_ll_fid (); - check_mds_status_req (); - check_mds_fileh_body (); - check_mds_body (); - check_mds_rec_setattr (); - check_mds_rec_create (); - check_mds_rec_link (); - check_mds_rec_unlink (); - check_mds_rec_rename (); - check_lov_desc (); - check_ldlm_res_id (); - check_ldlm_extent (); - check_ldlm_intent (); - check_ldlm_resource_desc (); - check_ldlm_lock_desc (); - check_ldlm_request (); - check_ldlm_reply (); - check_ptlbd_op (); - check_ptlbd_niob (); - check_ptlbd_rsp (); - - printf ("}\n\n"); - - return (0); -} -- 1.8.3.1